; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

; cvtpd2pi, register and memory source forms.
; The conversion is applied once to the incoming vector argument and once to a
; vector loaded through %a1; both MMX results are OR-ed together and the
; combined value is returned as an i64.
define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize {
; GENERIC-LABEL: test_cvtpd2pi:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
; GENERIC-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtpd2pi:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [8:4.00]
; ATOM-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [7:3.50]
; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtpd2pi:
; SLM:       # %bb.0:
; SLM-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [7:1.00]
; SLM-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:0.50]
; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtpd2pi:
; SANDY:       # %bb.0:
; SANDY-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
; SANDY-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtpd2pi:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
; HASWELL-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtpd2pi:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
; BROADWELL-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [9:1.00]
; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtpd2pi:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
; SKYLAKE-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtpd2pi:
; SKX:       # %bb.0:
; SKX-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
; SKX-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtpd2pi:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [8:1.00]
; BTVER2-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [3:1.00]
; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtpd2pi:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [12:1.00]
; ZNVER1-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %from_reg = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0)
  %mem_src = load <2 x double>, <2 x double>* %a1, align 16
  %from_mem = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %mem_src)
  %combined = call x86_mmx @llvm.x86.mmx.por(x86_mmx %from_reg, x86_mmx %from_mem)
  %as_i64 = bitcast x86_mmx %combined to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

; cvtpi2pd, register and memory source forms.
; Converts the MMX argument and an MMX value loaded through %a1, then returns
; the floating-point sum of the two converted vectors.
define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize {
; GENERIC-LABEL: test_cvtpi2pd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:1.00]
; GENERIC-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [10:1.00]
; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtpi2pd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [8:4.00]
; ATOM-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [7:3.50]
; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtpi2pd:
; SLM:       # %bb.0:
; SLM-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:0.50]
; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtpi2pd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
; SANDY-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtpi2pd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [9:1.00]
; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtpi2pd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [9:1.00]
; BROADWELL-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:1.00]
; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtpi2pd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtpi2pd:
; SKX:       # %bb.0:
; SKX-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50]
; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtpi2pd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtpi2pd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %from_reg = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0)
  %mem_src = load x86_mmx, x86_mmx* %a1, align 8
  %from_mem = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %mem_src)
  %sum = fadd <2 x double> %from_reg, %from_mem
  ret <2 x double> %sum
}
declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

; cvtpi2ps, register and memory source forms.
; %a2 and %a3 are the pass-through vector operands of the two intrinsic calls;
; the MMX operand comes from %a0 for the first call and from a load through
; %a1 for the second. The two results are added and returned.
define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 x float> %a3) optsize {
; GENERIC-LABEL: test_cvtpi2ps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtpi2ps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [5:5.00]
; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtpi2ps:
; SLM:       # %bb.0:
; SLM-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [7:1.00]
; SLM-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [4:0.50]
; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtpi2ps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtpi2ps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtpi2ps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtpi2ps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtpi2ps:
; SKX:       # %bb.0:
; SKX-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtpi2ps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtpi2ps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [12:1.00]
; ZNVER1-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [5:1.00]
; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %from_reg = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a2, x86_mmx %a0)
  %mem_src = load x86_mmx, x86_mmx* %a1, align 8
  %from_mem = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a3, x86_mmx %mem_src)
  %sum = fadd <4 x float> %from_reg, %from_mem
  ret <4 x float> %sum
}
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone

; cvtps2pi, register and memory source forms.
; Same shape as the cvtpd2pi test above, but with <4 x float> inputs: convert
; the argument and a loaded vector, OR the MMX results, return them as i64.
define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize {
; GENERIC-LABEL: test_cvtps2pi:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; GENERIC-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvtps2pi:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:5.00]
; ATOM-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [5:5.00]
; ATOM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; ATOM-NEXT:    movq %mm1, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvtps2pi:
; SLM:       # %bb.0:
; SLM-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [7:1.00]
; SLM-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:0.50]
; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvtps2pi:
; SANDY:       # %bb.0:
; SANDY-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; SANDY-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvtps2pi:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
; HASWELL-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
; HASWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; HASWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvtps2pi:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
; BROADWELL-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvtps2pi:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
; SKYLAKE-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvtps2pi:
; SKX:       # %bb.0:
; SKX-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
; SKX-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvtps2pi:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
; BTVER2-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtps2pi:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [12:1.00]
; ZNVER1-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %from_reg = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0)
  %mem_src = load <4 x float>, <4 x float>* %a1, align 16
  %from_mem = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %mem_src)
  %combined = call x86_mmx @llvm.x86.mmx.por(x86_mmx %from_reg, x86_mmx %from_mem)
  %as_i64 = bitcast x86_mmx %combined to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone

; cvttpd2pi, register and memory source forms.
; Convert the <2 x double> argument and a vector loaded through %a1, OR the
; two MMX results, and return the combined value as i64.
define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize {
; GENERIC-LABEL: test_cvttpd2pi:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
; GENERIC-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvttpd2pi:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [8:4.00]
; ATOM-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [7:3.50]
; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvttpd2pi:
; SLM:       # %bb.0:
; SLM-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [7:1.00]
; SLM-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:0.50]
; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvttpd2pi:
; SANDY:       # %bb.0:
; SANDY-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
; SANDY-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvttpd2pi:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
; HASWELL-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvttpd2pi:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
; BROADWELL-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [9:1.00]
; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvttpd2pi:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
; SKYLAKE-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvttpd2pi:
; SKX:       # %bb.0:
; SKX-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
; SKX-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvttpd2pi:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [8:1.00]
; BTVER2-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [3:1.00]
; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvttpd2pi:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [12:1.00]
; ZNVER1-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %from_reg = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0)
  %mem_src = load <2 x double>, <2 x double>* %a1, align 16
  %from_mem = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %mem_src)
  %combined = call x86_mmx @llvm.x86.mmx.por(x86_mmx %from_reg, x86_mmx %from_mem)
  %as_i64 = bitcast x86_mmx %combined to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

; cvttps2pi, register and memory source forms.
; Convert the <4 x float> argument and a vector loaded through %a1, OR the
; two MMX results, and return the combined value as i64.
define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize {
; GENERIC-LABEL: test_cvttps2pi:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; GENERIC-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cvttps2pi:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:5.00]
; ATOM-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [5:5.00]
; ATOM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; ATOM-NEXT:    movq %mm1, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cvttps2pi:
; SLM:       # %bb.0:
; SLM-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [7:1.00]
; SLM-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:0.50]
; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_cvttps2pi:
; SANDY:       # %bb.0:
; SANDY-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; SANDY-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_cvttps2pi:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
; HASWELL-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
; HASWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; HASWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cvttps2pi:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
; BROADWELL-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cvttps2pi:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
; SKYLAKE-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cvttps2pi:
; SKX:       # %bb.0:
; SKX-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
; SKX-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_cvttps2pi:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
; BTVER2-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvttps2pi:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [12:1.00]
; ZNVER1-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %from_reg = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0)
  %mem_src = load <4 x float>, <4 x float>* %a1, align 16
  %from_mem = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %mem_src)
  %combined = call x86_mmx @llvm.x86.mmx.por(x86_mmx %from_reg, x86_mmx %from_mem)
  %as_i64 = bitcast x86_mmx %combined to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone

; emms: the function body is a single call to the emms intrinsic.
define void @test_emms() optsize {
; GENERIC-LABEL: test_emms:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    emms # sched: [31:10.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_emms:
; ATOM:       # %bb.0:
; ATOM-NEXT:    emms # sched: [5:2.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_emms:
; SLM:       # %bb.0:
; SLM-NEXT:    emms # sched: [10:5.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_emms:
; SANDY:       # %bb.0:
; SANDY-NEXT:    emms # sched: [31:10.33]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_emms:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    emms # sched: [31:10.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_emms:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    emms # sched: [31:10.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_emms:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    emms # sched: [10:4.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_emms:
; SKX:       # %bb.0:
; SKX-NEXT:    emms # sched: [10:4.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_emms:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    emms # sched: [2:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_emms:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    emms # sched: [2:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  call void @llvm.x86.mmx.emms()
  ret void
}
declare void @llvm.x86.mmx.emms()

; maskmovq: forwards the two MMX arguments and the i8* argument straight to
; the maskmovq intrinsic; nothing is returned.
define void @test_maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) optsize {
; GENERIC-LABEL: test_maskmovq:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_maskmovq:
; ATOM:       # %bb.0:
; ATOM-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_maskmovq:
; SLM:       # %bb.0:
; SLM-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_maskmovq:
; SANDY:       # %bb.0:
; SANDY-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_maskmovq:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_maskmovq:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_maskmovq:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_maskmovq:
; SKX:       # %bb.0:
; SKX-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_maskmovq:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    maskmovq %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_maskmovq:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    maskmovq %mm1, %mm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  call void @llvm.x86.mmx.maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2)
  ret void
}
declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

; movd between general-purpose registers, memory and MMX registers.
; %a1 and the i32 loaded through %a2 are each placed in lane 0 of a <2 x i32>
; and bitcast to MMX. They are added with padd.d, and that sum is added to
; %a0. Lane 0 of the first sum is stored back through %a2; lane 0 of the
; second sum is returned. Unlike its neighbours, this function is not marked
; optsize.
define i32 @test_movd(x86_mmx %a0, i32 %a1, i32* %a2) {
; GENERIC-LABEL: test_movd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
; GENERIC-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
; GENERIC-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
; GENERIC-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
; GENERIC-NEXT:    movd %mm0, %eax # sched: [2:1.00]
; GENERIC-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
; ATOM-NEXT:    movd (%rsi), %mm2 # sched: [1:1.00]
; ATOM-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
; ATOM-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    movd %mm2, %ecx # sched: [3:3.00]
; ATOM-NEXT:    movd %mm0, %eax # sched: [3:3.00]
; ATOM-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_movd:
; SLM:       # %bb.0:
; SLM-NEXT:    movd (%rsi), %mm2 # sched: [3:1.00]
; SLM-NEXT:    movd %edi, %mm1 # sched: [1:0.50]
; SLM-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
; SLM-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
; SLM-NEXT:    movd %mm2, %ecx # sched: [1:0.50]
; SLM-NEXT:    movd %mm0, %eax # sched: [1:0.50]
; SLM-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
; SANDY-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
; SANDY-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
; SANDY-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
; SANDY-NEXT:    movd %mm0, %eax # sched: [2:1.00]
; SANDY-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
; HASWELL-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
; HASWELL-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
; HASWELL-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    movd %mm2, %ecx # sched: [1:1.00]
; HASWELL-NEXT:    movd %mm0, %eax # sched: [1:1.00]
; HASWELL-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
; BROADWELL-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
; BROADWELL-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
; BROADWELL-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    movd %mm2, %ecx # sched: [1:1.00]
; BROADWELL-NEXT:    movd %mm0, %eax # sched: [1:1.00]
; BROADWELL-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
; SKYLAKE-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
; SKYLAKE-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
; SKYLAKE-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
; SKYLAKE-NEXT:    movd %mm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_movd:
; SKX:       # %bb.0:
; SKX-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
; SKX-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
; SKX-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
; SKX-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
; SKX-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
; SKX-NEXT:    movd %mm0, %eax # sched: [2:1.00]
; SKX-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    movd %edi, %mm1 # sched: [8:0.50]
; BTVER2-NEXT:    movd (%rsi), %mm2 # sched: [5:1.00]
; BTVER2-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
; BTVER2-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    movd %mm2, %ecx # sched: [4:1.00]
; BTVER2-NEXT:    movd %mm0, %eax # sched: [4:1.00]
; BTVER2-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    movd (%rsi), %mm2 # sched: [8:0.50]
; ZNVER1-NEXT:    movd %edi, %mm1 # sched: [3:1.00]
; ZNVER1-NEXT:    paddd %mm1, %mm2 # sched: [1:0.25]
; ZNVER1-NEXT:    paddd %mm2, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
; ZNVER1-NEXT:    movd %mm0, %eax # sched: [2:1.00]
; ZNVER1-NEXT:    movl %ecx, (%rsi) # sched: [1:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %arg_vec = insertelement <2 x i32> undef, i32 %a1, i32 0
  %arg_mmx = bitcast <2 x i32> %arg_vec to x86_mmx
  %loaded = load i32, i32* %a2
  %loaded_vec = insertelement <2 x i32> undef, i32 %loaded, i32 0
  %loaded_mmx = bitcast <2 x i32> %loaded_vec to x86_mmx
  %inner_sum = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %arg_mmx, x86_mmx %loaded_mmx)
  %outer_sum = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %inner_sum)
  %inner_vec = bitcast x86_mmx %inner_sum to <2 x i32>
  %outer_vec = bitcast x86_mmx %outer_sum to <2 x i32>
  %inner_lo = extractelement <2 x i32> %inner_vec, i32 0
  %outer_lo = extractelement <2 x i32> %outer_vec, i32 0
  store i32 %inner_lo, i32* %a2
  ret i32 %outer_lo
}

; movdq2q: element 0 of the <2 x i64> argument is bitcast to MMX, added to
; itself with padd.d, and the result is returned as i64.
define i64 @test_movdq2q(<2 x i64> %a0) optsize {
; GENERIC-LABEL: test_movdq2q:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
; GENERIC-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movdq2q:
; ATOM:       # %bb.0:
; ATOM-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_movdq2q:
; SLM:       # %bb.0:
; SLM-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
; SLM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_movdq2q:
; SANDY:       # %bb.0:
; SANDY-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
; SANDY-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_movdq2q:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:0.67]
; HASWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movdq2q:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:0.67]
; BROADWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movdq2q:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
; SKYLAKE-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_movdq2q:
; SKX:       # %bb.0:
; SKX-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
; SKX-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_movdq2q:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_movdq2q:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    paddd %mm0, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %low_qword = extractelement <2 x i64> %a0, i32 0
  %low_mmx = bitcast i64 %low_qword to x86_mmx
  %doubled = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %low_mmx, x86_mmx %low_mmx)
  %as_i64 = bitcast x86_mmx %doubled to i64
  ret i64 %as_i64
}
; ---------------------------------------------------------------------------
; MMX scheduling tests: test_movntq, test_movq, test_movq2dq, test_pabsb,
; test_pabsd, test_pabsw, test_packssdw, test_packsswb, test_packuswb,
; test_paddb, test_paddd, test_paddq.
;
; Each function is followed by one CHECK group per RUN-line prefix (GENERIC,
; ATOM, SLM, SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2, ZNVER1) pinning
; the emitted instructions together with their "# sched: [a:b]" annotation
; (NOTE(review): presumably latency and reciprocal throughput - confirm).
; The file header states these assertions are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them rather than editing
; them by hand.
;
;  * test_movntq  - calls @llvm.x86.mmx.movnt.dq(%a0, %a1); a single movntq
;                   to (%rdi) is expected.
;  * test_movq    - loads an i64 from %a0, bitcasts it to x86_mmx, adds it to
;                   itself with @llvm.x86.mmx.padd.d and stores the result
;                   back to %a0. It is the only function in this group that
;                   is not marked optsize, and its ATOM expectations are the
;                   only ones with two nops before retq
;                   (NOTE(review): presumably related - confirm).
;  * test_movq2dq - bitcasts the x86_mmx argument to i64 and inserts it into
;                   element 0 of an undef <2 x i64>; movq2dq is expected.
;  * test_pabsb / test_pabsd / test_pabsw - load an x86_mmx from %a0 and
;                   apply the matching @llvm.x86.ssse3.pabs.* intrinsic
;                   twice; the memory-operand form followed by the
;                   register-operand form is expected.
;  * test_packssdw / test_packsswb / test_packuswb / test_paddb / test_paddd /
;    test_paddq   - call the intrinsic on (%a0, %a1), then again on that
;                   result and a value loaded from %a2; the register-operand
;                   form followed by the memory-operand form is expected.
;                   The final value is bitcast to i64 and returned.
; ---------------------------------------------------------------------------
837 838define void @test_movntq(x86_mmx* %a0, x86_mmx %a1) optsize { 839; GENERIC-LABEL: test_movntq: 840; GENERIC: # %bb.0: 841; GENERIC-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 842; GENERIC-NEXT: retq # sched: [1:1.00] 843; 844; ATOM-LABEL: test_movntq: 845; ATOM: # %bb.0: 846; ATOM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 847; ATOM-NEXT: retq # sched: [79:39.50] 848; 849; SLM-LABEL: test_movntq: 850; SLM: # %bb.0: 851; SLM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 852; SLM-NEXT: retq # sched: [4:1.00] 853; 854; SANDY-LABEL: test_movntq: 855; SANDY: # %bb.0: 856; SANDY-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 857; SANDY-NEXT: retq # sched: [1:1.00] 858; 859; HASWELL-LABEL: test_movntq: 860; HASWELL: # %bb.0: 861; HASWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 862; HASWELL-NEXT: retq # sched: [7:1.00] 863; 864; BROADWELL-LABEL: test_movntq: 865; BROADWELL: # %bb.0: 866; BROADWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 867; BROADWELL-NEXT: retq # sched: [7:1.00] 868; 869; SKYLAKE-LABEL: test_movntq: 870; SKYLAKE: # %bb.0: 871; SKYLAKE-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 872; SKYLAKE-NEXT: retq # sched: [7:1.00] 873; 874; SKX-LABEL: test_movntq: 875; SKX: # %bb.0: 876; SKX-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 877; SKX-NEXT: retq # sched: [7:1.00] 878; 879; BTVER2-LABEL: test_movntq: 880; BTVER2: # %bb.0: 881; BTVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00] 882; BTVER2-NEXT: retq # sched: [4:1.00] 883; 884; ZNVER1-LABEL: test_movntq: 885; ZNVER1: # %bb.0: 886; ZNVER1-NEXT: movntq %mm0, (%rdi) # sched: [1:0.50] 887; ZNVER1-NEXT: retq # sched: [1:0.50] 888 call void @llvm.x86.mmx.movnt.dq(x86_mmx* %a0, x86_mmx %a1) 889 ret void 890} 891declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind 892 893define void @test_movq(i64 *%a0) { 894; GENERIC-LABEL: test_movq: 895; GENERIC: # %bb.0: 896; GENERIC-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 897; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] 898; GENERIC-NEXT: movq %mm0, 
(%rdi) # sched: [1:1.00] 899; GENERIC-NEXT: retq # sched: [1:1.00] 900; 901; ATOM-LABEL: test_movq: 902; ATOM: # %bb.0: 903; ATOM-NEXT: movq (%rdi), %mm0 # sched: [1:1.00] 904; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 905; ATOM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 906; ATOM-NEXT: nop # sched: [1:0.50] 907; ATOM-NEXT: nop # sched: [1:0.50] 908; ATOM-NEXT: retq # sched: [79:39.50] 909; 910; SLM-LABEL: test_movq: 911; SLM: # %bb.0: 912; SLM-NEXT: movq (%rdi), %mm0 # sched: [3:1.00] 913; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 914; SLM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 915; SLM-NEXT: retq # sched: [4:1.00] 916; 917; SANDY-LABEL: test_movq: 918; SANDY: # %bb.0: 919; SANDY-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 920; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] 921; SANDY-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 922; SANDY-NEXT: retq # sched: [1:1.00] 923; 924; HASWELL-LABEL: test_movq: 925; HASWELL: # %bb.0: 926; HASWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 927; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 928; HASWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 929; HASWELL-NEXT: retq # sched: [7:1.00] 930; 931; BROADWELL-LABEL: test_movq: 932; BROADWELL: # %bb.0: 933; BROADWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 934; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 935; BROADWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 936; BROADWELL-NEXT: retq # sched: [7:1.00] 937; 938; SKYLAKE-LABEL: test_movq: 939; SKYLAKE: # %bb.0: 940; SKYLAKE-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 941; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 942; SKYLAKE-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 943; SKYLAKE-NEXT: retq # sched: [7:1.00] 944; 945; SKX-LABEL: test_movq: 946; SKX: # %bb.0: 947; SKX-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 948; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 949; SKX-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 950; SKX-NEXT: retq # sched: [7:1.00] 951; 952; BTVER2-LABEL: test_movq: 953; BTVER2: # %bb.0: 954; 
BTVER2-NEXT: movq (%rdi), %mm0 # sched: [5:1.00] 955; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 956; BTVER2-NEXT: movq %mm0, (%rdi) # sched: [2:1.00] 957; BTVER2-NEXT: retq # sched: [4:1.00] 958; 959; ZNVER1-LABEL: test_movq: 960; ZNVER1: # %bb.0: 961; ZNVER1-NEXT: movq (%rdi), %mm0 # sched: [8:0.50] 962; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25] 963; ZNVER1-NEXT: movq %mm0, (%rdi) # sched: [1:0.50] 964; ZNVER1-NEXT: retq # sched: [1:0.50] 965 %1 = load i64, i64* %a0, align 8 966 %2 = bitcast i64 %1 to x86_mmx 967 %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) 968 %4 = bitcast x86_mmx %3 to i64 969 store i64 %4, i64* %a0, align 8 970 ret void 971} 972 973define <2 x i64> @test_movq2dq(x86_mmx %a0) optsize { 974; GENERIC-LABEL: test_movq2dq: 975; GENERIC: # %bb.0: 976; GENERIC-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] 977; GENERIC-NEXT: retq # sched: [1:1.00] 978; 979; ATOM-LABEL: test_movq2dq: 980; ATOM: # %bb.0: 981; ATOM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] 982; ATOM-NEXT: retq # sched: [79:39.50] 983; 984; SLM-LABEL: test_movq2dq: 985; SLM: # %bb.0: 986; SLM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] 987; SLM-NEXT: retq # sched: [4:1.00] 988; 989; SANDY-LABEL: test_movq2dq: 990; SANDY: # %bb.0: 991; SANDY-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] 992; SANDY-NEXT: retq # sched: [1:1.00] 993; 994; HASWELL-LABEL: test_movq2dq: 995; HASWELL: # %bb.0: 996; HASWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] 997; HASWELL-NEXT: retq # sched: [7:1.00] 998; 999; BROADWELL-LABEL: test_movq2dq: 1000; BROADWELL: # %bb.0: 1001; BROADWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] 1002; BROADWELL-NEXT: retq # sched: [7:1.00] 1003; 1004; SKYLAKE-LABEL: test_movq2dq: 1005; SKYLAKE: # %bb.0: 1006; SKYLAKE-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] 1007; SKYLAKE-NEXT: retq # sched: [7:1.00] 1008; 1009; SKX-LABEL: test_movq2dq: 1010; SKX: # %bb.0: 1011; SKX-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] 1012; SKX-NEXT: retq # sched: [7:1.00] 
1013; 1014; BTVER2-LABEL: test_movq2dq: 1015; BTVER2: # %bb.0: 1016; BTVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] 1017; BTVER2-NEXT: retq # sched: [4:1.00] 1018; 1019; ZNVER1-LABEL: test_movq2dq: 1020; ZNVER1: # %bb.0: 1021; ZNVER1-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.25] 1022; ZNVER1-NEXT: retq # sched: [1:0.50] 1023 %1 = bitcast x86_mmx %a0 to i64 1024 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1025 ret <2 x i64> %2 1026} 1027 1028define i64 @test_pabsb(x86_mmx *%a0) optsize { 1029; GENERIC-LABEL: test_pabsb: 1030; GENERIC: # %bb.0: 1031; GENERIC-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1032; GENERIC-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1033; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1034; GENERIC-NEXT: retq # sched: [1:1.00] 1035; 1036; ATOM-LABEL: test_pabsb: 1037; ATOM: # %bb.0: 1038; ATOM-NEXT: pabsb (%rdi), %mm0 # sched: [1:1.00] 1039; ATOM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1040; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1041; ATOM-NEXT: retq # sched: [79:39.50] 1042; 1043; SLM-LABEL: test_pabsb: 1044; SLM: # %bb.0: 1045; SLM-NEXT: pabsb (%rdi), %mm0 # sched: [4:1.00] 1046; SLM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1047; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1048; SLM-NEXT: retq # sched: [4:1.00] 1049; 1050; SANDY-LABEL: test_pabsb: 1051; SANDY: # %bb.0: 1052; SANDY-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1053; SANDY-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1054; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1055; SANDY-NEXT: retq # sched: [1:1.00] 1056; 1057; HASWELL-LABEL: test_pabsb: 1058; HASWELL: # %bb.0: 1059; HASWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1060; HASWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1061; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1062; HASWELL-NEXT: retq # sched: [7:1.00] 1063; 1064; BROADWELL-LABEL: test_pabsb: 1065; BROADWELL: # %bb.0: 1066; BROADWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1067; BROADWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1068; 
BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1069; BROADWELL-NEXT: retq # sched: [7:1.00] 1070; 1071; SKYLAKE-LABEL: test_pabsb: 1072; SKYLAKE: # %bb.0: 1073; SKYLAKE-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1074; SKYLAKE-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1075; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1076; SKYLAKE-NEXT: retq # sched: [7:1.00] 1077; 1078; SKX-LABEL: test_pabsb: 1079; SKX: # %bb.0: 1080; SKX-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1081; SKX-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1082; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1083; SKX-NEXT: retq # sched: [7:1.00] 1084; 1085; BTVER2-LABEL: test_pabsb: 1086; BTVER2: # %bb.0: 1087; BTVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:1.00] 1088; BTVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1089; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1090; BTVER2-NEXT: retq # sched: [4:1.00] 1091; 1092; ZNVER1-LABEL: test_pabsb: 1093; ZNVER1: # %bb.0: 1094; ZNVER1-NEXT: pabsb (%rdi), %mm0 # sched: [8:0.50] 1095; ZNVER1-NEXT: pabsb %mm0, %mm0 # sched: [1:0.25] 1096; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1097; ZNVER1-NEXT: retq # sched: [1:0.50] 1098 %1 = load x86_mmx, x86_mmx *%a0, align 8 1099 %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) 1100 %3 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %2) 1101 %4 = bitcast x86_mmx %3 to i64 1102 ret i64 %4 1103} 1104declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone 1105 1106define i64 @test_pabsd(x86_mmx *%a0) optsize { 1107; GENERIC-LABEL: test_pabsd: 1108; GENERIC: # %bb.0: 1109; GENERIC-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1110; GENERIC-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1111; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1112; GENERIC-NEXT: retq # sched: [1:1.00] 1113; 1114; ATOM-LABEL: test_pabsd: 1115; ATOM: # %bb.0: 1116; ATOM-NEXT: pabsd (%rdi), %mm0 # sched: [1:1.00] 1117; ATOM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1118; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1119; ATOM-NEXT: retq # 
sched: [79:39.50] 1120; 1121; SLM-LABEL: test_pabsd: 1122; SLM: # %bb.0: 1123; SLM-NEXT: pabsd (%rdi), %mm0 # sched: [4:1.00] 1124; SLM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1125; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1126; SLM-NEXT: retq # sched: [4:1.00] 1127; 1128; SANDY-LABEL: test_pabsd: 1129; SANDY: # %bb.0: 1130; SANDY-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1131; SANDY-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1132; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1133; SANDY-NEXT: retq # sched: [1:1.00] 1134; 1135; HASWELL-LABEL: test_pabsd: 1136; HASWELL: # %bb.0: 1137; HASWELL-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1138; HASWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1139; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1140; HASWELL-NEXT: retq # sched: [7:1.00] 1141; 1142; BROADWELL-LABEL: test_pabsd: 1143; BROADWELL: # %bb.0: 1144; BROADWELL-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1145; BROADWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1146; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1147; BROADWELL-NEXT: retq # sched: [7:1.00] 1148; 1149; SKYLAKE-LABEL: test_pabsd: 1150; SKYLAKE: # %bb.0: 1151; SKYLAKE-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1152; SKYLAKE-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1153; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1154; SKYLAKE-NEXT: retq # sched: [7:1.00] 1155; 1156; SKX-LABEL: test_pabsd: 1157; SKX: # %bb.0: 1158; SKX-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1159; SKX-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1160; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1161; SKX-NEXT: retq # sched: [7:1.00] 1162; 1163; BTVER2-LABEL: test_pabsd: 1164; BTVER2: # %bb.0: 1165; BTVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:1.00] 1166; BTVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1167; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1168; BTVER2-NEXT: retq # sched: [4:1.00] 1169; 1170; ZNVER1-LABEL: test_pabsd: 1171; ZNVER1: # %bb.0: 1172; ZNVER1-NEXT: pabsd (%rdi), %mm0 # sched: [8:0.50] 1173; 
ZNVER1-NEXT: pabsd %mm0, %mm0 # sched: [1:0.25] 1174; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1175; ZNVER1-NEXT: retq # sched: [1:0.50] 1176 %1 = load x86_mmx, x86_mmx *%a0, align 8 1177 %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) 1178 %3 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %2) 1179 %4 = bitcast x86_mmx %3 to i64 1180 ret i64 %4 1181} 1182declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone 1183 1184define i64 @test_pabsw(x86_mmx *%a0) optsize { 1185; GENERIC-LABEL: test_pabsw: 1186; GENERIC: # %bb.0: 1187; GENERIC-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1188; GENERIC-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1189; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1190; GENERIC-NEXT: retq # sched: [1:1.00] 1191; 1192; ATOM-LABEL: test_pabsw: 1193; ATOM: # %bb.0: 1194; ATOM-NEXT: pabsw (%rdi), %mm0 # sched: [1:1.00] 1195; ATOM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1196; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1197; ATOM-NEXT: retq # sched: [79:39.50] 1198; 1199; SLM-LABEL: test_pabsw: 1200; SLM: # %bb.0: 1201; SLM-NEXT: pabsw (%rdi), %mm0 # sched: [4:1.00] 1202; SLM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1203; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1204; SLM-NEXT: retq # sched: [4:1.00] 1205; 1206; SANDY-LABEL: test_pabsw: 1207; SANDY: # %bb.0: 1208; SANDY-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1209; SANDY-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1210; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1211; SANDY-NEXT: retq # sched: [1:1.00] 1212; 1213; HASWELL-LABEL: test_pabsw: 1214; HASWELL: # %bb.0: 1215; HASWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1216; HASWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1217; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1218; HASWELL-NEXT: retq # sched: [7:1.00] 1219; 1220; BROADWELL-LABEL: test_pabsw: 1221; BROADWELL: # %bb.0: 1222; BROADWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1223; BROADWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1224; BROADWELL-NEXT: 
movq %mm0, %rax # sched: [1:1.00] 1225; BROADWELL-NEXT: retq # sched: [7:1.00] 1226; 1227; SKYLAKE-LABEL: test_pabsw: 1228; SKYLAKE: # %bb.0: 1229; SKYLAKE-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1230; SKYLAKE-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1231; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1232; SKYLAKE-NEXT: retq # sched: [7:1.00] 1233; 1234; SKX-LABEL: test_pabsw: 1235; SKX: # %bb.0: 1236; SKX-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1237; SKX-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1238; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1239; SKX-NEXT: retq # sched: [7:1.00] 1240; 1241; BTVER2-LABEL: test_pabsw: 1242; BTVER2: # %bb.0: 1243; BTVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:1.00] 1244; BTVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1245; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1246; BTVER2-NEXT: retq # sched: [4:1.00] 1247; 1248; ZNVER1-LABEL: test_pabsw: 1249; ZNVER1: # %bb.0: 1250; ZNVER1-NEXT: pabsw (%rdi), %mm0 # sched: [8:0.50] 1251; ZNVER1-NEXT: pabsw %mm0, %mm0 # sched: [1:0.25] 1252; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1253; ZNVER1-NEXT: retq # sched: [1:0.50] 1254 %1 = load x86_mmx, x86_mmx *%a0, align 8 1255 %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) 1256 %3 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %2) 1257 %4 = bitcast x86_mmx %3 to i64 1258 ret i64 %4 1259} 1260declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone 1261 1262define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1263; GENERIC-LABEL: test_packssdw: 1264; GENERIC: # %bb.0: 1265; GENERIC-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] 1266; GENERIC-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] 1267; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1268; GENERIC-NEXT: retq # sched: [1:1.00] 1269; 1270; ATOM-LABEL: test_packssdw: 1271; ATOM: # %bb.0: 1272; ATOM-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] 1273; ATOM-NEXT: packssdw (%rdi), %mm0 # sched: [1:1.00] 1274; ATOM-NEXT: movq %mm0, %rax # sched: 
[3:3.00] 1275; ATOM-NEXT: retq # sched: [79:39.50] 1276; 1277; SLM-LABEL: test_packssdw: 1278; SLM: # %bb.0: 1279; SLM-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] 1280; SLM-NEXT: packssdw (%rdi), %mm0 # sched: [4:1.00] 1281; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1282; SLM-NEXT: retq # sched: [4:1.00] 1283; 1284; SANDY-LABEL: test_packssdw: 1285; SANDY: # %bb.0: 1286; SANDY-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] 1287; SANDY-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] 1288; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1289; SANDY-NEXT: retq # sched: [1:1.00] 1290; 1291; HASWELL-LABEL: test_packssdw: 1292; HASWELL: # %bb.0: 1293; HASWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] 1294; HASWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] 1295; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1296; HASWELL-NEXT: retq # sched: [7:1.00] 1297; 1298; BROADWELL-LABEL: test_packssdw: 1299; BROADWELL: # %bb.0: 1300; BROADWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] 1301; BROADWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] 1302; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1303; BROADWELL-NEXT: retq # sched: [7:1.00] 1304; 1305; SKYLAKE-LABEL: test_packssdw: 1306; SKYLAKE: # %bb.0: 1307; SKYLAKE-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] 1308; SKYLAKE-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] 1309; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1310; SKYLAKE-NEXT: retq # sched: [7:1.00] 1311; 1312; SKX-LABEL: test_packssdw: 1313; SKX: # %bb.0: 1314; SKX-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] 1315; SKX-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] 1316; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1317; SKX-NEXT: retq # sched: [7:1.00] 1318; 1319; BTVER2-LABEL: test_packssdw: 1320; BTVER2: # %bb.0: 1321; BTVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] 1322; BTVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] 1323; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1324; BTVER2-NEXT: retq # sched: [4:1.00] 1325; 1326; ZNVER1-LABEL: 
test_packssdw: 1327; ZNVER1: # %bb.0: 1328; ZNVER1-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] 1329; ZNVER1-NEXT: packssdw (%rdi), %mm0 # sched: [1:0.50] 1330; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1331; ZNVER1-NEXT: retq # sched: [1:0.50] 1332 %1 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a0, x86_mmx %a1) 1333 %2 = load x86_mmx, x86_mmx *%a2, align 8 1334 %3 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %1, x86_mmx %2) 1335 %4 = bitcast x86_mmx %3 to i64 1336 ret i64 %4 1337} 1338declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone 1339 1340define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1341; GENERIC-LABEL: test_packsswb: 1342; GENERIC: # %bb.0: 1343; GENERIC-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] 1344; GENERIC-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] 1345; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1346; GENERIC-NEXT: retq # sched: [1:1.00] 1347; 1348; ATOM-LABEL: test_packsswb: 1349; ATOM: # %bb.0: 1350; ATOM-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] 1351; ATOM-NEXT: packsswb (%rdi), %mm0 # sched: [1:1.00] 1352; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1353; ATOM-NEXT: retq # sched: [79:39.50] 1354; 1355; SLM-LABEL: test_packsswb: 1356; SLM: # %bb.0: 1357; SLM-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] 1358; SLM-NEXT: packsswb (%rdi), %mm0 # sched: [4:1.00] 1359; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1360; SLM-NEXT: retq # sched: [4:1.00] 1361; 1362; SANDY-LABEL: test_packsswb: 1363; SANDY: # %bb.0: 1364; SANDY-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] 1365; SANDY-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] 1366; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1367; SANDY-NEXT: retq # sched: [1:1.00] 1368; 1369; HASWELL-LABEL: test_packsswb: 1370; HASWELL: # %bb.0: 1371; HASWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] 1372; HASWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] 1373; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1374; HASWELL-NEXT: retq # sched: 
[7:1.00] 1375; 1376; BROADWELL-LABEL: test_packsswb: 1377; BROADWELL: # %bb.0: 1378; BROADWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] 1379; BROADWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] 1380; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1381; BROADWELL-NEXT: retq # sched: [7:1.00] 1382; 1383; SKYLAKE-LABEL: test_packsswb: 1384; SKYLAKE: # %bb.0: 1385; SKYLAKE-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] 1386; SKYLAKE-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] 1387; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1388; SKYLAKE-NEXT: retq # sched: [7:1.00] 1389; 1390; SKX-LABEL: test_packsswb: 1391; SKX: # %bb.0: 1392; SKX-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] 1393; SKX-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] 1394; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1395; SKX-NEXT: retq # sched: [7:1.00] 1396; 1397; BTVER2-LABEL: test_packsswb: 1398; BTVER2: # %bb.0: 1399; BTVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] 1400; BTVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] 1401; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1402; BTVER2-NEXT: retq # sched: [4:1.00] 1403; 1404; ZNVER1-LABEL: test_packsswb: 1405; ZNVER1: # %bb.0: 1406; ZNVER1-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] 1407; ZNVER1-NEXT: packsswb (%rdi), %mm0 # sched: [1:0.50] 1408; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1409; ZNVER1-NEXT: retq # sched: [1:0.50] 1410 %1 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a0, x86_mmx %a1) 1411 %2 = load x86_mmx, x86_mmx *%a2, align 8 1412 %3 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %1, x86_mmx %2) 1413 %4 = bitcast x86_mmx %3 to i64 1414 ret i64 %4 1415} 1416declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone 1417 1418define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1419; GENERIC-LABEL: test_packuswb: 1420; GENERIC: # %bb.0: 1421; GENERIC-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] 1422; GENERIC-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] 1423; GENERIC-NEXT: 
movq %mm0, %rax # sched: [2:1.00] 1424; GENERIC-NEXT: retq # sched: [1:1.00] 1425; 1426; ATOM-LABEL: test_packuswb: 1427; ATOM: # %bb.0: 1428; ATOM-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] 1429; ATOM-NEXT: packuswb (%rdi), %mm0 # sched: [1:1.00] 1430; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1431; ATOM-NEXT: retq # sched: [79:39.50] 1432; 1433; SLM-LABEL: test_packuswb: 1434; SLM: # %bb.0: 1435; SLM-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] 1436; SLM-NEXT: packuswb (%rdi), %mm0 # sched: [4:1.00] 1437; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1438; SLM-NEXT: retq # sched: [4:1.00] 1439; 1440; SANDY-LABEL: test_packuswb: 1441; SANDY: # %bb.0: 1442; SANDY-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] 1443; SANDY-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] 1444; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1445; SANDY-NEXT: retq # sched: [1:1.00] 1446; 1447; HASWELL-LABEL: test_packuswb: 1448; HASWELL: # %bb.0: 1449; HASWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] 1450; HASWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] 1451; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1452; HASWELL-NEXT: retq # sched: [7:1.00] 1453; 1454; BROADWELL-LABEL: test_packuswb: 1455; BROADWELL: # %bb.0: 1456; BROADWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] 1457; BROADWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] 1458; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1459; BROADWELL-NEXT: retq # sched: [7:1.00] 1460; 1461; SKYLAKE-LABEL: test_packuswb: 1462; SKYLAKE: # %bb.0: 1463; SKYLAKE-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] 1464; SKYLAKE-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] 1465; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1466; SKYLAKE-NEXT: retq # sched: [7:1.00] 1467; 1468; SKX-LABEL: test_packuswb: 1469; SKX: # %bb.0: 1470; SKX-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] 1471; SKX-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] 1472; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1473; SKX-NEXT: retq # sched: [7:1.00] 1474; 1475; 
BTVER2-LABEL: test_packuswb: 1476; BTVER2: # %bb.0: 1477; BTVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] 1478; BTVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] 1479; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1480; BTVER2-NEXT: retq # sched: [4:1.00] 1481; 1482; ZNVER1-LABEL: test_packuswb: 1483; ZNVER1: # %bb.0: 1484; ZNVER1-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] 1485; ZNVER1-NEXT: packuswb (%rdi), %mm0 # sched: [1:0.50] 1486; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1487; ZNVER1-NEXT: retq # sched: [1:0.50] 1488 %1 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a0, x86_mmx %a1) 1489 %2 = load x86_mmx, x86_mmx *%a2, align 8 1490 %3 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %1, x86_mmx %2) 1491 %4 = bitcast x86_mmx %3 to i64 1492 ret i64 %4 1493} 1494declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone 1495 1496define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1497; GENERIC-LABEL: test_paddb: 1498; GENERIC: # %bb.0: 1499; GENERIC-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] 1500; GENERIC-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] 1501; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1502; GENERIC-NEXT: retq # sched: [1:1.00] 1503; 1504; ATOM-LABEL: test_paddb: 1505; ATOM: # %bb.0: 1506; ATOM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1507; ATOM-NEXT: paddb (%rdi), %mm0 # sched: [1:1.00] 1508; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1509; ATOM-NEXT: retq # sched: [79:39.50] 1510; 1511; SLM-LABEL: test_paddb: 1512; SLM: # %bb.0: 1513; SLM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1514; SLM-NEXT: paddb (%rdi), %mm0 # sched: [4:1.00] 1515; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1516; SLM-NEXT: retq # sched: [4:1.00] 1517; 1518; SANDY-LABEL: test_paddb: 1519; SANDY: # %bb.0: 1520; SANDY-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] 1521; SANDY-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] 1522; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1523; SANDY-NEXT: retq # sched: [1:1.00] 1524; 1525; 
HASWELL-LABEL: test_paddb: 1526; HASWELL: # %bb.0: 1527; HASWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1528; HASWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] 1529; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1530; HASWELL-NEXT: retq # sched: [7:1.00] 1531; 1532; BROADWELL-LABEL: test_paddb: 1533; BROADWELL: # %bb.0: 1534; BROADWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1535; BROADWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] 1536; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1537; BROADWELL-NEXT: retq # sched: [7:1.00] 1538; 1539; SKYLAKE-LABEL: test_paddb: 1540; SKYLAKE: # %bb.0: 1541; SKYLAKE-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1542; SKYLAKE-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] 1543; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1544; SKYLAKE-NEXT: retq # sched: [7:1.00] 1545; 1546; SKX-LABEL: test_paddb: 1547; SKX: # %bb.0: 1548; SKX-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1549; SKX-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] 1550; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1551; SKX-NEXT: retq # sched: [7:1.00] 1552; 1553; BTVER2-LABEL: test_paddb: 1554; BTVER2: # %bb.0: 1555; BTVER2-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1556; BTVER2-NEXT: paddb (%rdi), %mm0 # sched: [6:1.00] 1557; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1558; BTVER2-NEXT: retq # sched: [4:1.00] 1559; 1560; ZNVER1-LABEL: test_paddb: 1561; ZNVER1: # %bb.0: 1562; ZNVER1-NEXT: paddb %mm1, %mm0 # sched: [1:0.25] 1563; ZNVER1-NEXT: paddb (%rdi), %mm0 # sched: [8:0.50] 1564; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1565; ZNVER1-NEXT: retq # sched: [1:0.50] 1566 %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a1) 1567 %2 = load x86_mmx, x86_mmx *%a2, align 8 1568 %3 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %1, x86_mmx %2) 1569 %4 = bitcast x86_mmx %3 to i64 1570 ret i64 %4 1571} 1572declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone 1573 1574define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 
1575; GENERIC-LABEL: test_paddd: 1576; GENERIC: # %bb.0: 1577; GENERIC-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] 1578; GENERIC-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] 1579; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1580; GENERIC-NEXT: retq # sched: [1:1.00] 1581; 1582; ATOM-LABEL: test_paddd: 1583; ATOM: # %bb.0: 1584; ATOM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1585; ATOM-NEXT: paddd (%rdi), %mm0 # sched: [1:1.00] 1586; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1587; ATOM-NEXT: retq # sched: [79:39.50] 1588; 1589; SLM-LABEL: test_paddd: 1590; SLM: # %bb.0: 1591; SLM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1592; SLM-NEXT: paddd (%rdi), %mm0 # sched: [4:1.00] 1593; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1594; SLM-NEXT: retq # sched: [4:1.00] 1595; 1596; SANDY-LABEL: test_paddd: 1597; SANDY: # %bb.0: 1598; SANDY-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] 1599; SANDY-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] 1600; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1601; SANDY-NEXT: retq # sched: [1:1.00] 1602; 1603; HASWELL-LABEL: test_paddd: 1604; HASWELL: # %bb.0: 1605; HASWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1606; HASWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] 1607; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1608; HASWELL-NEXT: retq # sched: [7:1.00] 1609; 1610; BROADWELL-LABEL: test_paddd: 1611; BROADWELL: # %bb.0: 1612; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1613; BROADWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] 1614; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1615; BROADWELL-NEXT: retq # sched: [7:1.00] 1616; 1617; SKYLAKE-LABEL: test_paddd: 1618; SKYLAKE: # %bb.0: 1619; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1620; SKYLAKE-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] 1621; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1622; SKYLAKE-NEXT: retq # sched: [7:1.00] 1623; 1624; SKX-LABEL: test_paddd: 1625; SKX: # %bb.0: 1626; SKX-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1627; SKX-NEXT: paddd (%rdi), %mm0 
# sched: [6:0.50] 1628; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1629; SKX-NEXT: retq # sched: [7:1.00] 1630; 1631; BTVER2-LABEL: test_paddd: 1632; BTVER2: # %bb.0: 1633; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1634; BTVER2-NEXT: paddd (%rdi), %mm0 # sched: [6:1.00] 1635; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1636; BTVER2-NEXT: retq # sched: [4:1.00] 1637; 1638; ZNVER1-LABEL: test_paddd: 1639; ZNVER1: # %bb.0: 1640; ZNVER1-NEXT: paddd %mm1, %mm0 # sched: [1:0.25] 1641; ZNVER1-NEXT: paddd (%rdi), %mm0 # sched: [8:0.50] 1642; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1643; ZNVER1-NEXT: retq # sched: [1:0.50] 1644 %1 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %a1) 1645 %2 = load x86_mmx, x86_mmx *%a2, align 8 1646 %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %1, x86_mmx %2) 1647 %4 = bitcast x86_mmx %3 to i64 1648 ret i64 %4 1649} 1650declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone 1651 1652define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1653; GENERIC-LABEL: test_paddq: 1654; GENERIC: # %bb.0: 1655; GENERIC-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1656; GENERIC-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] 1657; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1658; GENERIC-NEXT: retq # sched: [1:1.00] 1659; 1660; ATOM-LABEL: test_paddq: 1661; ATOM: # %bb.0: 1662; ATOM-NEXT: paddq %mm1, %mm0 # sched: [2:1.00] 1663; ATOM-NEXT: paddq (%rdi), %mm0 # sched: [3:1.50] 1664; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1665; ATOM-NEXT: retq # sched: [79:39.50] 1666; 1667; SLM-LABEL: test_paddq: 1668; SLM: # %bb.0: 1669; SLM-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1670; SLM-NEXT: paddq (%rdi), %mm0 # sched: [4:1.00] 1671; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1672; SLM-NEXT: retq # sched: [4:1.00] 1673; 1674; SANDY-LABEL: test_paddq: 1675; SANDY: # %bb.0: 1676; SANDY-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1677; SANDY-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] 1678; SANDY-NEXT: movq 
%mm0, %rax # sched: [2:1.00] 1679; SANDY-NEXT: retq # sched: [1:1.00] 1680; 1681; HASWELL-LABEL: test_paddq: 1682; HASWELL: # %bb.0: 1683; HASWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1684; HASWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] 1685; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1686; HASWELL-NEXT: retq # sched: [7:1.00] 1687; 1688; BROADWELL-LABEL: test_paddq: 1689; BROADWELL: # %bb.0: 1690; BROADWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1691; BROADWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] 1692; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1693; BROADWELL-NEXT: retq # sched: [7:1.00] 1694; 1695; SKYLAKE-LABEL: test_paddq: 1696; SKYLAKE: # %bb.0: 1697; SKYLAKE-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1698; SKYLAKE-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] 1699; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1700; SKYLAKE-NEXT: retq # sched: [7:1.00] 1701; 1702; SKX-LABEL: test_paddq: 1703; SKX: # %bb.0: 1704; SKX-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1705; SKX-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] 1706; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1707; SKX-NEXT: retq # sched: [7:1.00] 1708; 1709; BTVER2-LABEL: test_paddq: 1710; BTVER2: # %bb.0: 1711; BTVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1712; BTVER2-NEXT: paddq (%rdi), %mm0 # sched: [6:1.00] 1713; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1714; BTVER2-NEXT: retq # sched: [4:1.00] 1715; 1716; ZNVER1-LABEL: test_paddq: 1717; ZNVER1: # %bb.0: 1718; ZNVER1-NEXT: paddq %mm1, %mm0 # sched: [1:0.25] 1719; ZNVER1-NEXT: paddq (%rdi), %mm0 # sched: [8:0.50] 1720; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1721; ZNVER1-NEXT: retq # sched: [1:0.50] 1722 %1 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a0, x86_mmx %a1) 1723 %2 = load x86_mmx, x86_mmx *%a2, align 8 1724 %3 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %1, x86_mmx %2) 1725 %4 = bitcast x86_mmx %3 to i64 1726 ret i64 %4 1727} 1728declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone 
1729 1730define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1731; GENERIC-LABEL: test_paddsb: 1732; GENERIC: # %bb.0: 1733; GENERIC-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] 1734; GENERIC-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] 1735; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1736; GENERIC-NEXT: retq # sched: [1:1.00] 1737; 1738; ATOM-LABEL: test_paddsb: 1739; ATOM: # %bb.0: 1740; ATOM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1741; ATOM-NEXT: paddsb (%rdi), %mm0 # sched: [1:1.00] 1742; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1743; ATOM-NEXT: retq # sched: [79:39.50] 1744; 1745; SLM-LABEL: test_paddsb: 1746; SLM: # %bb.0: 1747; SLM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1748; SLM-NEXT: paddsb (%rdi), %mm0 # sched: [4:1.00] 1749; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1750; SLM-NEXT: retq # sched: [4:1.00] 1751; 1752; SANDY-LABEL: test_paddsb: 1753; SANDY: # %bb.0: 1754; SANDY-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] 1755; SANDY-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] 1756; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1757; SANDY-NEXT: retq # sched: [1:1.00] 1758; 1759; HASWELL-LABEL: test_paddsb: 1760; HASWELL: # %bb.0: 1761; HASWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1762; HASWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50] 1763; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1764; HASWELL-NEXT: retq # sched: [7:1.00] 1765; 1766; BROADWELL-LABEL: test_paddsb: 1767; BROADWELL: # %bb.0: 1768; BROADWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1769; BROADWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50] 1770; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1771; BROADWELL-NEXT: retq # sched: [7:1.00] 1772; 1773; SKYLAKE-LABEL: test_paddsb: 1774; SKYLAKE: # %bb.0: 1775; SKYLAKE-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] 1776; SKYLAKE-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] 1777; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1778; SKYLAKE-NEXT: retq # sched: [7:1.00] 1779; 1780; SKX-LABEL: test_paddsb: 
1781; SKX: # %bb.0: 1782; SKX-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] 1783; SKX-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] 1784; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1785; SKX-NEXT: retq # sched: [7:1.00] 1786; 1787; BTVER2-LABEL: test_paddsb: 1788; BTVER2: # %bb.0: 1789; BTVER2-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1790; BTVER2-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] 1791; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1792; BTVER2-NEXT: retq # sched: [4:1.00] 1793; 1794; ZNVER1-LABEL: test_paddsb: 1795; ZNVER1: # %bb.0: 1796; ZNVER1-NEXT: paddsb %mm1, %mm0 # sched: [1:0.25] 1797; ZNVER1-NEXT: paddsb (%rdi), %mm0 # sched: [8:0.50] 1798; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1799; ZNVER1-NEXT: retq # sched: [1:0.50] 1800 %1 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a0, x86_mmx %a1) 1801 %2 = load x86_mmx, x86_mmx *%a2, align 8 1802 %3 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %1, x86_mmx %2) 1803 %4 = bitcast x86_mmx %3 to i64 1804 ret i64 %4 1805} 1806declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone 1807 1808define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1809; GENERIC-LABEL: test_paddsw: 1810; GENERIC: # %bb.0: 1811; GENERIC-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] 1812; GENERIC-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] 1813; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1814; GENERIC-NEXT: retq # sched: [1:1.00] 1815; 1816; ATOM-LABEL: test_paddsw: 1817; ATOM: # %bb.0: 1818; ATOM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1819; ATOM-NEXT: paddsw (%rdi), %mm0 # sched: [1:1.00] 1820; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1821; ATOM-NEXT: retq # sched: [79:39.50] 1822; 1823; SLM-LABEL: test_paddsw: 1824; SLM: # %bb.0: 1825; SLM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1826; SLM-NEXT: paddsw (%rdi), %mm0 # sched: [4:1.00] 1827; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1828; SLM-NEXT: retq # sched: [4:1.00] 1829; 1830; SANDY-LABEL: test_paddsw: 1831; SANDY: # %bb.0: 
1832; SANDY-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] 1833; SANDY-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] 1834; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1835; SANDY-NEXT: retq # sched: [1:1.00] 1836; 1837; HASWELL-LABEL: test_paddsw: 1838; HASWELL: # %bb.0: 1839; HASWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1840; HASWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50] 1841; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1842; HASWELL-NEXT: retq # sched: [7:1.00] 1843; 1844; BROADWELL-LABEL: test_paddsw: 1845; BROADWELL: # %bb.0: 1846; BROADWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1847; BROADWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50] 1848; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1849; BROADWELL-NEXT: retq # sched: [7:1.00] 1850; 1851; SKYLAKE-LABEL: test_paddsw: 1852; SKYLAKE: # %bb.0: 1853; SKYLAKE-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] 1854; SKYLAKE-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] 1855; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1856; SKYLAKE-NEXT: retq # sched: [7:1.00] 1857; 1858; SKX-LABEL: test_paddsw: 1859; SKX: # %bb.0: 1860; SKX-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] 1861; SKX-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] 1862; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1863; SKX-NEXT: retq # sched: [7:1.00] 1864; 1865; BTVER2-LABEL: test_paddsw: 1866; BTVER2: # %bb.0: 1867; BTVER2-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1868; BTVER2-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] 1869; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1870; BTVER2-NEXT: retq # sched: [4:1.00] 1871; 1872; ZNVER1-LABEL: test_paddsw: 1873; ZNVER1: # %bb.0: 1874; ZNVER1-NEXT: paddsw %mm1, %mm0 # sched: [1:0.25] 1875; ZNVER1-NEXT: paddsw (%rdi), %mm0 # sched: [8:0.50] 1876; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1877; ZNVER1-NEXT: retq # sched: [1:0.50] 1878 %1 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a0, x86_mmx %a1) 1879 %2 = load x86_mmx, x86_mmx *%a2, align 8 1880 %3 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx 
%1, x86_mmx %2) 1881 %4 = bitcast x86_mmx %3 to i64 1882 ret i64 %4 1883} 1884declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone 1885 1886define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1887; GENERIC-LABEL: test_paddusb: 1888; GENERIC: # %bb.0: 1889; GENERIC-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] 1890; GENERIC-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] 1891; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1892; GENERIC-NEXT: retq # sched: [1:1.00] 1893; 1894; ATOM-LABEL: test_paddusb: 1895; ATOM: # %bb.0: 1896; ATOM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1897; ATOM-NEXT: paddusb (%rdi), %mm0 # sched: [1:1.00] 1898; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1899; ATOM-NEXT: retq # sched: [79:39.50] 1900; 1901; SLM-LABEL: test_paddusb: 1902; SLM: # %bb.0: 1903; SLM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1904; SLM-NEXT: paddusb (%rdi), %mm0 # sched: [4:1.00] 1905; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1906; SLM-NEXT: retq # sched: [4:1.00] 1907; 1908; SANDY-LABEL: test_paddusb: 1909; SANDY: # %bb.0: 1910; SANDY-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] 1911; SANDY-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] 1912; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1913; SANDY-NEXT: retq # sched: [1:1.00] 1914; 1915; HASWELL-LABEL: test_paddusb: 1916; HASWELL: # %bb.0: 1917; HASWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1918; HASWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50] 1919; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1920; HASWELL-NEXT: retq # sched: [7:1.00] 1921; 1922; BROADWELL-LABEL: test_paddusb: 1923; BROADWELL: # %bb.0: 1924; BROADWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1925; BROADWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50] 1926; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1927; BROADWELL-NEXT: retq # sched: [7:1.00] 1928; 1929; SKYLAKE-LABEL: test_paddusb: 1930; SKYLAKE: # %bb.0: 1931; SKYLAKE-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] 1932; SKYLAKE-NEXT: 
paddusb (%rdi), %mm0 # sched: [6:1.00] 1933; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1934; SKYLAKE-NEXT: retq # sched: [7:1.00] 1935; 1936; SKX-LABEL: test_paddusb: 1937; SKX: # %bb.0: 1938; SKX-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] 1939; SKX-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] 1940; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1941; SKX-NEXT: retq # sched: [7:1.00] 1942; 1943; BTVER2-LABEL: test_paddusb: 1944; BTVER2: # %bb.0: 1945; BTVER2-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1946; BTVER2-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] 1947; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1948; BTVER2-NEXT: retq # sched: [4:1.00] 1949; 1950; ZNVER1-LABEL: test_paddusb: 1951; ZNVER1: # %bb.0: 1952; ZNVER1-NEXT: paddusb %mm1, %mm0 # sched: [1:0.25] 1953; ZNVER1-NEXT: paddusb (%rdi), %mm0 # sched: [8:0.50] 1954; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1955; ZNVER1-NEXT: retq # sched: [1:0.50] 1956 %1 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a0, x86_mmx %a1) 1957 %2 = load x86_mmx, x86_mmx *%a2, align 8 1958 %3 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %1, x86_mmx %2) 1959 %4 = bitcast x86_mmx %3 to i64 1960 ret i64 %4 1961} 1962declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone 1963 1964define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1965; GENERIC-LABEL: test_paddusw: 1966; GENERIC: # %bb.0: 1967; GENERIC-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] 1968; GENERIC-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] 1969; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1970; GENERIC-NEXT: retq # sched: [1:1.00] 1971; 1972; ATOM-LABEL: test_paddusw: 1973; ATOM: # %bb.0: 1974; ATOM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 1975; ATOM-NEXT: paddusw (%rdi), %mm0 # sched: [1:1.00] 1976; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1977; ATOM-NEXT: retq # sched: [79:39.50] 1978; 1979; SLM-LABEL: test_paddusw: 1980; SLM: # %bb.0: 1981; SLM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 1982; 
SLM-NEXT: paddusw (%rdi), %mm0 # sched: [4:1.00] 1983; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1984; SLM-NEXT: retq # sched: [4:1.00] 1985; 1986; SANDY-LABEL: test_paddusw: 1987; SANDY: # %bb.0: 1988; SANDY-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] 1989; SANDY-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] 1990; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1991; SANDY-NEXT: retq # sched: [1:1.00] 1992; 1993; HASWELL-LABEL: test_paddusw: 1994; HASWELL: # %bb.0: 1995; HASWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 1996; HASWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50] 1997; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1998; HASWELL-NEXT: retq # sched: [7:1.00] 1999; 2000; BROADWELL-LABEL: test_paddusw: 2001; BROADWELL: # %bb.0: 2002; BROADWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 2003; BROADWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50] 2004; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2005; BROADWELL-NEXT: retq # sched: [7:1.00] 2006; 2007; SKYLAKE-LABEL: test_paddusw: 2008; SKYLAKE: # %bb.0: 2009; SKYLAKE-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] 2010; SKYLAKE-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] 2011; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2012; SKYLAKE-NEXT: retq # sched: [7:1.00] 2013; 2014; SKX-LABEL: test_paddusw: 2015; SKX: # %bb.0: 2016; SKX-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] 2017; SKX-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] 2018; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2019; SKX-NEXT: retq # sched: [7:1.00] 2020; 2021; BTVER2-LABEL: test_paddusw: 2022; BTVER2: # %bb.0: 2023; BTVER2-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 2024; BTVER2-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] 2025; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2026; BTVER2-NEXT: retq # sched: [4:1.00] 2027; 2028; ZNVER1-LABEL: test_paddusw: 2029; ZNVER1: # %bb.0: 2030; ZNVER1-NEXT: paddusw %mm1, %mm0 # sched: [1:0.25] 2031; ZNVER1-NEXT: paddusw (%rdi), %mm0 # sched: [8:0.50] 2032; ZNVER1-NEXT: movq %mm0, %rax # sched: 
[2:1.00] 2033; ZNVER1-NEXT: retq # sched: [1:0.50] 2034 %1 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a0, x86_mmx %a1) 2035 %2 = load x86_mmx, x86_mmx *%a2, align 8 2036 %3 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %1, x86_mmx %2) 2037 %4 = bitcast x86_mmx %3 to i64 2038 ret i64 %4 2039} 2040declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone 2041 2042define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2043; GENERIC-LABEL: test_paddw: 2044; GENERIC: # %bb.0: 2045; GENERIC-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] 2046; GENERIC-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] 2047; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2048; GENERIC-NEXT: retq # sched: [1:1.00] 2049; 2050; ATOM-LABEL: test_paddw: 2051; ATOM: # %bb.0: 2052; ATOM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2053; ATOM-NEXT: paddw (%rdi), %mm0 # sched: [1:1.00] 2054; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2055; ATOM-NEXT: retq # sched: [79:39.50] 2056; 2057; SLM-LABEL: test_paddw: 2058; SLM: # %bb.0: 2059; SLM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2060; SLM-NEXT: paddw (%rdi), %mm0 # sched: [4:1.00] 2061; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2062; SLM-NEXT: retq # sched: [4:1.00] 2063; 2064; SANDY-LABEL: test_paddw: 2065; SANDY: # %bb.0: 2066; SANDY-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] 2067; SANDY-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] 2068; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2069; SANDY-NEXT: retq # sched: [1:1.00] 2070; 2071; HASWELL-LABEL: test_paddw: 2072; HASWELL: # %bb.0: 2073; HASWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2074; HASWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] 2075; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2076; HASWELL-NEXT: retq # sched: [7:1.00] 2077; 2078; BROADWELL-LABEL: test_paddw: 2079; BROADWELL: # %bb.0: 2080; BROADWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2081; BROADWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] 2082; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 
2083; BROADWELL-NEXT: retq # sched: [7:1.00] 2084; 2085; SKYLAKE-LABEL: test_paddw: 2086; SKYLAKE: # %bb.0: 2087; SKYLAKE-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2088; SKYLAKE-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] 2089; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2090; SKYLAKE-NEXT: retq # sched: [7:1.00] 2091; 2092; SKX-LABEL: test_paddw: 2093; SKX: # %bb.0: 2094; SKX-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2095; SKX-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] 2096; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2097; SKX-NEXT: retq # sched: [7:1.00] 2098; 2099; BTVER2-LABEL: test_paddw: 2100; BTVER2: # %bb.0: 2101; BTVER2-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2102; BTVER2-NEXT: paddw (%rdi), %mm0 # sched: [6:1.00] 2103; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2104; BTVER2-NEXT: retq # sched: [4:1.00] 2105; 2106; ZNVER1-LABEL: test_paddw: 2107; ZNVER1: # %bb.0: 2108; ZNVER1-NEXT: paddw %mm1, %mm0 # sched: [1:0.25] 2109; ZNVER1-NEXT: paddw (%rdi), %mm0 # sched: [8:0.50] 2110; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2111; ZNVER1-NEXT: retq # sched: [1:0.50] 2112 %1 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a0, x86_mmx %a1) 2113 %2 = load x86_mmx, x86_mmx *%a2, align 8 2114 %3 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %1, x86_mmx %2) 2115 %4 = bitcast x86_mmx %3 to i64 2116 ret i64 %4 2117} 2118declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone 2119 2120define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2121; GENERIC-LABEL: test_palignr: 2122; GENERIC: # %bb.0: 2123; GENERIC-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] 2124; GENERIC-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] 2125; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2126; GENERIC-NEXT: retq # sched: [1:1.00] 2127; 2128; ATOM-LABEL: test_palignr: 2129; ATOM: # %bb.0: 2130; ATOM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2131; ATOM-NEXT: palignr $1, (%rdi), %mm0 # sched: [1:1.00] 2132; ATOM-NEXT: movq %mm0, %rax # sched: 
[3:3.00] 2133; ATOM-NEXT: retq # sched: [79:39.50] 2134; 2135; SLM-LABEL: test_palignr: 2136; SLM: # %bb.0: 2137; SLM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2138; SLM-NEXT: palignr $1, (%rdi), %mm0 # sched: [4:1.00] 2139; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2140; SLM-NEXT: retq # sched: [4:1.00] 2141; 2142; SANDY-LABEL: test_palignr: 2143; SANDY: # %bb.0: 2144; SANDY-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] 2145; SANDY-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] 2146; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2147; SANDY-NEXT: retq # sched: [1:1.00] 2148; 2149; HASWELL-LABEL: test_palignr: 2150; HASWELL: # %bb.0: 2151; HASWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2152; HASWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2153; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2154; HASWELL-NEXT: retq # sched: [7:1.00] 2155; 2156; BROADWELL-LABEL: test_palignr: 2157; BROADWELL: # %bb.0: 2158; BROADWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2159; BROADWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2160; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2161; BROADWELL-NEXT: retq # sched: [7:1.00] 2162; 2163; SKYLAKE-LABEL: test_palignr: 2164; SKYLAKE: # %bb.0: 2165; SKYLAKE-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2166; SKYLAKE-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2167; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2168; SKYLAKE-NEXT: retq # sched: [7:1.00] 2169; 2170; SKX-LABEL: test_palignr: 2171; SKX: # %bb.0: 2172; SKX-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2173; SKX-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2174; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2175; SKX-NEXT: retq # sched: [7:1.00] 2176; 2177; BTVER2-LABEL: test_palignr: 2178; BTVER2: # %bb.0: 2179; BTVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] 2180; BTVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2181; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2182; BTVER2-NEXT: retq # sched: [4:1.00] 
2183; 2184; ZNVER1-LABEL: test_palignr: 2185; ZNVER1: # %bb.0: 2186; ZNVER1-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.25] 2187; ZNVER1-NEXT: palignr $1, (%rdi), %mm0 # sched: [8:0.50] 2188; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2189; ZNVER1-NEXT: retq # sched: [1:0.50] 2190 %1 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a0, x86_mmx %a1, i8 1) 2191 %2 = load x86_mmx, x86_mmx *%a2, align 8 2192 %3 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %1, x86_mmx %2, i8 1) 2193 %4 = bitcast x86_mmx %3 to i64 2194 ret i64 %4 2195} 2196declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone 2197 2198define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2199; GENERIC-LABEL: test_pand: 2200; GENERIC: # %bb.0: 2201; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:0.33] 2202; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2203; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2204; GENERIC-NEXT: retq # sched: [1:1.00] 2205; 2206; ATOM-LABEL: test_pand: 2207; ATOM: # %bb.0: 2208; ATOM-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2209; ATOM-NEXT: pand (%rdi), %mm0 # sched: [1:1.00] 2210; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2211; ATOM-NEXT: retq # sched: [79:39.50] 2212; 2213; SLM-LABEL: test_pand: 2214; SLM: # %bb.0: 2215; SLM-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2216; SLM-NEXT: pand (%rdi), %mm0 # sched: [4:1.00] 2217; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2218; SLM-NEXT: retq # sched: [4:1.00] 2219; 2220; SANDY-LABEL: test_pand: 2221; SANDY: # %bb.0: 2222; SANDY-NEXT: pand %mm1, %mm0 # sched: [1:0.33] 2223; SANDY-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2224; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2225; SANDY-NEXT: retq # sched: [1:1.00] 2226; 2227; HASWELL-LABEL: test_pand: 2228; HASWELL: # %bb.0: 2229; HASWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] 2230; HASWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2231; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2232; HASWELL-NEXT: retq # sched: [7:1.00] 2233; 
2234; BROADWELL-LABEL: test_pand: 2235; BROADWELL: # %bb.0: 2236; BROADWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] 2237; BROADWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2238; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2239; BROADWELL-NEXT: retq # sched: [7:1.00] 2240; 2241; SKYLAKE-LABEL: test_pand: 2242; SKYLAKE: # %bb.0: 2243; SKYLAKE-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2244; SKYLAKE-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2245; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2246; SKYLAKE-NEXT: retq # sched: [7:1.00] 2247; 2248; SKX-LABEL: test_pand: 2249; SKX: # %bb.0: 2250; SKX-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2251; SKX-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2252; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2253; SKX-NEXT: retq # sched: [7:1.00] 2254; 2255; BTVER2-LABEL: test_pand: 2256; BTVER2: # %bb.0: 2257; BTVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2258; BTVER2-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] 2259; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2260; BTVER2-NEXT: retq # sched: [4:1.00] 2261; 2262; ZNVER1-LABEL: test_pand: 2263; ZNVER1: # %bb.0: 2264; ZNVER1-NEXT: pand %mm1, %mm0 # sched: [1:0.25] 2265; ZNVER1-NEXT: pand (%rdi), %mm0 # sched: [8:0.50] 2266; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2267; ZNVER1-NEXT: retq # sched: [1:0.50] 2268 %1 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a0, x86_mmx %a1) 2269 %2 = load x86_mmx, x86_mmx *%a2, align 8 2270 %3 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %1, x86_mmx %2) 2271 %4 = bitcast x86_mmx %3 to i64 2272 ret i64 %4 2273} 2274declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone 2275 2276define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2277; GENERIC-LABEL: test_pandn: 2278; GENERIC: # %bb.0: 2279; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] 2280; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2281; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2282; GENERIC-NEXT: retq # sched: [1:1.00] 2283; 2284; ATOM-LABEL: 
test_pandn: 2285; ATOM: # %bb.0: 2286; ATOM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2287; ATOM-NEXT: pandn (%rdi), %mm0 # sched: [1:1.00] 2288; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2289; ATOM-NEXT: retq # sched: [79:39.50] 2290; 2291; SLM-LABEL: test_pandn: 2292; SLM: # %bb.0: 2293; SLM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2294; SLM-NEXT: pandn (%rdi), %mm0 # sched: [4:1.00] 2295; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2296; SLM-NEXT: retq # sched: [4:1.00] 2297; 2298; SANDY-LABEL: test_pandn: 2299; SANDY: # %bb.0: 2300; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] 2301; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2302; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2303; SANDY-NEXT: retq # sched: [1:1.00] 2304; 2305; HASWELL-LABEL: test_pandn: 2306; HASWELL: # %bb.0: 2307; HASWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] 2308; HASWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2309; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2310; HASWELL-NEXT: retq # sched: [7:1.00] 2311; 2312; BROADWELL-LABEL: test_pandn: 2313; BROADWELL: # %bb.0: 2314; BROADWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] 2315; BROADWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2316; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2317; BROADWELL-NEXT: retq # sched: [7:1.00] 2318; 2319; SKYLAKE-LABEL: test_pandn: 2320; SKYLAKE: # %bb.0: 2321; SKYLAKE-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2322; SKYLAKE-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2323; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2324; SKYLAKE-NEXT: retq # sched: [7:1.00] 2325; 2326; SKX-LABEL: test_pandn: 2327; SKX: # %bb.0: 2328; SKX-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2329; SKX-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2330; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2331; SKX-NEXT: retq # sched: [7:1.00] 2332; 2333; BTVER2-LABEL: test_pandn: 2334; BTVER2: # %bb.0: 2335; BTVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2336; BTVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] 2337; 
BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2338; BTVER2-NEXT: retq # sched: [4:1.00] 2339; 2340; ZNVER1-LABEL: test_pandn: 2341; ZNVER1: # %bb.0: 2342; ZNVER1-NEXT: pandn %mm1, %mm0 # sched: [1:0.25] 2343; ZNVER1-NEXT: pandn (%rdi), %mm0 # sched: [8:0.50] 2344; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2345; ZNVER1-NEXT: retq # sched: [1:0.50] 2346 %1 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a0, x86_mmx %a1) 2347 %2 = load x86_mmx, x86_mmx *%a2, align 8 2348 %3 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %1, x86_mmx %2) 2349 %4 = bitcast x86_mmx %3 to i64 2350 ret i64 %4 2351} 2352declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone 2353 2354define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2355; GENERIC-LABEL: test_pavgb: 2356; GENERIC: # %bb.0: 2357; GENERIC-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] 2358; GENERIC-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00] 2359; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2360; GENERIC-NEXT: retq # sched: [1:1.00] 2361; 2362; ATOM-LABEL: test_pavgb: 2363; ATOM: # %bb.0: 2364; ATOM-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2365; ATOM-NEXT: pavgb (%rdi), %mm0 # sched: [1:1.00] 2366; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2367; ATOM-NEXT: retq # sched: [79:39.50] 2368; 2369; SLM-LABEL: test_pavgb: 2370; SLM: # %bb.0: 2371; SLM-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2372; SLM-NEXT: pavgb (%rdi), %mm0 # sched: [4:1.00] 2373; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2374; SLM-NEXT: retq # sched: [4:1.00] 2375; 2376; SANDY-LABEL: test_pavgb: 2377; SANDY: # %bb.0: 2378; SANDY-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] 2379; SANDY-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00] 2380; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2381; SANDY-NEXT: retq # sched: [1:1.00] 2382; 2383; HASWELL-LABEL: test_pavgb: 2384; HASWELL: # %bb.0: 2385; HASWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2386; HASWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50] 2387; HASWELL-NEXT: movq %mm0, %rax # 
sched: [1:1.00] 2388; HASWELL-NEXT: retq # sched: [7:1.00] 2389; 2390; BROADWELL-LABEL: test_pavgb: 2391; BROADWELL: # %bb.0: 2392; BROADWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2393; BROADWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50] 2394; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2395; BROADWELL-NEXT: retq # sched: [7:1.00] 2396; 2397; SKYLAKE-LABEL: test_pavgb: 2398; SKYLAKE: # %bb.0: 2399; SKYLAKE-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] 2400; SKYLAKE-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] 2401; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2402; SKYLAKE-NEXT: retq # sched: [7:1.00] 2403; 2404; SKX-LABEL: test_pavgb: 2405; SKX: # %bb.0: 2406; SKX-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] 2407; SKX-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] 2408; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2409; SKX-NEXT: retq # sched: [7:1.00] 2410; 2411; BTVER2-LABEL: test_pavgb: 2412; BTVER2: # %bb.0: 2413; BTVER2-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2414; BTVER2-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] 2415; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2416; BTVER2-NEXT: retq # sched: [4:1.00] 2417; 2418; ZNVER1-LABEL: test_pavgb: 2419; ZNVER1: # %bb.0: 2420; ZNVER1-NEXT: pavgb %mm1, %mm0 # sched: [1:0.25] 2421; ZNVER1-NEXT: pavgb (%rdi), %mm0 # sched: [8:0.50] 2422; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2423; ZNVER1-NEXT: retq # sched: [1:0.50] 2424 %1 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a0, x86_mmx %a1) 2425 %2 = load x86_mmx, x86_mmx *%a2, align 8 2426 %3 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %1, x86_mmx %2) 2427 %4 = bitcast x86_mmx %3 to i64 2428 ret i64 %4 2429} 2430declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone 2431 2432define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2433; GENERIC-LABEL: test_pavgw: 2434; GENERIC: # %bb.0: 2435; GENERIC-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] 2436; GENERIC-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] 2437; GENERIC-NEXT: movq %mm0, 
%rax # sched: [2:1.00] 2438; GENERIC-NEXT: retq # sched: [1:1.00] 2439; 2440; ATOM-LABEL: test_pavgw: 2441; ATOM: # %bb.0: 2442; ATOM-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2443; ATOM-NEXT: pavgw (%rdi), %mm0 # sched: [1:1.00] 2444; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2445; ATOM-NEXT: retq # sched: [79:39.50] 2446; 2447; SLM-LABEL: test_pavgw: 2448; SLM: # %bb.0: 2449; SLM-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2450; SLM-NEXT: pavgw (%rdi), %mm0 # sched: [4:1.00] 2451; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2452; SLM-NEXT: retq # sched: [4:1.00] 2453; 2454; SANDY-LABEL: test_pavgw: 2455; SANDY: # %bb.0: 2456; SANDY-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] 2457; SANDY-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] 2458; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2459; SANDY-NEXT: retq # sched: [1:1.00] 2460; 2461; HASWELL-LABEL: test_pavgw: 2462; HASWELL: # %bb.0: 2463; HASWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2464; HASWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50] 2465; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2466; HASWELL-NEXT: retq # sched: [7:1.00] 2467; 2468; BROADWELL-LABEL: test_pavgw: 2469; BROADWELL: # %bb.0: 2470; BROADWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2471; BROADWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50] 2472; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2473; BROADWELL-NEXT: retq # sched: [7:1.00] 2474; 2475; SKYLAKE-LABEL: test_pavgw: 2476; SKYLAKE: # %bb.0: 2477; SKYLAKE-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] 2478; SKYLAKE-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] 2479; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2480; SKYLAKE-NEXT: retq # sched: [7:1.00] 2481; 2482; SKX-LABEL: test_pavgw: 2483; SKX: # %bb.0: 2484; SKX-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] 2485; SKX-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] 2486; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2487; SKX-NEXT: retq # sched: [7:1.00] 2488; 2489; BTVER2-LABEL: test_pavgw: 2490; BTVER2: # %bb.0: 2491; BTVER2-NEXT: pavgw %mm1, 
%mm0 # sched: [1:0.50] 2492; BTVER2-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] 2493; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2494; BTVER2-NEXT: retq # sched: [4:1.00] 2495; 2496; ZNVER1-LABEL: test_pavgw: 2497; ZNVER1: # %bb.0: 2498; ZNVER1-NEXT: pavgw %mm1, %mm0 # sched: [1:0.25] 2499; ZNVER1-NEXT: pavgw (%rdi), %mm0 # sched: [8:0.50] 2500; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2501; ZNVER1-NEXT: retq # sched: [1:0.50] 2502 %1 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a0, x86_mmx %a1) 2503 %2 = load x86_mmx, x86_mmx *%a2, align 8 2504 %3 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %1, x86_mmx %2) 2505 %4 = bitcast x86_mmx %3 to i64 2506 ret i64 %4 2507} 2508declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone 2509 2510define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2511; GENERIC-LABEL: test_pcmpeqb: 2512; GENERIC: # %bb.0: 2513; GENERIC-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] 2514; GENERIC-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] 2515; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2516; GENERIC-NEXT: retq # sched: [1:1.00] 2517; 2518; ATOM-LABEL: test_pcmpeqb: 2519; ATOM: # %bb.0: 2520; ATOM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2521; ATOM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:1.00] 2522; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2523; ATOM-NEXT: retq # sched: [79:39.50] 2524; 2525; SLM-LABEL: test_pcmpeqb: 2526; SLM: # %bb.0: 2527; SLM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2528; SLM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [4:1.00] 2529; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2530; SLM-NEXT: retq # sched: [4:1.00] 2531; 2532; SANDY-LABEL: test_pcmpeqb: 2533; SANDY: # %bb.0: 2534; SANDY-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] 2535; SANDY-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] 2536; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2537; SANDY-NEXT: retq # sched: [1:1.00] 2538; 2539; HASWELL-LABEL: test_pcmpeqb: 2540; HASWELL: # %bb.0: 2541; HASWELL-NEXT: pcmpeqb %mm1, 
%mm0 # sched: [1:0.50] 2542; HASWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50] 2543; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2544; HASWELL-NEXT: retq # sched: [7:1.00] 2545; 2546; BROADWELL-LABEL: test_pcmpeqb: 2547; BROADWELL: # %bb.0: 2548; BROADWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2549; BROADWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50] 2550; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2551; BROADWELL-NEXT: retq # sched: [7:1.00] 2552; 2553; SKYLAKE-LABEL: test_pcmpeqb: 2554; SKYLAKE: # %bb.0: 2555; SKYLAKE-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] 2556; SKYLAKE-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] 2557; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2558; SKYLAKE-NEXT: retq # sched: [7:1.00] 2559; 2560; SKX-LABEL: test_pcmpeqb: 2561; SKX: # %bb.0: 2562; SKX-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] 2563; SKX-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] 2564; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2565; SKX-NEXT: retq # sched: [7:1.00] 2566; 2567; BTVER2-LABEL: test_pcmpeqb: 2568; BTVER2: # %bb.0: 2569; BTVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2570; BTVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] 2571; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2572; BTVER2-NEXT: retq # sched: [4:1.00] 2573; 2574; ZNVER1-LABEL: test_pcmpeqb: 2575; ZNVER1: # %bb.0: 2576; ZNVER1-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.25] 2577; ZNVER1-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:0.50] 2578; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2579; ZNVER1-NEXT: retq # sched: [1:0.50] 2580 %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a0, x86_mmx %a1) 2581 %2 = load x86_mmx, x86_mmx *%a2, align 8 2582 %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %1, x86_mmx %2) 2583 %4 = bitcast x86_mmx %3 to i64 2584 ret i64 %4 2585} 2586declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone 2587 2588define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2589; GENERIC-LABEL: test_pcmpeqd: 2590; 
GENERIC: # %bb.0: 2591; GENERIC-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] 2592; GENERIC-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] 2593; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2594; GENERIC-NEXT: retq # sched: [1:1.00] 2595; 2596; ATOM-LABEL: test_pcmpeqd: 2597; ATOM: # %bb.0: 2598; ATOM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2599; ATOM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:1.00] 2600; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2601; ATOM-NEXT: retq # sched: [79:39.50] 2602; 2603; SLM-LABEL: test_pcmpeqd: 2604; SLM: # %bb.0: 2605; SLM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2606; SLM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [4:1.00] 2607; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2608; SLM-NEXT: retq # sched: [4:1.00] 2609; 2610; SANDY-LABEL: test_pcmpeqd: 2611; SANDY: # %bb.0: 2612; SANDY-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] 2613; SANDY-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] 2614; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2615; SANDY-NEXT: retq # sched: [1:1.00] 2616; 2617; HASWELL-LABEL: test_pcmpeqd: 2618; HASWELL: # %bb.0: 2619; HASWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2620; HASWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50] 2621; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2622; HASWELL-NEXT: retq # sched: [7:1.00] 2623; 2624; BROADWELL-LABEL: test_pcmpeqd: 2625; BROADWELL: # %bb.0: 2626; BROADWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2627; BROADWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50] 2628; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2629; BROADWELL-NEXT: retq # sched: [7:1.00] 2630; 2631; SKYLAKE-LABEL: test_pcmpeqd: 2632; SKYLAKE: # %bb.0: 2633; SKYLAKE-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] 2634; SKYLAKE-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] 2635; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2636; SKYLAKE-NEXT: retq # sched: [7:1.00] 2637; 2638; SKX-LABEL: test_pcmpeqd: 2639; SKX: # %bb.0: 2640; SKX-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] 2641; SKX-NEXT: pcmpeqd 
(%rdi), %mm0 # sched: [6:1.00] 2642; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2643; SKX-NEXT: retq # sched: [7:1.00] 2644; 2645; BTVER2-LABEL: test_pcmpeqd: 2646; BTVER2: # %bb.0: 2647; BTVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2648; BTVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] 2649; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2650; BTVER2-NEXT: retq # sched: [4:1.00] 2651; 2652; ZNVER1-LABEL: test_pcmpeqd: 2653; ZNVER1: # %bb.0: 2654; ZNVER1-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.25] 2655; ZNVER1-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:0.50] 2656; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2657; ZNVER1-NEXT: retq # sched: [1:0.50] 2658 %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a0, x86_mmx %a1) 2659 %2 = load x86_mmx, x86_mmx *%a2, align 8 2660 %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %1, x86_mmx %2) 2661 %4 = bitcast x86_mmx %3 to i64 2662 ret i64 %4 2663} 2664declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone 2665 2666define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2667; GENERIC-LABEL: test_pcmpeqw: 2668; GENERIC: # %bb.0: 2669; GENERIC-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] 2670; GENERIC-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] 2671; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2672; GENERIC-NEXT: retq # sched: [1:1.00] 2673; 2674; ATOM-LABEL: test_pcmpeqw: 2675; ATOM: # %bb.0: 2676; ATOM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2677; ATOM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:1.00] 2678; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2679; ATOM-NEXT: retq # sched: [79:39.50] 2680; 2681; SLM-LABEL: test_pcmpeqw: 2682; SLM: # %bb.0: 2683; SLM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2684; SLM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [4:1.00] 2685; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2686; SLM-NEXT: retq # sched: [4:1.00] 2687; 2688; SANDY-LABEL: test_pcmpeqw: 2689; SANDY: # %bb.0: 2690; SANDY-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] 2691; SANDY-NEXT: 
pcmpeqw (%rdi), %mm0 # sched: [8:1.00] 2692; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2693; SANDY-NEXT: retq # sched: [1:1.00] 2694; 2695; HASWELL-LABEL: test_pcmpeqw: 2696; HASWELL: # %bb.0: 2697; HASWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2698; HASWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50] 2699; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2700; HASWELL-NEXT: retq # sched: [7:1.00] 2701; 2702; BROADWELL-LABEL: test_pcmpeqw: 2703; BROADWELL: # %bb.0: 2704; BROADWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2705; BROADWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50] 2706; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2707; BROADWELL-NEXT: retq # sched: [7:1.00] 2708; 2709; SKYLAKE-LABEL: test_pcmpeqw: 2710; SKYLAKE: # %bb.0: 2711; SKYLAKE-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] 2712; SKYLAKE-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] 2713; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2714; SKYLAKE-NEXT: retq # sched: [7:1.00] 2715; 2716; SKX-LABEL: test_pcmpeqw: 2717; SKX: # %bb.0: 2718; SKX-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] 2719; SKX-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] 2720; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2721; SKX-NEXT: retq # sched: [7:1.00] 2722; 2723; BTVER2-LABEL: test_pcmpeqw: 2724; BTVER2: # %bb.0: 2725; BTVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2726; BTVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] 2727; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2728; BTVER2-NEXT: retq # sched: [4:1.00] 2729; 2730; ZNVER1-LABEL: test_pcmpeqw: 2731; ZNVER1: # %bb.0: 2732; ZNVER1-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.25] 2733; ZNVER1-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:0.50] 2734; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2735; ZNVER1-NEXT: retq # sched: [1:0.50] 2736 %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a0, x86_mmx %a1) 2737 %2 = load x86_mmx, x86_mmx *%a2, align 8 2738 %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %1, x86_mmx %2) 2739 %4 = bitcast x86_mmx %3 to i64 
2740 ret i64 %4 2741} 2742declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone 2743 2744define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2745; GENERIC-LABEL: test_pcmpgtb: 2746; GENERIC: # %bb.0: 2747; GENERIC-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] 2748; GENERIC-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] 2749; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2750; GENERIC-NEXT: retq # sched: [1:1.00] 2751; 2752; ATOM-LABEL: test_pcmpgtb: 2753; ATOM: # %bb.0: 2754; ATOM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2755; ATOM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:1.00] 2756; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2757; ATOM-NEXT: retq # sched: [79:39.50] 2758; 2759; SLM-LABEL: test_pcmpgtb: 2760; SLM: # %bb.0: 2761; SLM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2762; SLM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [4:1.00] 2763; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2764; SLM-NEXT: retq # sched: [4:1.00] 2765; 2766; SANDY-LABEL: test_pcmpgtb: 2767; SANDY: # %bb.0: 2768; SANDY-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] 2769; SANDY-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] 2770; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2771; SANDY-NEXT: retq # sched: [1:1.00] 2772; 2773; HASWELL-LABEL: test_pcmpgtb: 2774; HASWELL: # %bb.0: 2775; HASWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2776; HASWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:0.50] 2777; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2778; HASWELL-NEXT: retq # sched: [7:1.00] 2779; 2780; BROADWELL-LABEL: test_pcmpgtb: 2781; BROADWELL: # %bb.0: 2782; BROADWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2783; BROADWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:0.50] 2784; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2785; BROADWELL-NEXT: retq # sched: [7:1.00] 2786; 2787; SKYLAKE-LABEL: test_pcmpgtb: 2788; SKYLAKE: # %bb.0: 2789; SKYLAKE-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] 2790; SKYLAKE-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] 2791; 
SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2792; SKYLAKE-NEXT: retq # sched: [7:1.00] 2793; 2794; SKX-LABEL: test_pcmpgtb: 2795; SKX: # %bb.0: 2796; SKX-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] 2797; SKX-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] 2798; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2799; SKX-NEXT: retq # sched: [7:1.00] 2800; 2801; BTVER2-LABEL: test_pcmpgtb: 2802; BTVER2: # %bb.0: 2803; BTVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2804; BTVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] 2805; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2806; BTVER2-NEXT: retq # sched: [4:1.00] 2807; 2808; ZNVER1-LABEL: test_pcmpgtb: 2809; ZNVER1: # %bb.0: 2810; ZNVER1-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.25] 2811; ZNVER1-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:0.50] 2812; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2813; ZNVER1-NEXT: retq # sched: [1:0.50] 2814 %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a0, x86_mmx %a1) 2815 %2 = load x86_mmx, x86_mmx *%a2, align 8 2816 %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %1, x86_mmx %2) 2817 %4 = bitcast x86_mmx %3 to i64 2818 ret i64 %4 2819} 2820declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone 2821 2822define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2823; GENERIC-LABEL: test_pcmpgtd: 2824; GENERIC: # %bb.0: 2825; GENERIC-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] 2826; GENERIC-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] 2827; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2828; GENERIC-NEXT: retq # sched: [1:1.00] 2829; 2830; ATOM-LABEL: test_pcmpgtd: 2831; ATOM: # %bb.0: 2832; ATOM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2833; ATOM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:1.00] 2834; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2835; ATOM-NEXT: retq # sched: [79:39.50] 2836; 2837; SLM-LABEL: test_pcmpgtd: 2838; SLM: # %bb.0: 2839; SLM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2840; SLM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [4:1.00] 
2841; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2842; SLM-NEXT: retq # sched: [4:1.00] 2843; 2844; SANDY-LABEL: test_pcmpgtd: 2845; SANDY: # %bb.0: 2846; SANDY-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] 2847; SANDY-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] 2848; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2849; SANDY-NEXT: retq # sched: [1:1.00] 2850; 2851; HASWELL-LABEL: test_pcmpgtd: 2852; HASWELL: # %bb.0: 2853; HASWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2854; HASWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50] 2855; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2856; HASWELL-NEXT: retq # sched: [7:1.00] 2857; 2858; BROADWELL-LABEL: test_pcmpgtd: 2859; BROADWELL: # %bb.0: 2860; BROADWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2861; BROADWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50] 2862; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2863; BROADWELL-NEXT: retq # sched: [7:1.00] 2864; 2865; SKYLAKE-LABEL: test_pcmpgtd: 2866; SKYLAKE: # %bb.0: 2867; SKYLAKE-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00] 2868; SKYLAKE-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] 2869; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2870; SKYLAKE-NEXT: retq # sched: [7:1.00] 2871; 2872; SKX-LABEL: test_pcmpgtd: 2873; SKX: # %bb.0: 2874; SKX-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00] 2875; SKX-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] 2876; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2877; SKX-NEXT: retq # sched: [7:1.00] 2878; 2879; BTVER2-LABEL: test_pcmpgtd: 2880; BTVER2: # %bb.0: 2881; BTVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2882; BTVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] 2883; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2884; BTVER2-NEXT: retq # sched: [4:1.00] 2885; 2886; ZNVER1-LABEL: test_pcmpgtd: 2887; ZNVER1: # %bb.0: 2888; ZNVER1-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.25] 2889; ZNVER1-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:0.50] 2890; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2891; ZNVER1-NEXT: retq # sched: [1:0.50] 
2892 %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a0, x86_mmx %a1) 2893 %2 = load x86_mmx, x86_mmx *%a2, align 8 2894 %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %1, x86_mmx %2) 2895 %4 = bitcast x86_mmx %3 to i64 2896 ret i64 %4 2897} 2898declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone 2899 2900define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2901; GENERIC-LABEL: test_pcmpgtw: 2902; GENERIC: # %bb.0: 2903; GENERIC-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] 2904; GENERIC-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] 2905; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2906; GENERIC-NEXT: retq # sched: [1:1.00] 2907; 2908; ATOM-LABEL: test_pcmpgtw: 2909; ATOM: # %bb.0: 2910; ATOM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2911; ATOM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:1.00] 2912; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2913; ATOM-NEXT: retq # sched: [79:39.50] 2914; 2915; SLM-LABEL: test_pcmpgtw: 2916; SLM: # %bb.0: 2917; SLM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2918; SLM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [4:1.00] 2919; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2920; SLM-NEXT: retq # sched: [4:1.00] 2921; 2922; SANDY-LABEL: test_pcmpgtw: 2923; SANDY: # %bb.0: 2924; SANDY-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] 2925; SANDY-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] 2926; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2927; SANDY-NEXT: retq # sched: [1:1.00] 2928; 2929; HASWELL-LABEL: test_pcmpgtw: 2930; HASWELL: # %bb.0: 2931; HASWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2932; HASWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50] 2933; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2934; HASWELL-NEXT: retq # sched: [7:1.00] 2935; 2936; BROADWELL-LABEL: test_pcmpgtw: 2937; BROADWELL: # %bb.0: 2938; BROADWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2939; BROADWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50] 2940; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2941; 
BROADWELL-NEXT: retq # sched: [7:1.00] 2942; 2943; SKYLAKE-LABEL: test_pcmpgtw: 2944; SKYLAKE: # %bb.0: 2945; SKYLAKE-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] 2946; SKYLAKE-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] 2947; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2948; SKYLAKE-NEXT: retq # sched: [7:1.00] 2949; 2950; SKX-LABEL: test_pcmpgtw: 2951; SKX: # %bb.0: 2952; SKX-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] 2953; SKX-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] 2954; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2955; SKX-NEXT: retq # sched: [7:1.00] 2956; 2957; BTVER2-LABEL: test_pcmpgtw: 2958; BTVER2: # %bb.0: 2959; BTVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2960; BTVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] 2961; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2962; BTVER2-NEXT: retq # sched: [4:1.00] 2963; 2964; ZNVER1-LABEL: test_pcmpgtw: 2965; ZNVER1: # %bb.0: 2966; ZNVER1-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.25] 2967; ZNVER1-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:0.50] 2968; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2969; ZNVER1-NEXT: retq # sched: [1:0.50] 2970 %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a0, x86_mmx %a1) 2971 %2 = load x86_mmx, x86_mmx *%a2, align 8 2972 %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %1, x86_mmx %2) 2973 %4 = bitcast x86_mmx %3 to i64 2974 ret i64 %4 2975} 2976declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone 2977 2978define i32 @test_pextrw(x86_mmx %a0) optsize { 2979; GENERIC-LABEL: test_pextrw: 2980; GENERIC: # %bb.0: 2981; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 2982; GENERIC-NEXT: retq # sched: [1:1.00] 2983; 2984; ATOM-LABEL: test_pextrw: 2985; ATOM: # %bb.0: 2986; ATOM-NEXT: pextrw $0, %mm0, %eax # sched: [4:2.00] 2987; ATOM-NEXT: retq # sched: [79:39.50] 2988; 2989; SLM-LABEL: test_pextrw: 2990; SLM: # %bb.0: 2991; SLM-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00] 2992; SLM-NEXT: retq # sched: [4:1.00] 2993; 2994; SANDY-LABEL: 
test_pextrw: 2995; SANDY: # %bb.0: 2996; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 2997; SANDY-NEXT: retq # sched: [1:1.00] 2998; 2999; HASWELL-LABEL: test_pextrw: 3000; HASWELL: # %bb.0: 3001; HASWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] 3002; HASWELL-NEXT: retq # sched: [7:1.00] 3003; 3004; BROADWELL-LABEL: test_pextrw: 3005; BROADWELL: # %bb.0: 3006; BROADWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] 3007; BROADWELL-NEXT: retq # sched: [7:1.00] 3008; 3009; SKYLAKE-LABEL: test_pextrw: 3010; SKYLAKE: # %bb.0: 3011; SKYLAKE-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 3012; SKYLAKE-NEXT: retq # sched: [7:1.00] 3013; 3014; SKX-LABEL: test_pextrw: 3015; SKX: # %bb.0: 3016; SKX-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 3017; SKX-NEXT: retq # sched: [7:1.00] 3018; 3019; BTVER2-LABEL: test_pextrw: 3020; BTVER2: # %bb.0: 3021; BTVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 3022; BTVER2-NEXT: retq # sched: [4:1.00] 3023; 3024; ZNVER1-LABEL: test_pextrw: 3025; ZNVER1: # %bb.0: 3026; ZNVER1-NEXT: pextrw $0, %mm0, %eax # sched: [2:2.00] 3027; ZNVER1-NEXT: retq # sched: [1:0.50] 3028 %1 = call i32 @llvm.x86.mmx.pextr.w(x86_mmx %a0, i32 0) 3029 ret i32 %1 3030} 3031declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32) nounwind readnone 3032 3033define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3034; GENERIC-LABEL: test_phaddd: 3035; GENERIC: # %bb.0: 3036; GENERIC-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] 3037; GENERIC-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] 3038; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3039; GENERIC-NEXT: retq # sched: [1:1.00] 3040; 3041; ATOM-LABEL: test_phaddd: 3042; ATOM: # %bb.0: 3043; ATOM-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] 3044; ATOM-NEXT: phaddd (%rdi), %mm0 # sched: [4:2.00] 3045; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3046; ATOM-NEXT: retq # sched: [79:39.50] 3047; 3048; SLM-LABEL: test_phaddd: 3049; SLM: # %bb.0: 3050; SLM-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] 3051; 
SLM-NEXT: phaddd (%rdi), %mm0 # sched: [4:1.00] 3052; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3053; SLM-NEXT: retq # sched: [4:1.00] 3054; 3055; SANDY-LABEL: test_phaddd: 3056; SANDY: # %bb.0: 3057; SANDY-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] 3058; SANDY-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] 3059; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3060; SANDY-NEXT: retq # sched: [1:1.00] 3061; 3062; HASWELL-LABEL: test_phaddd: 3063; HASWELL: # %bb.0: 3064; HASWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] 3065; HASWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] 3066; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3067; HASWELL-NEXT: retq # sched: [7:1.00] 3068; 3069; BROADWELL-LABEL: test_phaddd: 3070; BROADWELL: # %bb.0: 3071; BROADWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] 3072; BROADWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] 3073; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3074; BROADWELL-NEXT: retq # sched: [7:1.00] 3075; 3076; SKYLAKE-LABEL: test_phaddd: 3077; SKYLAKE: # %bb.0: 3078; SKYLAKE-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] 3079; SKYLAKE-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] 3080; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3081; SKYLAKE-NEXT: retq # sched: [7:1.00] 3082; 3083; SKX-LABEL: test_phaddd: 3084; SKX: # %bb.0: 3085; SKX-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] 3086; SKX-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] 3087; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3088; SKX-NEXT: retq # sched: [7:1.00] 3089; 3090; BTVER2-LABEL: test_phaddd: 3091; BTVER2: # %bb.0: 3092; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] 3093; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00] 3094; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3095; BTVER2-NEXT: retq # sched: [4:1.00] 3096; 3097; ZNVER1-LABEL: test_phaddd: 3098; ZNVER1: # %bb.0: 3099; ZNVER1-NEXT: phaddd %mm1, %mm0 # sched: [100:0.25] 3100; ZNVER1-NEXT: phaddd (%rdi), %mm0 # sched: [100:0.25] 3101; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3102; 
ZNVER1-NEXT: retq # sched: [1:0.50] 3103 %1 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a0, x86_mmx %a1) 3104 %2 = load x86_mmx, x86_mmx *%a2, align 8 3105 %3 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %1, x86_mmx %2) 3106 %4 = bitcast x86_mmx %3 to i64 3107 ret i64 %4 3108} 3109declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone 3110 3111define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3112; GENERIC-LABEL: test_phaddsw: 3113; GENERIC: # %bb.0: 3114; GENERIC-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] 3115; GENERIC-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] 3116; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3117; GENERIC-NEXT: retq # sched: [1:1.00] 3118; 3119; ATOM-LABEL: test_phaddsw: 3120; ATOM: # %bb.0: 3121; ATOM-NEXT: phaddsw %mm1, %mm0 # sched: [5:2.50] 3122; ATOM-NEXT: phaddsw (%rdi), %mm0 # sched: [6:3.00] 3123; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3124; ATOM-NEXT: retq # sched: [79:39.50] 3125; 3126; SLM-LABEL: test_phaddsw: 3127; SLM: # %bb.0: 3128; SLM-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] 3129; SLM-NEXT: phaddsw (%rdi), %mm0 # sched: [4:1.00] 3130; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3131; SLM-NEXT: retq # sched: [4:1.00] 3132; 3133; SANDY-LABEL: test_phaddsw: 3134; SANDY: # %bb.0: 3135; SANDY-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] 3136; SANDY-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] 3137; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3138; SANDY-NEXT: retq # sched: [1:1.00] 3139; 3140; HASWELL-LABEL: test_phaddsw: 3141; HASWELL: # %bb.0: 3142; HASWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] 3143; HASWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] 3144; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3145; HASWELL-NEXT: retq # sched: [7:1.00] 3146; 3147; BROADWELL-LABEL: test_phaddsw: 3148; BROADWELL: # %bb.0: 3149; BROADWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] 3150; BROADWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] 3151; BROADWELL-NEXT: movq 
%mm0, %rax # sched: [1:1.00] 3152; BROADWELL-NEXT: retq # sched: [7:1.00] 3153; 3154; SKYLAKE-LABEL: test_phaddsw: 3155; SKYLAKE: # %bb.0: 3156; SKYLAKE-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] 3157; SKYLAKE-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] 3158; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3159; SKYLAKE-NEXT: retq # sched: [7:1.00] 3160; 3161; SKX-LABEL: test_phaddsw: 3162; SKX: # %bb.0: 3163; SKX-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] 3164; SKX-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] 3165; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3166; SKX-NEXT: retq # sched: [7:1.00] 3167; 3168; BTVER2-LABEL: test_phaddsw: 3169; BTVER2: # %bb.0: 3170; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] 3171; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00] 3172; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3173; BTVER2-NEXT: retq # sched: [4:1.00] 3174; 3175; ZNVER1-LABEL: test_phaddsw: 3176; ZNVER1: # %bb.0: 3177; ZNVER1-NEXT: phaddsw %mm1, %mm0 # sched: [100:0.25] 3178; ZNVER1-NEXT: phaddsw (%rdi), %mm0 # sched: [100:0.25] 3179; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3180; ZNVER1-NEXT: retq # sched: [1:0.50] 3181 %1 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a0, x86_mmx %a1) 3182 %2 = load x86_mmx, x86_mmx *%a2, align 8 3183 %3 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %1, x86_mmx %2) 3184 %4 = bitcast x86_mmx %3 to i64 3185 ret i64 %4 3186} 3187declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone 3188 3189define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3190; GENERIC-LABEL: test_phaddw: 3191; GENERIC: # %bb.0: 3192; GENERIC-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] 3193; GENERIC-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] 3194; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3195; GENERIC-NEXT: retq # sched: [1:1.00] 3196; 3197; ATOM-LABEL: test_phaddw: 3198; ATOM: # %bb.0: 3199; ATOM-NEXT: phaddw %mm1, %mm0 # sched: [5:2.50] 3200; ATOM-NEXT: phaddw (%rdi), %mm0 # sched: 
[6:3.00] 3201; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3202; ATOM-NEXT: retq # sched: [79:39.50] 3203; 3204; SLM-LABEL: test_phaddw: 3205; SLM: # %bb.0: 3206; SLM-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] 3207; SLM-NEXT: phaddw (%rdi), %mm0 # sched: [4:1.00] 3208; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3209; SLM-NEXT: retq # sched: [4:1.00] 3210; 3211; SANDY-LABEL: test_phaddw: 3212; SANDY: # %bb.0: 3213; SANDY-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] 3214; SANDY-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] 3215; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3216; SANDY-NEXT: retq # sched: [1:1.00] 3217; 3218; HASWELL-LABEL: test_phaddw: 3219; HASWELL: # %bb.0: 3220; HASWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] 3221; HASWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] 3222; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3223; HASWELL-NEXT: retq # sched: [7:1.00] 3224; 3225; BROADWELL-LABEL: test_phaddw: 3226; BROADWELL: # %bb.0: 3227; BROADWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] 3228; BROADWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] 3229; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3230; BROADWELL-NEXT: retq # sched: [7:1.00] 3231; 3232; SKYLAKE-LABEL: test_phaddw: 3233; SKYLAKE: # %bb.0: 3234; SKYLAKE-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] 3235; SKYLAKE-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] 3236; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3237; SKYLAKE-NEXT: retq # sched: [7:1.00] 3238; 3239; SKX-LABEL: test_phaddw: 3240; SKX: # %bb.0: 3241; SKX-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] 3242; SKX-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] 3243; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3244; SKX-NEXT: retq # sched: [7:1.00] 3245; 3246; BTVER2-LABEL: test_phaddw: 3247; BTVER2: # %bb.0: 3248; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] 3249; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00] 3250; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3251; BTVER2-NEXT: retq # sched: [4:1.00] 3252; 3253; ZNVER1-LABEL: 
test_phaddw: 3254; ZNVER1: # %bb.0: 3255; ZNVER1-NEXT: phaddw %mm1, %mm0 # sched: [100:0.25] 3256; ZNVER1-NEXT: phaddw (%rdi), %mm0 # sched: [100:0.25] 3257; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3258; ZNVER1-NEXT: retq # sched: [1:0.50] 3259 %1 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a0, x86_mmx %a1) 3260 %2 = load x86_mmx, x86_mmx *%a2, align 8 3261 %3 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %1, x86_mmx %2) 3262 %4 = bitcast x86_mmx %3 to i64 3263 ret i64 %4 3264} 3265declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone 3266 3267define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3268; GENERIC-LABEL: test_phsubd: 3269; GENERIC: # %bb.0: 3270; GENERIC-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] 3271; GENERIC-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] 3272; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3273; GENERIC-NEXT: retq # sched: [1:1.00] 3274; 3275; ATOM-LABEL: test_phsubd: 3276; ATOM: # %bb.0: 3277; ATOM-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] 3278; ATOM-NEXT: phsubd (%rdi), %mm0 # sched: [4:2.00] 3279; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3280; ATOM-NEXT: retq # sched: [79:39.50] 3281; 3282; SLM-LABEL: test_phsubd: 3283; SLM: # %bb.0: 3284; SLM-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] 3285; SLM-NEXT: phsubd (%rdi), %mm0 # sched: [4:1.00] 3286; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3287; SLM-NEXT: retq # sched: [4:1.00] 3288; 3289; SANDY-LABEL: test_phsubd: 3290; SANDY: # %bb.0: 3291; SANDY-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] 3292; SANDY-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] 3293; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3294; SANDY-NEXT: retq # sched: [1:1.00] 3295; 3296; HASWELL-LABEL: test_phsubd: 3297; HASWELL: # %bb.0: 3298; HASWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] 3299; HASWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] 3300; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3301; HASWELL-NEXT: retq # sched: [7:1.00] 3302; 3303; 
BROADWELL-LABEL: test_phsubd: 3304; BROADWELL: # %bb.0: 3305; BROADWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] 3306; BROADWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] 3307; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3308; BROADWELL-NEXT: retq # sched: [7:1.00] 3309; 3310; SKYLAKE-LABEL: test_phsubd: 3311; SKYLAKE: # %bb.0: 3312; SKYLAKE-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] 3313; SKYLAKE-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] 3314; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3315; SKYLAKE-NEXT: retq # sched: [7:1.00] 3316; 3317; SKX-LABEL: test_phsubd: 3318; SKX: # %bb.0: 3319; SKX-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] 3320; SKX-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] 3321; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3322; SKX-NEXT: retq # sched: [7:1.00] 3323; 3324; BTVER2-LABEL: test_phsubd: 3325; BTVER2: # %bb.0: 3326; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] 3327; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00] 3328; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3329; BTVER2-NEXT: retq # sched: [4:1.00] 3330; 3331; ZNVER1-LABEL: test_phsubd: 3332; ZNVER1: # %bb.0: 3333; ZNVER1-NEXT: phsubd %mm1, %mm0 # sched: [100:0.25] 3334; ZNVER1-NEXT: phsubd (%rdi), %mm0 # sched: [100:0.25] 3335; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3336; ZNVER1-NEXT: retq # sched: [1:0.50] 3337 %1 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a0, x86_mmx %a1) 3338 %2 = load x86_mmx, x86_mmx *%a2, align 8 3339 %3 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %1, x86_mmx %2) 3340 %4 = bitcast x86_mmx %3 to i64 3341 ret i64 %4 3342} 3343declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone 3344 3345define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3346; GENERIC-LABEL: test_phsubsw: 3347; GENERIC: # %bb.0: 3348; GENERIC-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] 3349; GENERIC-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] 3350; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3351; 
GENERIC-NEXT: retq # sched: [1:1.00] 3352; 3353; ATOM-LABEL: test_phsubsw: 3354; ATOM: # %bb.0: 3355; ATOM-NEXT: phsubsw %mm1, %mm0 # sched: [5:2.50] 3356; ATOM-NEXT: phsubsw (%rdi), %mm0 # sched: [6:3.00] 3357; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3358; ATOM-NEXT: retq # sched: [79:39.50] 3359; 3360; SLM-LABEL: test_phsubsw: 3361; SLM: # %bb.0: 3362; SLM-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] 3363; SLM-NEXT: phsubsw (%rdi), %mm0 # sched: [4:1.00] 3364; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3365; SLM-NEXT: retq # sched: [4:1.00] 3366; 3367; SANDY-LABEL: test_phsubsw: 3368; SANDY: # %bb.0: 3369; SANDY-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] 3370; SANDY-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] 3371; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3372; SANDY-NEXT: retq # sched: [1:1.00] 3373; 3374; HASWELL-LABEL: test_phsubsw: 3375; HASWELL: # %bb.0: 3376; HASWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] 3377; HASWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] 3378; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3379; HASWELL-NEXT: retq # sched: [7:1.00] 3380; 3381; BROADWELL-LABEL: test_phsubsw: 3382; BROADWELL: # %bb.0: 3383; BROADWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] 3384; BROADWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] 3385; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3386; BROADWELL-NEXT: retq # sched: [7:1.00] 3387; 3388; SKYLAKE-LABEL: test_phsubsw: 3389; SKYLAKE: # %bb.0: 3390; SKYLAKE-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] 3391; SKYLAKE-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] 3392; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3393; SKYLAKE-NEXT: retq # sched: [7:1.00] 3394; 3395; SKX-LABEL: test_phsubsw: 3396; SKX: # %bb.0: 3397; SKX-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] 3398; SKX-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] 3399; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3400; SKX-NEXT: retq # sched: [7:1.00] 3401; 3402; BTVER2-LABEL: test_phsubsw: 3403; BTVER2: # %bb.0: 3404; 
BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] 3405; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00] 3406; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3407; BTVER2-NEXT: retq # sched: [4:1.00] 3408; 3409; ZNVER1-LABEL: test_phsubsw: 3410; ZNVER1: # %bb.0: 3411; ZNVER1-NEXT: phsubsw %mm1, %mm0 # sched: [100:0.25] 3412; ZNVER1-NEXT: phsubsw (%rdi), %mm0 # sched: [100:0.25] 3413; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3414; ZNVER1-NEXT: retq # sched: [1:0.50] 3415 %1 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a0, x86_mmx %a1) 3416 %2 = load x86_mmx, x86_mmx *%a2, align 8 3417 %3 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %1, x86_mmx %2) 3418 %4 = bitcast x86_mmx %3 to i64 3419 ret i64 %4 3420} 3421declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone 3422 3423define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3424; GENERIC-LABEL: test_phsubw: 3425; GENERIC: # %bb.0: 3426; GENERIC-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] 3427; GENERIC-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] 3428; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3429; GENERIC-NEXT: retq # sched: [1:1.00] 3430; 3431; ATOM-LABEL: test_phsubw: 3432; ATOM: # %bb.0: 3433; ATOM-NEXT: phsubw %mm1, %mm0 # sched: [5:2.50] 3434; ATOM-NEXT: phsubw (%rdi), %mm0 # sched: [6:3.00] 3435; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3436; ATOM-NEXT: retq # sched: [79:39.50] 3437; 3438; SLM-LABEL: test_phsubw: 3439; SLM: # %bb.0: 3440; SLM-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] 3441; SLM-NEXT: phsubw (%rdi), %mm0 # sched: [4:1.00] 3442; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3443; SLM-NEXT: retq # sched: [4:1.00] 3444; 3445; SANDY-LABEL: test_phsubw: 3446; SANDY: # %bb.0: 3447; SANDY-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] 3448; SANDY-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] 3449; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3450; SANDY-NEXT: retq # sched: [1:1.00] 3451; 3452; HASWELL-LABEL: test_phsubw: 3453; HASWELL: # %bb.0: 
3454; HASWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] 3455; HASWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] 3456; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3457; HASWELL-NEXT: retq # sched: [7:1.00] 3458; 3459; BROADWELL-LABEL: test_phsubw: 3460; BROADWELL: # %bb.0: 3461; BROADWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] 3462; BROADWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] 3463; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3464; BROADWELL-NEXT: retq # sched: [7:1.00] 3465; 3466; SKYLAKE-LABEL: test_phsubw: 3467; SKYLAKE: # %bb.0: 3468; SKYLAKE-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] 3469; SKYLAKE-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] 3470; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3471; SKYLAKE-NEXT: retq # sched: [7:1.00] 3472; 3473; SKX-LABEL: test_phsubw: 3474; SKX: # %bb.0: 3475; SKX-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] 3476; SKX-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] 3477; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3478; SKX-NEXT: retq # sched: [7:1.00] 3479; 3480; BTVER2-LABEL: test_phsubw: 3481; BTVER2: # %bb.0: 3482; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] 3483; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [6:1.00] 3484; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3485; BTVER2-NEXT: retq # sched: [4:1.00] 3486; 3487; ZNVER1-LABEL: test_phsubw: 3488; ZNVER1: # %bb.0: 3489; ZNVER1-NEXT: phsubw %mm1, %mm0 # sched: [100:0.25] 3490; ZNVER1-NEXT: phsubw (%rdi), %mm0 # sched: [100:0.25] 3491; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3492; ZNVER1-NEXT: retq # sched: [1:0.50] 3493 %1 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a0, x86_mmx %a1) 3494 %2 = load x86_mmx, x86_mmx *%a2, align 8 3495 %3 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %1, x86_mmx %2) 3496 %4 = bitcast x86_mmx %3 to i64 3497 ret i64 %4 3498} 3499declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone 3500 3501define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { 3502; GENERIC-LABEL: 
test_pinsrw: 3503; GENERIC: # %bb.0: 3504; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] 3505; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3506; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] 3507; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3508; GENERIC-NEXT: retq # sched: [1:1.00] 3509; 3510; ATOM-LABEL: test_pinsrw: 3511; ATOM: # %bb.0: 3512; ATOM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] 3513; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00] 3514; ATOM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] 3515; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3516; ATOM-NEXT: retq # sched: [79:39.50] 3517; 3518; SLM-LABEL: test_pinsrw: 3519; SLM: # %bb.0: 3520; SLM-NEXT: movswl (%rsi), %eax # sched: [4:1.00] 3521; SLM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] 3522; SLM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] 3523; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3524; SLM-NEXT: retq # sched: [4:1.00] 3525; 3526; SANDY-LABEL: test_pinsrw: 3527; SANDY: # %bb.0: 3528; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] 3529; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3530; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] 3531; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3532; SANDY-NEXT: retq # sched: [1:1.00] 3533; 3534; HASWELL-LABEL: test_pinsrw: 3535; HASWELL: # %bb.0: 3536; HASWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] 3537; HASWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3538; HASWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] 3539; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3540; HASWELL-NEXT: retq # sched: [7:1.00] 3541; 3542; BROADWELL-LABEL: test_pinsrw: 3543; BROADWELL: # %bb.0: 3544; BROADWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] 3545; BROADWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3546; BROADWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] 3547; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3548; BROADWELL-NEXT: retq # sched: [7:1.00] 3549; 3550; SKYLAKE-LABEL: test_pinsrw: 
3551; SKYLAKE: # %bb.0: 3552; SKYLAKE-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] 3553; SKYLAKE-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3554; SKYLAKE-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] 3555; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3556; SKYLAKE-NEXT: retq # sched: [7:1.00] 3557; 3558; SKX-LABEL: test_pinsrw: 3559; SKX: # %bb.0: 3560; SKX-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] 3561; SKX-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3562; SKX-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] 3563; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3564; SKX-NEXT: retq # sched: [7:1.00] 3565; 3566; BTVER2-LABEL: test_pinsrw: 3567; BTVER2: # %bb.0: 3568; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [7:0.50] 3569; BTVER2-NEXT: movswl (%rsi), %eax # sched: [4:1.00] 3570; BTVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [7:0.50] 3571; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3572; BTVER2-NEXT: retq # sched: [4:1.00] 3573; 3574; ZNVER1-LABEL: test_pinsrw: 3575; ZNVER1: # %bb.0: 3576; ZNVER1-NEXT: movswl (%rsi), %eax # sched: [8:0.50] 3577; ZNVER1-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:0.25] 3578; ZNVER1-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:0.25] 3579; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3580; ZNVER1-NEXT: retq # sched: [1:0.50] 3581 %1 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %a0, i32 %a1, i32 0) 3582 %2 = load i16, i16 *%a2, align 2 3583 %3 = sext i16 %2 to i32 3584 %4 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %1, i32 %3, i32 1) 3585 %5 = bitcast x86_mmx %4 to i64 3586 ret i64 %5 3587} 3588declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32) nounwind readnone 3589 3590define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3591; GENERIC-LABEL: test_pmaddwd: 3592; GENERIC: # %bb.0: 3593; GENERIC-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] 3594; GENERIC-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] 3595; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3596; GENERIC-NEXT: retq # sched: [1:1.00] 3597; 3598; 
ATOM-LABEL: test_pmaddwd: 3599; ATOM: # %bb.0: 3600; ATOM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:4.00] 3601; ATOM-NEXT: pmaddwd (%rdi), %mm0 # sched: [4:4.00] 3602; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3603; ATOM-NEXT: retq # sched: [79:39.50] 3604; 3605; SLM-LABEL: test_pmaddwd: 3606; SLM: # %bb.0: 3607; SLM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] 3608; SLM-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] 3609; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3610; SLM-NEXT: retq # sched: [4:1.00] 3611; 3612; SANDY-LABEL: test_pmaddwd: 3613; SANDY: # %bb.0: 3614; SANDY-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] 3615; SANDY-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] 3616; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3617; SANDY-NEXT: retq # sched: [1:1.00] 3618; 3619; HASWELL-LABEL: test_pmaddwd: 3620; HASWELL: # %bb.0: 3621; HASWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] 3622; HASWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] 3623; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3624; HASWELL-NEXT: retq # sched: [7:1.00] 3625; 3626; BROADWELL-LABEL: test_pmaddwd: 3627; BROADWELL: # %bb.0: 3628; BROADWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] 3629; BROADWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] 3630; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3631; BROADWELL-NEXT: retq # sched: [7:1.00] 3632; 3633; SKYLAKE-LABEL: test_pmaddwd: 3634; SKYLAKE: # %bb.0: 3635; SKYLAKE-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] 3636; SKYLAKE-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] 3637; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3638; SKYLAKE-NEXT: retq # sched: [7:1.00] 3639; 3640; SKX-LABEL: test_pmaddwd: 3641; SKX: # %bb.0: 3642; SKX-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] 3643; SKX-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] 3644; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3645; SKX-NEXT: retq # sched: [7:1.00] 3646; 3647; BTVER2-LABEL: test_pmaddwd: 3648; BTVER2: # %bb.0: 3649; BTVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [2:1.00] 3650; 
BTVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] 3651; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3652; BTVER2-NEXT: retq # sched: [4:1.00] 3653; 3654; ZNVER1-LABEL: test_pmaddwd: 3655; ZNVER1: # %bb.0: 3656; ZNVER1-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] 3657; ZNVER1-NEXT: pmaddwd (%rdi), %mm0 # sched: [11:1.00] 3658; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3659; ZNVER1-NEXT: retq # sched: [1:0.50] 3660 %1 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a0, x86_mmx %a1) 3661 %2 = load x86_mmx, x86_mmx *%a2, align 8 3662 %3 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %1, x86_mmx %2) 3663 %4 = bitcast x86_mmx %3 to i64 3664 ret i64 %4 3665} 3666declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone 3667 3668define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3669; GENERIC-LABEL: test_pmaddubsw: 3670; GENERIC: # %bb.0: 3671; GENERIC-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] 3672; GENERIC-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] 3673; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3674; GENERIC-NEXT: retq # sched: [1:1.00] 3675; 3676; ATOM-LABEL: test_pmaddubsw: 3677; ATOM: # %bb.0: 3678; ATOM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:4.00] 3679; ATOM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [4:4.00] 3680; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3681; ATOM-NEXT: retq # sched: [79:39.50] 3682; 3683; SLM-LABEL: test_pmaddubsw: 3684; SLM: # %bb.0: 3685; SLM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] 3686; SLM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] 3687; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3688; SLM-NEXT: retq # sched: [4:1.00] 3689; 3690; SANDY-LABEL: test_pmaddubsw: 3691; SANDY: # %bb.0: 3692; SANDY-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] 3693; SANDY-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] 3694; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3695; SANDY-NEXT: retq # sched: [1:1.00] 3696; 3697; HASWELL-LABEL: test_pmaddubsw: 3698; HASWELL: # %bb.0: 3699; HASWELL-NEXT: 
pmaddubsw %mm1, %mm0 # sched: [5:1.00] 3700; HASWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] 3701; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3702; HASWELL-NEXT: retq # sched: [7:1.00] 3703; 3704; BROADWELL-LABEL: test_pmaddubsw: 3705; BROADWELL: # %bb.0: 3706; BROADWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] 3707; BROADWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] 3708; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3709; BROADWELL-NEXT: retq # sched: [7:1.00] 3710; 3711; SKYLAKE-LABEL: test_pmaddubsw: 3712; SKYLAKE: # %bb.0: 3713; SKYLAKE-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] 3714; SKYLAKE-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] 3715; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3716; SKYLAKE-NEXT: retq # sched: [7:1.00] 3717; 3718; SKX-LABEL: test_pmaddubsw: 3719; SKX: # %bb.0: 3720; SKX-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] 3721; SKX-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] 3722; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3723; SKX-NEXT: retq # sched: [7:1.00] 3724; 3725; BTVER2-LABEL: test_pmaddubsw: 3726; BTVER2: # %bb.0: 3727; BTVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [2:1.00] 3728; BTVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] 3729; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3730; BTVER2-NEXT: retq # sched: [4:1.00] 3731; 3732; ZNVER1-LABEL: test_pmaddubsw: 3733; ZNVER1: # %bb.0: 3734; ZNVER1-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] 3735; ZNVER1-NEXT: pmaddubsw (%rdi), %mm0 # sched: [11:1.00] 3736; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3737; ZNVER1-NEXT: retq # sched: [1:0.50] 3738 %1 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a0, x86_mmx %a1) 3739 %2 = load x86_mmx, x86_mmx *%a2, align 8 3740 %3 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %1, x86_mmx %2) 3741 %4 = bitcast x86_mmx %3 to i64 3742 ret i64 %4 3743} 3744declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone 3745 3746define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, 
x86_mmx* %a2) optsize { 3747; GENERIC-LABEL: test_pmaxsw: 3748; GENERIC: # %bb.0: 3749; GENERIC-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00] 3750; GENERIC-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] 3751; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3752; GENERIC-NEXT: retq # sched: [1:1.00] 3753; 3754; ATOM-LABEL: test_pmaxsw: 3755; ATOM: # %bb.0: 3756; ATOM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3757; ATOM-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:1.00] 3758; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3759; ATOM-NEXT: retq # sched: [79:39.50] 3760; 3761; SLM-LABEL: test_pmaxsw: 3762; SLM: # %bb.0: 3763; SLM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3764; SLM-NEXT: pmaxsw (%rdi), %mm0 # sched: [4:1.00] 3765; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3766; SLM-NEXT: retq # sched: [4:1.00] 3767; 3768; SANDY-LABEL: test_pmaxsw: 3769; SANDY: # %bb.0: 3770; SANDY-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00] 3771; SANDY-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] 3772; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3773; SANDY-NEXT: retq # sched: [1:1.00] 3774; 3775; HASWELL-LABEL: test_pmaxsw: 3776; HASWELL: # %bb.0: 3777; HASWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3778; HASWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50] 3779; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3780; HASWELL-NEXT: retq # sched: [7:1.00] 3781; 3782; BROADWELL-LABEL: test_pmaxsw: 3783; BROADWELL: # %bb.0: 3784; BROADWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3785; BROADWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50] 3786; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3787; BROADWELL-NEXT: retq # sched: [7:1.00] 3788; 3789; SKYLAKE-LABEL: test_pmaxsw: 3790; SKYLAKE: # %bb.0: 3791; SKYLAKE-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] 3792; SKYLAKE-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] 3793; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3794; SKYLAKE-NEXT: retq # sched: [7:1.00] 3795; 3796; SKX-LABEL: test_pmaxsw: 3797; SKX: # %bb.0: 3798; SKX-NEXT: pmaxsw %mm1, %mm0 # 
sched: [1:1.00] 3799; SKX-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] 3800; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3801; SKX-NEXT: retq # sched: [7:1.00] 3802; 3803; BTVER2-LABEL: test_pmaxsw: 3804; BTVER2: # %bb.0: 3805; BTVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3806; BTVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] 3807; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3808; BTVER2-NEXT: retq # sched: [4:1.00] 3809; 3810; ZNVER1-LABEL: test_pmaxsw: 3811; ZNVER1: # %bb.0: 3812; ZNVER1-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.25] 3813; ZNVER1-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:0.50] 3814; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3815; ZNVER1-NEXT: retq # sched: [1:0.50] 3816 %1 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a0, x86_mmx %a1) 3817 %2 = load x86_mmx, x86_mmx *%a2, align 8 3818 %3 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %1, x86_mmx %2) 3819 %4 = bitcast x86_mmx %3 to i64 3820 ret i64 %4 3821} 3822declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone 3823 3824define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3825; GENERIC-LABEL: test_pmaxub: 3826; GENERIC: # %bb.0: 3827; GENERIC-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] 3828; GENERIC-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] 3829; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3830; GENERIC-NEXT: retq # sched: [1:1.00] 3831; 3832; ATOM-LABEL: test_pmaxub: 3833; ATOM: # %bb.0: 3834; ATOM-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3835; ATOM-NEXT: pmaxub (%rdi), %mm0 # sched: [1:1.00] 3836; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3837; ATOM-NEXT: retq # sched: [79:39.50] 3838; 3839; SLM-LABEL: test_pmaxub: 3840; SLM: # %bb.0: 3841; SLM-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3842; SLM-NEXT: pmaxub (%rdi), %mm0 # sched: [4:1.00] 3843; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3844; SLM-NEXT: retq # sched: [4:1.00] 3845; 3846; SANDY-LABEL: test_pmaxub: 3847; SANDY: # %bb.0: 3848; SANDY-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] 
3849; SANDY-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] 3850; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3851; SANDY-NEXT: retq # sched: [1:1.00] 3852; 3853; HASWELL-LABEL: test_pmaxub: 3854; HASWELL: # %bb.0: 3855; HASWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3856; HASWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50] 3857; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3858; HASWELL-NEXT: retq # sched: [7:1.00] 3859; 3860; BROADWELL-LABEL: test_pmaxub: 3861; BROADWELL: # %bb.0: 3862; BROADWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3863; BROADWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50] 3864; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3865; BROADWELL-NEXT: retq # sched: [7:1.00] 3866; 3867; SKYLAKE-LABEL: test_pmaxub: 3868; SKYLAKE: # %bb.0: 3869; SKYLAKE-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] 3870; SKYLAKE-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] 3871; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3872; SKYLAKE-NEXT: retq # sched: [7:1.00] 3873; 3874; SKX-LABEL: test_pmaxub: 3875; SKX: # %bb.0: 3876; SKX-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] 3877; SKX-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] 3878; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3879; SKX-NEXT: retq # sched: [7:1.00] 3880; 3881; BTVER2-LABEL: test_pmaxub: 3882; BTVER2: # %bb.0: 3883; BTVER2-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3884; BTVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] 3885; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3886; BTVER2-NEXT: retq # sched: [4:1.00] 3887; 3888; ZNVER1-LABEL: test_pmaxub: 3889; ZNVER1: # %bb.0: 3890; ZNVER1-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.25] 3891; ZNVER1-NEXT: pmaxub (%rdi), %mm0 # sched: [8:0.50] 3892; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3893; ZNVER1-NEXT: retq # sched: [1:0.50] 3894 %1 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a0, x86_mmx %a1) 3895 %2 = load x86_mmx, x86_mmx *%a2, align 8 3896 %3 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %1, x86_mmx %2) 3897 %4 = bitcast x86_mmx %3 to i64 
3898 ret i64 %4 3899} 3900declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone 3901 3902define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3903; GENERIC-LABEL: test_pminsw: 3904; GENERIC: # %bb.0: 3905; GENERIC-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] 3906; GENERIC-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] 3907; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3908; GENERIC-NEXT: retq # sched: [1:1.00] 3909; 3910; ATOM-LABEL: test_pminsw: 3911; ATOM: # %bb.0: 3912; ATOM-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3913; ATOM-NEXT: pminsw (%rdi), %mm0 # sched: [1:1.00] 3914; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3915; ATOM-NEXT: retq # sched: [79:39.50] 3916; 3917; SLM-LABEL: test_pminsw: 3918; SLM: # %bb.0: 3919; SLM-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3920; SLM-NEXT: pminsw (%rdi), %mm0 # sched: [4:1.00] 3921; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3922; SLM-NEXT: retq # sched: [4:1.00] 3923; 3924; SANDY-LABEL: test_pminsw: 3925; SANDY: # %bb.0: 3926; SANDY-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] 3927; SANDY-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] 3928; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3929; SANDY-NEXT: retq # sched: [1:1.00] 3930; 3931; HASWELL-LABEL: test_pminsw: 3932; HASWELL: # %bb.0: 3933; HASWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3934; HASWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50] 3935; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3936; HASWELL-NEXT: retq # sched: [7:1.00] 3937; 3938; BROADWELL-LABEL: test_pminsw: 3939; BROADWELL: # %bb.0: 3940; BROADWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3941; BROADWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50] 3942; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3943; BROADWELL-NEXT: retq # sched: [7:1.00] 3944; 3945; SKYLAKE-LABEL: test_pminsw: 3946; SKYLAKE: # %bb.0: 3947; SKYLAKE-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] 3948; SKYLAKE-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] 3949; SKYLAKE-NEXT: movq %mm0, 
%rax # sched: [2:1.00] 3950; SKYLAKE-NEXT: retq # sched: [7:1.00] 3951; 3952; SKX-LABEL: test_pminsw: 3953; SKX: # %bb.0: 3954; SKX-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] 3955; SKX-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] 3956; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3957; SKX-NEXT: retq # sched: [7:1.00] 3958; 3959; BTVER2-LABEL: test_pminsw: 3960; BTVER2: # %bb.0: 3961; BTVER2-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3962; BTVER2-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] 3963; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3964; BTVER2-NEXT: retq # sched: [4:1.00] 3965; 3966; ZNVER1-LABEL: test_pminsw: 3967; ZNVER1: # %bb.0: 3968; ZNVER1-NEXT: pminsw %mm1, %mm0 # sched: [1:0.25] 3969; ZNVER1-NEXT: pminsw (%rdi), %mm0 # sched: [8:0.50] 3970; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3971; ZNVER1-NEXT: retq # sched: [1:0.50] 3972 %1 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a0, x86_mmx %a1) 3973 %2 = load x86_mmx, x86_mmx *%a2, align 8 3974 %3 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %1, x86_mmx %2) 3975 %4 = bitcast x86_mmx %3 to i64 3976 ret i64 %4 3977} 3978declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone 3979 3980define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3981; GENERIC-LABEL: test_pminub: 3982; GENERIC: # %bb.0: 3983; GENERIC-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] 3984; GENERIC-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] 3985; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3986; GENERIC-NEXT: retq # sched: [1:1.00] 3987; 3988; ATOM-LABEL: test_pminub: 3989; ATOM: # %bb.0: 3990; ATOM-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 3991; ATOM-NEXT: pminub (%rdi), %mm0 # sched: [1:1.00] 3992; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3993; ATOM-NEXT: retq # sched: [79:39.50] 3994; 3995; SLM-LABEL: test_pminub: 3996; SLM: # %bb.0: 3997; SLM-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 3998; SLM-NEXT: pminub (%rdi), %mm0 # sched: [4:1.00] 3999; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 
4000; SLM-NEXT: retq # sched: [4:1.00] 4001; 4002; SANDY-LABEL: test_pminub: 4003; SANDY: # %bb.0: 4004; SANDY-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] 4005; SANDY-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] 4006; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4007; SANDY-NEXT: retq # sched: [1:1.00] 4008; 4009; HASWELL-LABEL: test_pminub: 4010; HASWELL: # %bb.0: 4011; HASWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 4012; HASWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50] 4013; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4014; HASWELL-NEXT: retq # sched: [7:1.00] 4015; 4016; BROADWELL-LABEL: test_pminub: 4017; BROADWELL: # %bb.0: 4018; BROADWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 4019; BROADWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50] 4020; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4021; BROADWELL-NEXT: retq # sched: [7:1.00] 4022; 4023; SKYLAKE-LABEL: test_pminub: 4024; SKYLAKE: # %bb.0: 4025; SKYLAKE-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] 4026; SKYLAKE-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] 4027; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4028; SKYLAKE-NEXT: retq # sched: [7:1.00] 4029; 4030; SKX-LABEL: test_pminub: 4031; SKX: # %bb.0: 4032; SKX-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] 4033; SKX-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] 4034; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4035; SKX-NEXT: retq # sched: [7:1.00] 4036; 4037; BTVER2-LABEL: test_pminub: 4038; BTVER2: # %bb.0: 4039; BTVER2-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 4040; BTVER2-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] 4041; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4042; BTVER2-NEXT: retq # sched: [4:1.00] 4043; 4044; ZNVER1-LABEL: test_pminub: 4045; ZNVER1: # %bb.0: 4046; ZNVER1-NEXT: pminub %mm1, %mm0 # sched: [1:0.25] 4047; ZNVER1-NEXT: pminub (%rdi), %mm0 # sched: [8:0.50] 4048; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4049; ZNVER1-NEXT: retq # sched: [1:0.50] 4050 %1 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a0, x86_mmx %a1) 
4051 %2 = load x86_mmx, x86_mmx *%a2, align 8 4052 %3 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %1, x86_mmx %2) 4053 %4 = bitcast x86_mmx %3 to i64 4054 ret i64 %4 4055} 4056declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone 4057 4058define i32 @test_pmovmskb(x86_mmx %a0) optsize { 4059; GENERIC-LABEL: test_pmovmskb: 4060; GENERIC: # %bb.0: 4061; GENERIC-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4062; GENERIC-NEXT: retq # sched: [1:1.00] 4063; 4064; ATOM-LABEL: test_pmovmskb: 4065; ATOM: # %bb.0: 4066; ATOM-NEXT: pmovmskb %mm0, %eax # sched: [3:3.00] 4067; ATOM-NEXT: retq # sched: [79:39.50] 4068; 4069; SLM-LABEL: test_pmovmskb: 4070; SLM: # %bb.0: 4071; SLM-NEXT: pmovmskb %mm0, %eax # sched: [4:1.00] 4072; SLM-NEXT: retq # sched: [4:1.00] 4073; 4074; SANDY-LABEL: test_pmovmskb: 4075; SANDY: # %bb.0: 4076; SANDY-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4077; SANDY-NEXT: retq # sched: [1:1.00] 4078; 4079; HASWELL-LABEL: test_pmovmskb: 4080; HASWELL: # %bb.0: 4081; HASWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4082; HASWELL-NEXT: retq # sched: [7:1.00] 4083; 4084; BROADWELL-LABEL: test_pmovmskb: 4085; BROADWELL: # %bb.0: 4086; BROADWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4087; BROADWELL-NEXT: retq # sched: [7:1.00] 4088; 4089; SKYLAKE-LABEL: test_pmovmskb: 4090; SKYLAKE: # %bb.0: 4091; SKYLAKE-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] 4092; SKYLAKE-NEXT: retq # sched: [7:1.00] 4093; 4094; SKX-LABEL: test_pmovmskb: 4095; SKX: # %bb.0: 4096; SKX-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] 4097; SKX-NEXT: retq # sched: [7:1.00] 4098; 4099; BTVER2-LABEL: test_pmovmskb: 4100; BTVER2: # %bb.0: 4101; BTVER2-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00] 4102; BTVER2-NEXT: retq # sched: [4:1.00] 4103; 4104; ZNVER1-LABEL: test_pmovmskb: 4105; ZNVER1: # %bb.0: 4106; ZNVER1-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4107; ZNVER1-NEXT: retq # sched: [1:0.50] 4108 %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) 4109 ret i32 %1 4110} 
4111declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone 4112 4113define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4114; GENERIC-LABEL: test_pmulhrsw: 4115; GENERIC: # %bb.0: 4116; GENERIC-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] 4117; GENERIC-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] 4118; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4119; GENERIC-NEXT: retq # sched: [1:1.00] 4120; 4121; ATOM-LABEL: test_pmulhrsw: 4122; ATOM: # %bb.0: 4123; ATOM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:4.00] 4124; ATOM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [4:4.00] 4125; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4126; ATOM-NEXT: retq # sched: [79:39.50] 4127; 4128; SLM-LABEL: test_pmulhrsw: 4129; SLM: # %bb.0: 4130; SLM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] 4131; SLM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] 4132; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4133; SLM-NEXT: retq # sched: [4:1.00] 4134; 4135; SANDY-LABEL: test_pmulhrsw: 4136; SANDY: # %bb.0: 4137; SANDY-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] 4138; SANDY-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] 4139; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4140; SANDY-NEXT: retq # sched: [1:1.00] 4141; 4142; HASWELL-LABEL: test_pmulhrsw: 4143; HASWELL: # %bb.0: 4144; HASWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] 4145; HASWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] 4146; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4147; HASWELL-NEXT: retq # sched: [7:1.00] 4148; 4149; BROADWELL-LABEL: test_pmulhrsw: 4150; BROADWELL: # %bb.0: 4151; BROADWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] 4152; BROADWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] 4153; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4154; BROADWELL-NEXT: retq # sched: [7:1.00] 4155; 4156; SKYLAKE-LABEL: test_pmulhrsw: 4157; SKYLAKE: # %bb.0: 4158; SKYLAKE-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] 4159; SKYLAKE-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] 4160; SKYLAKE-NEXT: 
movq %mm0, %rax # sched: [2:1.00] 4161; SKYLAKE-NEXT: retq # sched: [7:1.00] 4162; 4163; SKX-LABEL: test_pmulhrsw: 4164; SKX: # %bb.0: 4165; SKX-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] 4166; SKX-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] 4167; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4168; SKX-NEXT: retq # sched: [7:1.00] 4169; 4170; BTVER2-LABEL: test_pmulhrsw: 4171; BTVER2: # %bb.0: 4172; BTVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [2:1.00] 4173; BTVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] 4174; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4175; BTVER2-NEXT: retq # sched: [4:1.00] 4176; 4177; ZNVER1-LABEL: test_pmulhrsw: 4178; ZNVER1: # %bb.0: 4179; ZNVER1-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] 4180; ZNVER1-NEXT: pmulhrsw (%rdi), %mm0 # sched: [11:1.00] 4181; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4182; ZNVER1-NEXT: retq # sched: [1:0.50] 4183 %1 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a0, x86_mmx %a1) 4184 %2 = load x86_mmx, x86_mmx *%a2, align 8 4185 %3 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %1, x86_mmx %2) 4186 %4 = bitcast x86_mmx %3 to i64 4187 ret i64 %4 4188} 4189declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone 4190 4191define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4192; GENERIC-LABEL: test_pmulhw: 4193; GENERIC: # %bb.0: 4194; GENERIC-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] 4195; GENERIC-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] 4196; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4197; GENERIC-NEXT: retq # sched: [1:1.00] 4198; 4199; ATOM-LABEL: test_pmulhw: 4200; ATOM: # %bb.0: 4201; ATOM-NEXT: pmulhw %mm1, %mm0 # sched: [4:4.00] 4202; ATOM-NEXT: pmulhw (%rdi), %mm0 # sched: [4:4.00] 4203; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4204; ATOM-NEXT: retq # sched: [79:39.50] 4205; 4206; SLM-LABEL: test_pmulhw: 4207; SLM: # %bb.0: 4208; SLM-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] 4209; SLM-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] 
4210; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4211; SLM-NEXT: retq # sched: [4:1.00] 4212; 4213; SANDY-LABEL: test_pmulhw: 4214; SANDY: # %bb.0: 4215; SANDY-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] 4216; SANDY-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] 4217; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4218; SANDY-NEXT: retq # sched: [1:1.00] 4219; 4220; HASWELL-LABEL: test_pmulhw: 4221; HASWELL: # %bb.0: 4222; HASWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] 4223; HASWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] 4224; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4225; HASWELL-NEXT: retq # sched: [7:1.00] 4226; 4227; BROADWELL-LABEL: test_pmulhw: 4228; BROADWELL: # %bb.0: 4229; BROADWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] 4230; BROADWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] 4231; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4232; BROADWELL-NEXT: retq # sched: [7:1.00] 4233; 4234; SKYLAKE-LABEL: test_pmulhw: 4235; SKYLAKE: # %bb.0: 4236; SKYLAKE-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] 4237; SKYLAKE-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] 4238; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4239; SKYLAKE-NEXT: retq # sched: [7:1.00] 4240; 4241; SKX-LABEL: test_pmulhw: 4242; SKX: # %bb.0: 4243; SKX-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] 4244; SKX-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] 4245; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4246; SKX-NEXT: retq # sched: [7:1.00] 4247; 4248; BTVER2-LABEL: test_pmulhw: 4249; BTVER2: # %bb.0: 4250; BTVER2-NEXT: pmulhw %mm1, %mm0 # sched: [2:1.00] 4251; BTVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] 4252; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4253; BTVER2-NEXT: retq # sched: [4:1.00] 4254; 4255; ZNVER1-LABEL: test_pmulhw: 4256; ZNVER1: # %bb.0: 4257; ZNVER1-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] 4258; ZNVER1-NEXT: pmulhw (%rdi), %mm0 # sched: [11:1.00] 4259; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4260; ZNVER1-NEXT: retq # sched: [1:0.50] 4261 %1 = call 
x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a0, x86_mmx %a1) 4262 %2 = load x86_mmx, x86_mmx *%a2, align 8 4263 %3 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %1, x86_mmx %2) 4264 %4 = bitcast x86_mmx %3 to i64 4265 ret i64 %4 4266} 4267declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone 4268 4269define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4270; GENERIC-LABEL: test_pmulhuw: 4271; GENERIC: # %bb.0: 4272; GENERIC-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] 4273; GENERIC-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] 4274; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4275; GENERIC-NEXT: retq # sched: [1:1.00] 4276; 4277; ATOM-LABEL: test_pmulhuw: 4278; ATOM: # %bb.0: 4279; ATOM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:4.00] 4280; ATOM-NEXT: pmulhuw (%rdi), %mm0 # sched: [4:4.00] 4281; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4282; ATOM-NEXT: retq # sched: [79:39.50] 4283; 4284; SLM-LABEL: test_pmulhuw: 4285; SLM: # %bb.0: 4286; SLM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] 4287; SLM-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] 4288; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4289; SLM-NEXT: retq # sched: [4:1.00] 4290; 4291; SANDY-LABEL: test_pmulhuw: 4292; SANDY: # %bb.0: 4293; SANDY-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] 4294; SANDY-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] 4295; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4296; SANDY-NEXT: retq # sched: [1:1.00] 4297; 4298; HASWELL-LABEL: test_pmulhuw: 4299; HASWELL: # %bb.0: 4300; HASWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] 4301; HASWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] 4302; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4303; HASWELL-NEXT: retq # sched: [7:1.00] 4304; 4305; BROADWELL-LABEL: test_pmulhuw: 4306; BROADWELL: # %bb.0: 4307; BROADWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] 4308; BROADWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] 4309; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4310; BROADWELL-NEXT: retq 
# sched: [7:1.00] 4311; 4312; SKYLAKE-LABEL: test_pmulhuw: 4313; SKYLAKE: # %bb.0: 4314; SKYLAKE-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] 4315; SKYLAKE-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] 4316; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4317; SKYLAKE-NEXT: retq # sched: [7:1.00] 4318; 4319; SKX-LABEL: test_pmulhuw: 4320; SKX: # %bb.0: 4321; SKX-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] 4322; SKX-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] 4323; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4324; SKX-NEXT: retq # sched: [7:1.00] 4325; 4326; BTVER2-LABEL: test_pmulhuw: 4327; BTVER2: # %bb.0: 4328; BTVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [2:1.00] 4329; BTVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] 4330; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4331; BTVER2-NEXT: retq # sched: [4:1.00] 4332; 4333; ZNVER1-LABEL: test_pmulhuw: 4334; ZNVER1: # %bb.0: 4335; ZNVER1-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] 4336; ZNVER1-NEXT: pmulhuw (%rdi), %mm0 # sched: [11:1.00] 4337; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4338; ZNVER1-NEXT: retq # sched: [1:0.50] 4339 %1 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a0, x86_mmx %a1) 4340 %2 = load x86_mmx, x86_mmx *%a2, align 8 4341 %3 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %1, x86_mmx %2) 4342 %4 = bitcast x86_mmx %3 to i64 4343 ret i64 %4 4344} 4345declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone 4346 4347define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4348; GENERIC-LABEL: test_pmullw: 4349; GENERIC: # %bb.0: 4350; GENERIC-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] 4351; GENERIC-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] 4352; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4353; GENERIC-NEXT: retq # sched: [1:1.00] 4354; 4355; ATOM-LABEL: test_pmullw: 4356; ATOM: # %bb.0: 4357; ATOM-NEXT: pmullw %mm1, %mm0 # sched: [4:4.00] 4358; ATOM-NEXT: pmullw (%rdi), %mm0 # sched: [4:4.00] 4359; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4360; 
ATOM-NEXT: retq # sched: [79:39.50] 4361; 4362; SLM-LABEL: test_pmullw: 4363; SLM: # %bb.0: 4364; SLM-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] 4365; SLM-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] 4366; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4367; SLM-NEXT: retq # sched: [4:1.00] 4368; 4369; SANDY-LABEL: test_pmullw: 4370; SANDY: # %bb.0: 4371; SANDY-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] 4372; SANDY-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] 4373; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4374; SANDY-NEXT: retq # sched: [1:1.00] 4375; 4376; HASWELL-LABEL: test_pmullw: 4377; HASWELL: # %bb.0: 4378; HASWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] 4379; HASWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] 4380; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4381; HASWELL-NEXT: retq # sched: [7:1.00] 4382; 4383; BROADWELL-LABEL: test_pmullw: 4384; BROADWELL: # %bb.0: 4385; BROADWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] 4386; BROADWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] 4387; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4388; BROADWELL-NEXT: retq # sched: [7:1.00] 4389; 4390; SKYLAKE-LABEL: test_pmullw: 4391; SKYLAKE: # %bb.0: 4392; SKYLAKE-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] 4393; SKYLAKE-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] 4394; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4395; SKYLAKE-NEXT: retq # sched: [7:1.00] 4396; 4397; SKX-LABEL: test_pmullw: 4398; SKX: # %bb.0: 4399; SKX-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] 4400; SKX-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] 4401; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4402; SKX-NEXT: retq # sched: [7:1.00] 4403; 4404; BTVER2-LABEL: test_pmullw: 4405; BTVER2: # %bb.0: 4406; BTVER2-NEXT: pmullw %mm1, %mm0 # sched: [2:1.00] 4407; BTVER2-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] 4408; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4409; BTVER2-NEXT: retq # sched: [4:1.00] 4410; 4411; ZNVER1-LABEL: test_pmullw: 4412; ZNVER1: # %bb.0: 4413; ZNVER1-NEXT: pmullw 
%mm1, %mm0 # sched: [4:1.00] 4414; ZNVER1-NEXT: pmullw (%rdi), %mm0 # sched: [11:1.00] 4415; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4416; ZNVER1-NEXT: retq # sched: [1:0.50] 4417 %1 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a0, x86_mmx %a1) 4418 %2 = load x86_mmx, x86_mmx *%a2, align 8 4419 %3 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %1, x86_mmx %2) 4420 %4 = bitcast x86_mmx %3 to i64 4421 ret i64 %4 4422} 4423declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone 4424 4425define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4426; GENERIC-LABEL: test_pmuludq: 4427; GENERIC: # %bb.0: 4428; GENERIC-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] 4429; GENERIC-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] 4430; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4431; GENERIC-NEXT: retq # sched: [1:1.00] 4432; 4433; ATOM-LABEL: test_pmuludq: 4434; ATOM: # %bb.0: 4435; ATOM-NEXT: pmuludq %mm1, %mm0 # sched: [4:4.00] 4436; ATOM-NEXT: pmuludq (%rdi), %mm0 # sched: [4:4.00] 4437; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4438; ATOM-NEXT: retq # sched: [79:39.50] 4439; 4440; SLM-LABEL: test_pmuludq: 4441; SLM: # %bb.0: 4442; SLM-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] 4443; SLM-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] 4444; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4445; SLM-NEXT: retq # sched: [4:1.00] 4446; 4447; SANDY-LABEL: test_pmuludq: 4448; SANDY: # %bb.0: 4449; SANDY-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] 4450; SANDY-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] 4451; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4452; SANDY-NEXT: retq # sched: [1:1.00] 4453; 4454; HASWELL-LABEL: test_pmuludq: 4455; HASWELL: # %bb.0: 4456; HASWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] 4457; HASWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] 4458; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4459; HASWELL-NEXT: retq # sched: [7:1.00] 4460; 4461; BROADWELL-LABEL: test_pmuludq: 4462; BROADWELL: # %bb.0: 4463; 
BROADWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] 4464; BROADWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] 4465; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4466; BROADWELL-NEXT: retq # sched: [7:1.00] 4467; 4468; SKYLAKE-LABEL: test_pmuludq: 4469; SKYLAKE: # %bb.0: 4470; SKYLAKE-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] 4471; SKYLAKE-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] 4472; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4473; SKYLAKE-NEXT: retq # sched: [7:1.00] 4474; 4475; SKX-LABEL: test_pmuludq: 4476; SKX: # %bb.0: 4477; SKX-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] 4478; SKX-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] 4479; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4480; SKX-NEXT: retq # sched: [7:1.00] 4481; 4482; BTVER2-LABEL: test_pmuludq: 4483; BTVER2: # %bb.0: 4484; BTVER2-NEXT: pmuludq %mm1, %mm0 # sched: [2:1.00] 4485; BTVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] 4486; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4487; BTVER2-NEXT: retq # sched: [4:1.00] 4488; 4489; ZNVER1-LABEL: test_pmuludq: 4490; ZNVER1: # %bb.0: 4491; ZNVER1-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] 4492; ZNVER1-NEXT: pmuludq (%rdi), %mm0 # sched: [11:1.00] 4493; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4494; ZNVER1-NEXT: retq # sched: [1:0.50] 4495 %1 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a0, x86_mmx %a1) 4496 %2 = load x86_mmx, x86_mmx *%a2, align 8 4497 %3 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %1, x86_mmx %2) 4498 %4 = bitcast x86_mmx %3 to i64 4499 ret i64 %4 4500} 4501declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone 4502 4503define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4504; GENERIC-LABEL: test_por: 4505; GENERIC: # %bb.0: 4506; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] 4507; GENERIC-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4508; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4509; GENERIC-NEXT: retq # sched: [1:1.00] 4510; 4511; ATOM-LABEL: test_por: 4512; 
ATOM: # %bb.0: 4513; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4514; ATOM-NEXT: por (%rdi), %mm0 # sched: [1:1.00] 4515; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4516; ATOM-NEXT: retq # sched: [79:39.50] 4517; 4518; SLM-LABEL: test_por: 4519; SLM: # %bb.0: 4520; SLM-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4521; SLM-NEXT: por (%rdi), %mm0 # sched: [4:1.00] 4522; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4523; SLM-NEXT: retq # sched: [4:1.00] 4524; 4525; SANDY-LABEL: test_por: 4526; SANDY: # %bb.0: 4527; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] 4528; SANDY-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4529; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4530; SANDY-NEXT: retq # sched: [1:1.00] 4531; 4532; HASWELL-LABEL: test_por: 4533; HASWELL: # %bb.0: 4534; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] 4535; HASWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4536; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4537; HASWELL-NEXT: retq # sched: [7:1.00] 4538; 4539; BROADWELL-LABEL: test_por: 4540; BROADWELL: # %bb.0: 4541; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] 4542; BROADWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4543; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4544; BROADWELL-NEXT: retq # sched: [7:1.00] 4545; 4546; SKYLAKE-LABEL: test_por: 4547; SKYLAKE: # %bb.0: 4548; SKYLAKE-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4549; SKYLAKE-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4550; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4551; SKYLAKE-NEXT: retq # sched: [7:1.00] 4552; 4553; SKX-LABEL: test_por: 4554; SKX: # %bb.0: 4555; SKX-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4556; SKX-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4557; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4558; SKX-NEXT: retq # sched: [7:1.00] 4559; 4560; BTVER2-LABEL: test_por: 4561; BTVER2: # %bb.0: 4562; BTVER2-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4563; BTVER2-NEXT: por (%rdi), %mm0 # sched: [6:1.00] 4564; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4565; BTVER2-NEXT: retq 
# sched: [4:1.00] 4566; 4567; ZNVER1-LABEL: test_por: 4568; ZNVER1: # %bb.0: 4569; ZNVER1-NEXT: por %mm1, %mm0 # sched: [1:0.25] 4570; ZNVER1-NEXT: por (%rdi), %mm0 # sched: [8:0.50] 4571; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4572; ZNVER1-NEXT: retq # sched: [1:0.50] 4573 %1 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a0, x86_mmx %a1) 4574 %2 = load x86_mmx, x86_mmx *%a2, align 8 4575 %3 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %2) 4576 %4 = bitcast x86_mmx %3 to i64 4577 ret i64 %4 4578} 4579declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone 4580 4581define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4582; GENERIC-LABEL: test_psadbw: 4583; GENERIC: # %bb.0: 4584; GENERIC-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] 4585; GENERIC-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4586; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4587; GENERIC-NEXT: retq # sched: [1:1.00] 4588; 4589; ATOM-LABEL: test_psadbw: 4590; ATOM: # %bb.0: 4591; ATOM-NEXT: psadbw %mm1, %mm0 # sched: [4:2.00] 4592; ATOM-NEXT: psadbw (%rdi), %mm0 # sched: [4:2.00] 4593; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4594; ATOM-NEXT: retq # sched: [79:39.50] 4595; 4596; SLM-LABEL: test_psadbw: 4597; SLM: # %bb.0: 4598; SLM-NEXT: psadbw %mm1, %mm0 # sched: [4:1.00] 4599; SLM-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] 4600; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4601; SLM-NEXT: retq # sched: [4:1.00] 4602; 4603; SANDY-LABEL: test_psadbw: 4604; SANDY: # %bb.0: 4605; SANDY-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] 4606; SANDY-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4607; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4608; SANDY-NEXT: retq # sched: [1:1.00] 4609; 4610; HASWELL-LABEL: test_psadbw: 4611; HASWELL: # %bb.0: 4612; HASWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] 4613; HASWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4614; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4615; HASWELL-NEXT: retq # sched: [7:1.00] 4616; 
4617; BROADWELL-LABEL: test_psadbw: 4618; BROADWELL: # %bb.0: 4619; BROADWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] 4620; BROADWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4621; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4622; BROADWELL-NEXT: retq # sched: [7:1.00] 4623; 4624; SKYLAKE-LABEL: test_psadbw: 4625; SKYLAKE: # %bb.0: 4626; SKYLAKE-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] 4627; SKYLAKE-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] 4628; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4629; SKYLAKE-NEXT: retq # sched: [7:1.00] 4630; 4631; SKX-LABEL: test_psadbw: 4632; SKX: # %bb.0: 4633; SKX-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] 4634; SKX-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] 4635; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4636; SKX-NEXT: retq # sched: [7:1.00] 4637; 4638; BTVER2-LABEL: test_psadbw: 4639; BTVER2: # %bb.0: 4640; BTVER2-NEXT: psadbw %mm1, %mm0 # sched: [2:0.50] 4641; BTVER2-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] 4642; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4643; BTVER2-NEXT: retq # sched: [4:1.00] 4644; 4645; ZNVER1-LABEL: test_psadbw: 4646; ZNVER1: # %bb.0: 4647; ZNVER1-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] 4648; ZNVER1-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4649; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4650; ZNVER1-NEXT: retq # sched: [1:0.50] 4651 %1 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a0, x86_mmx %a1) 4652 %2 = load x86_mmx, x86_mmx *%a2, align 8 4653 %3 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %1, x86_mmx %2) 4654 %4 = bitcast x86_mmx %3 to i64 4655 ret i64 %4 4656} 4657declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone 4658 4659define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { 4660; GENERIC-LABEL: test_pshufb: 4661; GENERIC: # %bb.0: 4662; GENERIC-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] 4663; GENERIC-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] 4664; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4665; GENERIC-NEXT: 
retq # sched: [1:1.00] 4666; 4667; ATOM-LABEL: test_pshufb: 4668; ATOM: # %bb.0: 4669; ATOM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4670; ATOM-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] 4671; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4672; ATOM-NEXT: retq # sched: [79:39.50] 4673; 4674; SLM-LABEL: test_pshufb: 4675; SLM: # %bb.0: 4676; SLM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4677; SLM-NEXT: pshufb (%rdi), %mm0 # sched: [4:1.00] 4678; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4679; SLM-NEXT: retq # sched: [4:1.00] 4680; 4681; SANDY-LABEL: test_pshufb: 4682; SANDY: # %bb.0: 4683; SANDY-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] 4684; SANDY-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] 4685; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4686; SANDY-NEXT: retq # sched: [1:1.00] 4687; 4688; HASWELL-LABEL: test_pshufb: 4689; HASWELL: # %bb.0: 4690; HASWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4691; HASWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] 4692; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4693; HASWELL-NEXT: retq # sched: [7:1.00] 4694; 4695; BROADWELL-LABEL: test_pshufb: 4696; BROADWELL: # %bb.0: 4697; BROADWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4698; BROADWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] 4699; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4700; BROADWELL-NEXT: retq # sched: [7:1.00] 4701; 4702; SKYLAKE-LABEL: test_pshufb: 4703; SKYLAKE: # %bb.0: 4704; SKYLAKE-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4705; SKYLAKE-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] 4706; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4707; SKYLAKE-NEXT: retq # sched: [7:1.00] 4708; 4709; SKX-LABEL: test_pshufb: 4710; SKX: # %bb.0: 4711; SKX-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4712; SKX-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] 4713; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4714; SKX-NEXT: retq # sched: [7:1.00] 4715; 4716; BTVER2-LABEL: test_pshufb: 4717; BTVER2: # %bb.0: 4718; BTVER2-NEXT: pshufb %mm1, %mm0 # sched: 
[2:2.00] 4719; BTVER2-NEXT: pshufb (%rdi), %mm0 # sched: [7:2.00] 4720; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4721; BTVER2-NEXT: retq # sched: [4:1.00] 4722; 4723; ZNVER1-LABEL: test_pshufb: 4724; ZNVER1: # %bb.0: 4725; ZNVER1-NEXT: pshufb %mm1, %mm0 # sched: [1:0.25] 4726; ZNVER1-NEXT: pshufb (%rdi), %mm0 # sched: [8:0.50] 4727; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4728; ZNVER1-NEXT: retq # sched: [1:0.50] 4729 %1 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a0, x86_mmx %a1) 4730 %2 = load x86_mmx, x86_mmx *%a2, align 8 4731 %3 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %1, x86_mmx %2) 4732 %4 = bitcast x86_mmx %3 to i64 4733 ret i64 %4 4734} 4735declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone 4736 4737define i64 @test_pshufw(x86_mmx *%a0) optsize { 4738; GENERIC-LABEL: test_pshufw: 4739; GENERIC: # %bb.0: 4740; GENERIC-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4741; GENERIC-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4742; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4743; GENERIC-NEXT: retq # sched: [1:1.00] 4744; 4745; ATOM-LABEL: test_pshufw: 4746; ATOM: # %bb.0: 4747; ATOM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] 4748; ATOM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4749; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4750; ATOM-NEXT: retq # sched: [79:39.50] 4751; 4752; SLM-LABEL: test_pshufw: 4753; SLM: # %bb.0: 4754; SLM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [4:1.00] 4755; SLM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4756; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4757; SLM-NEXT: retq # sched: [4:1.00] 4758; 4759; SANDY-LABEL: test_pshufw: 4760; SANDY: # %bb.0: 4761; SANDY-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4762; SANDY-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4763; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 
4764; SANDY-NEXT: retq # sched: [1:1.00] 4765; 4766; HASWELL-LABEL: test_pshufw: 4767; HASWELL: # %bb.0: 4768; HASWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4769; HASWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4770; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4771; HASWELL-NEXT: retq # sched: [7:1.00] 4772; 4773; BROADWELL-LABEL: test_pshufw: 4774; BROADWELL: # %bb.0: 4775; BROADWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4776; BROADWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4777; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4778; BROADWELL-NEXT: retq # sched: [7:1.00] 4779; 4780; SKYLAKE-LABEL: test_pshufw: 4781; SKYLAKE: # %bb.0: 4782; SKYLAKE-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4783; SKYLAKE-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4784; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4785; SKYLAKE-NEXT: retq # sched: [7:1.00] 4786; 4787; SKX-LABEL: test_pshufw: 4788; SKX: # %bb.0: 4789; SKX-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4790; SKX-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4791; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4792; SKX-NEXT: retq # sched: [7:1.00] 4793; 4794; BTVER2-LABEL: test_pshufw: 4795; BTVER2: # %bb.0: 4796; BTVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4797; BTVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50] 4798; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4799; BTVER2-NEXT: retq # sched: [4:1.00] 4800; 4801; ZNVER1-LABEL: test_pshufw: 4802; ZNVER1: # %bb.0: 4803; ZNVER1-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [8:0.50] 4804; ZNVER1-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.25] 4805; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4806; ZNVER1-NEXT: retq # sched: [1:0.50] 4807 %1 = load x86_mmx, x86_mmx *%a0, align 8 4808 %2 = 
call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0) 4809 %3 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %2, i8 0) 4810 %4 = bitcast x86_mmx %3 to i64 4811 ret i64 %4 4812} 4813declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone 4814 4815define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4816; GENERIC-LABEL: test_psignb: 4817; GENERIC: # %bb.0: 4818; GENERIC-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4819; GENERIC-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4820; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4821; GENERIC-NEXT: retq # sched: [1:1.00] 4822; 4823; ATOM-LABEL: test_psignb: 4824; ATOM: # %bb.0: 4825; ATOM-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4826; ATOM-NEXT: psignb (%rdi), %mm0 # sched: [1:1.00] 4827; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4828; ATOM-NEXT: retq # sched: [79:39.50] 4829; 4830; SLM-LABEL: test_psignb: 4831; SLM: # %bb.0: 4832; SLM-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4833; SLM-NEXT: psignb (%rdi), %mm0 # sched: [4:1.00] 4834; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4835; SLM-NEXT: retq # sched: [4:1.00] 4836; 4837; SANDY-LABEL: test_psignb: 4838; SANDY: # %bb.0: 4839; SANDY-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4840; SANDY-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4841; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4842; SANDY-NEXT: retq # sched: [1:1.00] 4843; 4844; HASWELL-LABEL: test_psignb: 4845; HASWELL: # %bb.0: 4846; HASWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4847; HASWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4848; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4849; HASWELL-NEXT: retq # sched: [7:1.00] 4850; 4851; BROADWELL-LABEL: test_psignb: 4852; BROADWELL: # %bb.0: 4853; BROADWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4854; BROADWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4855; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4856; BROADWELL-NEXT: retq # sched: [7:1.00] 4857; 4858; SKYLAKE-LABEL: test_psignb: 4859; SKYLAKE: # %bb.0: 
4860; SKYLAKE-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4861; SKYLAKE-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4862; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4863; SKYLAKE-NEXT: retq # sched: [7:1.00] 4864; 4865; SKX-LABEL: test_psignb: 4866; SKX: # %bb.0: 4867; SKX-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4868; SKX-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4869; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4870; SKX-NEXT: retq # sched: [7:1.00] 4871; 4872; BTVER2-LABEL: test_psignb: 4873; BTVER2: # %bb.0: 4874; BTVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4875; BTVER2-NEXT: psignb (%rdi), %mm0 # sched: [6:1.00] 4876; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4877; BTVER2-NEXT: retq # sched: [4:1.00] 4878; 4879; ZNVER1-LABEL: test_psignb: 4880; ZNVER1: # %bb.0: 4881; ZNVER1-NEXT: psignb %mm1, %mm0 # sched: [1:0.25] 4882; ZNVER1-NEXT: psignb (%rdi), %mm0 # sched: [8:0.50] 4883; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4884; ZNVER1-NEXT: retq # sched: [1:0.50] 4885 %1 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) 4886 %2 = load x86_mmx, x86_mmx *%a2, align 8 4887 %3 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %1, x86_mmx %2) 4888 %4 = bitcast x86_mmx %3 to i64 4889 ret i64 %4 4890} 4891declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone 4892 4893define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4894; GENERIC-LABEL: test_psignd: 4895; GENERIC: # %bb.0: 4896; GENERIC-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4897; GENERIC-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4898; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4899; GENERIC-NEXT: retq # sched: [1:1.00] 4900; 4901; ATOM-LABEL: test_psignd: 4902; ATOM: # %bb.0: 4903; ATOM-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4904; ATOM-NEXT: psignd (%rdi), %mm0 # sched: [1:1.00] 4905; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4906; ATOM-NEXT: retq # sched: [79:39.50] 4907; 4908; SLM-LABEL: test_psignd: 4909; SLM: # %bb.0: 
4910; SLM-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4911; SLM-NEXT: psignd (%rdi), %mm0 # sched: [4:1.00] 4912; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4913; SLM-NEXT: retq # sched: [4:1.00] 4914; 4915; SANDY-LABEL: test_psignd: 4916; SANDY: # %bb.0: 4917; SANDY-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4918; SANDY-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4919; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4920; SANDY-NEXT: retq # sched: [1:1.00] 4921; 4922; HASWELL-LABEL: test_psignd: 4923; HASWELL: # %bb.0: 4924; HASWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4925; HASWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4926; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4927; HASWELL-NEXT: retq # sched: [7:1.00] 4928; 4929; BROADWELL-LABEL: test_psignd: 4930; BROADWELL: # %bb.0: 4931; BROADWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4932; BROADWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4933; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4934; BROADWELL-NEXT: retq # sched: [7:1.00] 4935; 4936; SKYLAKE-LABEL: test_psignd: 4937; SKYLAKE: # %bb.0: 4938; SKYLAKE-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4939; SKYLAKE-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4940; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4941; SKYLAKE-NEXT: retq # sched: [7:1.00] 4942; 4943; SKX-LABEL: test_psignd: 4944; SKX: # %bb.0: 4945; SKX-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4946; SKX-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4947; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4948; SKX-NEXT: retq # sched: [7:1.00] 4949; 4950; BTVER2-LABEL: test_psignd: 4951; BTVER2: # %bb.0: 4952; BTVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4953; BTVER2-NEXT: psignd (%rdi), %mm0 # sched: [6:1.00] 4954; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4955; BTVER2-NEXT: retq # sched: [4:1.00] 4956; 4957; ZNVER1-LABEL: test_psignd: 4958; ZNVER1: # %bb.0: 4959; ZNVER1-NEXT: psignd %mm1, %mm0 # sched: [1:0.25] 4960; ZNVER1-NEXT: psignd (%rdi), %mm0 # sched: [8:0.50] 4961; 
ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4962; ZNVER1-NEXT: retq # sched: [1:0.50] 4963 %1 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) 4964 %2 = load x86_mmx, x86_mmx *%a2, align 8 4965 %3 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %1, x86_mmx %2) 4966 %4 = bitcast x86_mmx %3 to i64 4967 ret i64 %4 4968} 4969declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone 4970 4971define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4972; GENERIC-LABEL: test_psignw: 4973; GENERIC: # %bb.0: 4974; GENERIC-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 4975; GENERIC-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 4976; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4977; GENERIC-NEXT: retq # sched: [1:1.00] 4978; 4979; ATOM-LABEL: test_psignw: 4980; ATOM: # %bb.0: 4981; ATOM-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 4982; ATOM-NEXT: psignw (%rdi), %mm0 # sched: [1:1.00] 4983; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4984; ATOM-NEXT: retq # sched: [79:39.50] 4985; 4986; SLM-LABEL: test_psignw: 4987; SLM: # %bb.0: 4988; SLM-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 4989; SLM-NEXT: psignw (%rdi), %mm0 # sched: [4:1.00] 4990; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4991; SLM-NEXT: retq # sched: [4:1.00] 4992; 4993; SANDY-LABEL: test_psignw: 4994; SANDY: # %bb.0: 4995; SANDY-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 4996; SANDY-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 4997; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4998; SANDY-NEXT: retq # sched: [1:1.00] 4999; 5000; HASWELL-LABEL: test_psignw: 5001; HASWELL: # %bb.0: 5002; HASWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5003; HASWELL-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 5004; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5005; HASWELL-NEXT: retq # sched: [7:1.00] 5006; 5007; BROADWELL-LABEL: test_psignw: 5008; BROADWELL: # %bb.0: 5009; BROADWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5010; BROADWELL-NEXT: psignw (%rdi), %mm0 # sched: 
[6:0.50] 5011; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5012; BROADWELL-NEXT: retq # sched: [7:1.00] 5013; 5014; SKYLAKE-LABEL: test_psignw: 5015; SKYLAKE: # %bb.0: 5016; SKYLAKE-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5017; SKYLAKE-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 5018; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5019; SKYLAKE-NEXT: retq # sched: [7:1.00] 5020; 5021; SKX-LABEL: test_psignw: 5022; SKX: # %bb.0: 5023; SKX-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5024; SKX-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 5025; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5026; SKX-NEXT: retq # sched: [7:1.00] 5027; 5028; BTVER2-LABEL: test_psignw: 5029; BTVER2: # %bb.0: 5030; BTVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5031; BTVER2-NEXT: psignw (%rdi), %mm0 # sched: [6:1.00] 5032; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5033; BTVER2-NEXT: retq # sched: [4:1.00] 5034; 5035; ZNVER1-LABEL: test_psignw: 5036; ZNVER1: # %bb.0: 5037; ZNVER1-NEXT: psignw %mm1, %mm0 # sched: [1:0.25] 5038; ZNVER1-NEXT: psignw (%rdi), %mm0 # sched: [8:0.50] 5039; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5040; ZNVER1-NEXT: retq # sched: [1:0.50] 5041 %1 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) 5042 %2 = load x86_mmx, x86_mmx *%a2, align 8 5043 %3 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %1, x86_mmx %2) 5044 %4 = bitcast x86_mmx %3 to i64 5045 ret i64 %4 5046} 5047declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone 5048 5049define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5050; GENERIC-LABEL: test_pslld: 5051; GENERIC: # %bb.0: 5052; GENERIC-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5053; GENERIC-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5054; GENERIC-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5055; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5056; GENERIC-NEXT: retq # sched: [1:1.00] 5057; 5058; ATOM-LABEL: test_pslld: 5059; ATOM: # %bb.0: 5060; ATOM-NEXT: pslld %mm1, %mm0 # 
sched: [2:1.00] 5061; ATOM-NEXT: pslld (%rdi), %mm0 # sched: [3:1.50] 5062; ATOM-NEXT: pslld $7, %mm0 # sched: [1:0.50] 5063; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5064; ATOM-NEXT: retq # sched: [79:39.50] 5065; 5066; SLM-LABEL: test_pslld: 5067; SLM: # %bb.0: 5068; SLM-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5069; SLM-NEXT: pslld (%rdi), %mm0 # sched: [4:1.00] 5070; SLM-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5071; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5072; SLM-NEXT: retq # sched: [4:1.00] 5073; 5074; SANDY-LABEL: test_pslld: 5075; SANDY: # %bb.0: 5076; SANDY-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5077; SANDY-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5078; SANDY-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5079; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5080; SANDY-NEXT: retq # sched: [1:1.00] 5081; 5082; HASWELL-LABEL: test_pslld: 5083; HASWELL: # %bb.0: 5084; HASWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5085; HASWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5086; HASWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5087; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5088; HASWELL-NEXT: retq # sched: [7:1.00] 5089; 5090; BROADWELL-LABEL: test_pslld: 5091; BROADWELL: # %bb.0: 5092; BROADWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5093; BROADWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5094; BROADWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5095; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5096; BROADWELL-NEXT: retq # sched: [7:1.00] 5097; 5098; SKYLAKE-LABEL: test_pslld: 5099; SKYLAKE: # %bb.0: 5100; SKYLAKE-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5101; SKYLAKE-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5102; SKYLAKE-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5103; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5104; SKYLAKE-NEXT: retq # sched: [7:1.00] 5105; 5106; SKX-LABEL: test_pslld: 5107; SKX: # %bb.0: 5108; SKX-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5109; SKX-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5110; SKX-NEXT: pslld $7, %mm0 
# sched: [1:1.00] 5111; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5112; SKX-NEXT: retq # sched: [7:1.00] 5113; 5114; BTVER2-LABEL: test_pslld: 5115; BTVER2: # %bb.0: 5116; BTVER2-NEXT: pslld %mm1, %mm0 # sched: [1:0.50] 5117; BTVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5118; BTVER2-NEXT: pslld $7, %mm0 # sched: [1:0.50] 5119; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5120; BTVER2-NEXT: retq # sched: [4:1.00] 5121; 5122; ZNVER1-LABEL: test_pslld: 5123; ZNVER1: # %bb.0: 5124; ZNVER1-NEXT: pslld %mm1, %mm0 # sched: [1:0.25] 5125; ZNVER1-NEXT: pslld (%rdi), %mm0 # sched: [8:0.50] 5126; ZNVER1-NEXT: pslld $7, %mm0 # sched: [1:0.25] 5127; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5128; ZNVER1-NEXT: retq # sched: [1:0.50] 5129 %1 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a0, x86_mmx %a1) 5130 %2 = load x86_mmx, x86_mmx *%a2, align 8 5131 %3 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2) 5132 %4 = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %3, i32 7) 5133 %5 = bitcast x86_mmx %4 to i64 5134 ret i64 %5 5135} 5136declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone 5137declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone 5138 5139define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5140; GENERIC-LABEL: test_psllq: 5141; GENERIC: # %bb.0: 5142; GENERIC-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5143; GENERIC-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5144; GENERIC-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5145; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5146; GENERIC-NEXT: retq # sched: [1:1.00] 5147; 5148; ATOM-LABEL: test_psllq: 5149; ATOM: # %bb.0: 5150; ATOM-NEXT: psllq %mm1, %mm0 # sched: [2:1.00] 5151; ATOM-NEXT: psllq (%rdi), %mm0 # sched: [3:1.50] 5152; ATOM-NEXT: psllq $7, %mm0 # sched: [1:0.50] 5153; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5154; ATOM-NEXT: retq # sched: [79:39.50] 5155; 5156; SLM-LABEL: test_psllq: 5157; SLM: # %bb.0: 5158; SLM-NEXT: psllq %mm1, %mm0 # 
sched: [1:1.00] 5159; SLM-NEXT: psllq (%rdi), %mm0 # sched: [4:1.00] 5160; SLM-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5161; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5162; SLM-NEXT: retq # sched: [4:1.00] 5163; 5164; SANDY-LABEL: test_psllq: 5165; SANDY: # %bb.0: 5166; SANDY-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5167; SANDY-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5168; SANDY-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5169; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5170; SANDY-NEXT: retq # sched: [1:1.00] 5171; 5172; HASWELL-LABEL: test_psllq: 5173; HASWELL: # %bb.0: 5174; HASWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5175; HASWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5176; HASWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5177; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5178; HASWELL-NEXT: retq # sched: [7:1.00] 5179; 5180; BROADWELL-LABEL: test_psllq: 5181; BROADWELL: # %bb.0: 5182; BROADWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5183; BROADWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5184; BROADWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5185; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5186; BROADWELL-NEXT: retq # sched: [7:1.00] 5187; 5188; SKYLAKE-LABEL: test_psllq: 5189; SKYLAKE: # %bb.0: 5190; SKYLAKE-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5191; SKYLAKE-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5192; SKYLAKE-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5193; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5194; SKYLAKE-NEXT: retq # sched: [7:1.00] 5195; 5196; SKX-LABEL: test_psllq: 5197; SKX: # %bb.0: 5198; SKX-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5199; SKX-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5200; SKX-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5201; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5202; SKX-NEXT: retq # sched: [7:1.00] 5203; 5204; BTVER2-LABEL: test_psllq: 5205; BTVER2: # %bb.0: 5206; BTVER2-NEXT: psllq %mm1, %mm0 # sched: [1:0.50] 5207; BTVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5208; BTVER2-NEXT: psllq 
$7, %mm0 # sched: [1:0.50] 5209; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5210; BTVER2-NEXT: retq # sched: [4:1.00] 5211; 5212; ZNVER1-LABEL: test_psllq: 5213; ZNVER1: # %bb.0: 5214; ZNVER1-NEXT: psllq %mm1, %mm0 # sched: [1:0.25] 5215; ZNVER1-NEXT: psllq (%rdi), %mm0 # sched: [8:0.50] 5216; ZNVER1-NEXT: psllq $7, %mm0 # sched: [1:0.25] 5217; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5218; ZNVER1-NEXT: retq # sched: [1:0.50] 5219 %1 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a0, x86_mmx %a1) 5220 %2 = load x86_mmx, x86_mmx *%a2, align 8 5221 %3 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %1, x86_mmx %2) 5222 %4 = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %3, i32 7) 5223 %5 = bitcast x86_mmx %4 to i64 5224 ret i64 %5 5225} 5226declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone 5227declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone 5228 5229define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5230; GENERIC-LABEL: test_psllw: 5231; GENERIC: # %bb.0: 5232; GENERIC-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5233; GENERIC-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5234; GENERIC-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5235; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5236; GENERIC-NEXT: retq # sched: [1:1.00] 5237; 5238; ATOM-LABEL: test_psllw: 5239; ATOM: # %bb.0: 5240; ATOM-NEXT: psllw %mm1, %mm0 # sched: [2:1.00] 5241; ATOM-NEXT: psllw (%rdi), %mm0 # sched: [3:1.50] 5242; ATOM-NEXT: psllw $7, %mm0 # sched: [1:0.50] 5243; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5244; ATOM-NEXT: retq # sched: [79:39.50] 5245; 5246; SLM-LABEL: test_psllw: 5247; SLM: # %bb.0: 5248; SLM-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5249; SLM-NEXT: psllw (%rdi), %mm0 # sched: [4:1.00] 5250; SLM-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5251; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5252; SLM-NEXT: retq # sched: [4:1.00] 5253; 5254; SANDY-LABEL: test_psllw: 5255; SANDY: # %bb.0: 5256; SANDY-NEXT: psllw %mm1, %mm0 # 
sched: [1:1.00] 5257; SANDY-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5258; SANDY-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5259; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5260; SANDY-NEXT: retq # sched: [1:1.00] 5261; 5262; HASWELL-LABEL: test_psllw: 5263; HASWELL: # %bb.0: 5264; HASWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5265; HASWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5266; HASWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5267; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5268; HASWELL-NEXT: retq # sched: [7:1.00] 5269; 5270; BROADWELL-LABEL: test_psllw: 5271; BROADWELL: # %bb.0: 5272; BROADWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5273; BROADWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5274; BROADWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5275; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5276; BROADWELL-NEXT: retq # sched: [7:1.00] 5277; 5278; SKYLAKE-LABEL: test_psllw: 5279; SKYLAKE: # %bb.0: 5280; SKYLAKE-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5281; SKYLAKE-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5282; SKYLAKE-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5283; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5284; SKYLAKE-NEXT: retq # sched: [7:1.00] 5285; 5286; SKX-LABEL: test_psllw: 5287; SKX: # %bb.0: 5288; SKX-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5289; SKX-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5290; SKX-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5291; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5292; SKX-NEXT: retq # sched: [7:1.00] 5293; 5294; BTVER2-LABEL: test_psllw: 5295; BTVER2: # %bb.0: 5296; BTVER2-NEXT: psllw %mm1, %mm0 # sched: [1:0.50] 5297; BTVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5298; BTVER2-NEXT: psllw $7, %mm0 # sched: [1:0.50] 5299; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5300; BTVER2-NEXT: retq # sched: [4:1.00] 5301; 5302; ZNVER1-LABEL: test_psllw: 5303; ZNVER1: # %bb.0: 5304; ZNVER1-NEXT: psllw %mm1, %mm0 # sched: [1:0.25] 5305; ZNVER1-NEXT: psllw (%rdi), %mm0 # sched: [8:0.50] 5306; 
ZNVER1-NEXT: psllw $7, %mm0 # sched: [1:0.25] 5307; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5308; ZNVER1-NEXT: retq # sched: [1:0.50] 5309 %1 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a0, x86_mmx %a1) 5310 %2 = load x86_mmx, x86_mmx *%a2, align 8 5311 %3 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %1, x86_mmx %2) 5312 %4 = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %3, i32 7) 5313 %5 = bitcast x86_mmx %4 to i64 5314 ret i64 %5 5315} 5316declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone 5317declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone 5318 5319define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5320; GENERIC-LABEL: test_psrad: 5321; GENERIC: # %bb.0: 5322; GENERIC-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5323; GENERIC-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5324; GENERIC-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5325; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5326; GENERIC-NEXT: retq # sched: [1:1.00] 5327; 5328; ATOM-LABEL: test_psrad: 5329; ATOM: # %bb.0: 5330; ATOM-NEXT: psrad %mm1, %mm0 # sched: [2:1.00] 5331; ATOM-NEXT: psrad (%rdi), %mm0 # sched: [3:1.50] 5332; ATOM-NEXT: psrad $7, %mm0 # sched: [1:0.50] 5333; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5334; ATOM-NEXT: retq # sched: [79:39.50] 5335; 5336; SLM-LABEL: test_psrad: 5337; SLM: # %bb.0: 5338; SLM-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5339; SLM-NEXT: psrad (%rdi), %mm0 # sched: [4:1.00] 5340; SLM-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5341; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5342; SLM-NEXT: retq # sched: [4:1.00] 5343; 5344; SANDY-LABEL: test_psrad: 5345; SANDY: # %bb.0: 5346; SANDY-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5347; SANDY-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5348; SANDY-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5349; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5350; SANDY-NEXT: retq # sched: [1:1.00] 5351; 5352; HASWELL-LABEL: test_psrad: 5353; HASWELL: # %bb.0: 5354; HASWELL-NEXT: 
psrad %mm1, %mm0 # sched: [1:1.00] 5355; HASWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5356; HASWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5357; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5358; HASWELL-NEXT: retq # sched: [7:1.00] 5359; 5360; BROADWELL-LABEL: test_psrad: 5361; BROADWELL: # %bb.0: 5362; BROADWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5363; BROADWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5364; BROADWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5365; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5366; BROADWELL-NEXT: retq # sched: [7:1.00] 5367; 5368; SKYLAKE-LABEL: test_psrad: 5369; SKYLAKE: # %bb.0: 5370; SKYLAKE-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5371; SKYLAKE-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5372; SKYLAKE-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5373; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5374; SKYLAKE-NEXT: retq # sched: [7:1.00] 5375; 5376; SKX-LABEL: test_psrad: 5377; SKX: # %bb.0: 5378; SKX-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5379; SKX-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5380; SKX-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5381; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5382; SKX-NEXT: retq # sched: [7:1.00] 5383; 5384; BTVER2-LABEL: test_psrad: 5385; BTVER2: # %bb.0: 5386; BTVER2-NEXT: psrad %mm1, %mm0 # sched: [1:0.50] 5387; BTVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5388; BTVER2-NEXT: psrad $7, %mm0 # sched: [1:0.50] 5389; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5390; BTVER2-NEXT: retq # sched: [4:1.00] 5391; 5392; ZNVER1-LABEL: test_psrad: 5393; ZNVER1: # %bb.0: 5394; ZNVER1-NEXT: psrad %mm1, %mm0 # sched: [1:0.25] 5395; ZNVER1-NEXT: psrad (%rdi), %mm0 # sched: [8:0.50] 5396; ZNVER1-NEXT: psrad $7, %mm0 # sched: [1:0.25] 5397; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5398; ZNVER1-NEXT: retq # sched: [1:0.50] 5399 %1 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a0, x86_mmx %a1) 5400 %2 = load x86_mmx, x86_mmx *%a2, align 8 5401 %3 = call x86_mmx 
@llvm.x86.mmx.psra.d(x86_mmx %1, x86_mmx %2) 5402 %4 = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %3, i32 7) 5403 %5 = bitcast x86_mmx %4 to i64 5404 ret i64 %5 5405} 5406declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone 5407declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone 5408 5409define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5410; GENERIC-LABEL: test_psraw: 5411; GENERIC: # %bb.0: 5412; GENERIC-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5413; GENERIC-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5414; GENERIC-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5415; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5416; GENERIC-NEXT: retq # sched: [1:1.00] 5417; 5418; ATOM-LABEL: test_psraw: 5419; ATOM: # %bb.0: 5420; ATOM-NEXT: psraw %mm1, %mm0 # sched: [2:1.00] 5421; ATOM-NEXT: psraw (%rdi), %mm0 # sched: [3:1.50] 5422; ATOM-NEXT: psraw $7, %mm0 # sched: [1:0.50] 5423; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5424; ATOM-NEXT: retq # sched: [79:39.50] 5425; 5426; SLM-LABEL: test_psraw: 5427; SLM: # %bb.0: 5428; SLM-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5429; SLM-NEXT: psraw (%rdi), %mm0 # sched: [4:1.00] 5430; SLM-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5431; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5432; SLM-NEXT: retq # sched: [4:1.00] 5433; 5434; SANDY-LABEL: test_psraw: 5435; SANDY: # %bb.0: 5436; SANDY-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5437; SANDY-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5438; SANDY-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5439; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5440; SANDY-NEXT: retq # sched: [1:1.00] 5441; 5442; HASWELL-LABEL: test_psraw: 5443; HASWELL: # %bb.0: 5444; HASWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5445; HASWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5446; HASWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5447; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5448; HASWELL-NEXT: retq # sched: [7:1.00] 5449; 5450; BROADWELL-LABEL: test_psraw: 
5451; BROADWELL: # %bb.0: 5452; BROADWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5453; BROADWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5454; BROADWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5455; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5456; BROADWELL-NEXT: retq # sched: [7:1.00] 5457; 5458; SKYLAKE-LABEL: test_psraw: 5459; SKYLAKE: # %bb.0: 5460; SKYLAKE-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5461; SKYLAKE-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5462; SKYLAKE-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5463; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5464; SKYLAKE-NEXT: retq # sched: [7:1.00] 5465; 5466; SKX-LABEL: test_psraw: 5467; SKX: # %bb.0: 5468; SKX-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5469; SKX-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5470; SKX-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5471; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5472; SKX-NEXT: retq # sched: [7:1.00] 5473; 5474; BTVER2-LABEL: test_psraw: 5475; BTVER2: # %bb.0: 5476; BTVER2-NEXT: psraw %mm1, %mm0 # sched: [1:0.50] 5477; BTVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5478; BTVER2-NEXT: psraw $7, %mm0 # sched: [1:0.50] 5479; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5480; BTVER2-NEXT: retq # sched: [4:1.00] 5481; 5482; ZNVER1-LABEL: test_psraw: 5483; ZNVER1: # %bb.0: 5484; ZNVER1-NEXT: psraw %mm1, %mm0 # sched: [1:0.25] 5485; ZNVER1-NEXT: psraw (%rdi), %mm0 # sched: [8:0.50] 5486; ZNVER1-NEXT: psraw $7, %mm0 # sched: [1:0.25] 5487; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5488; ZNVER1-NEXT: retq # sched: [1:0.50] 5489 %1 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a0, x86_mmx %a1) 5490 %2 = load x86_mmx, x86_mmx *%a2, align 8 5491 %3 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %1, x86_mmx %2) 5492 %4 = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %3, i32 7) 5493 %5 = bitcast x86_mmx %4 to i64 5494 ret i64 %5 5495} 5496declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone 5497declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) 
nounwind readnone 5498 5499define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5500; GENERIC-LABEL: test_psrld: 5501; GENERIC: # %bb.0: 5502; GENERIC-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5503; GENERIC-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5504; GENERIC-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5505; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5506; GENERIC-NEXT: retq # sched: [1:1.00] 5507; 5508; ATOM-LABEL: test_psrld: 5509; ATOM: # %bb.0: 5510; ATOM-NEXT: psrld %mm1, %mm0 # sched: [2:1.00] 5511; ATOM-NEXT: psrld (%rdi), %mm0 # sched: [3:1.50] 5512; ATOM-NEXT: psrld $7, %mm0 # sched: [1:0.50] 5513; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5514; ATOM-NEXT: retq # sched: [79:39.50] 5515; 5516; SLM-LABEL: test_psrld: 5517; SLM: # %bb.0: 5518; SLM-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5519; SLM-NEXT: psrld (%rdi), %mm0 # sched: [4:1.00] 5520; SLM-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5521; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5522; SLM-NEXT: retq # sched: [4:1.00] 5523; 5524; SANDY-LABEL: test_psrld: 5525; SANDY: # %bb.0: 5526; SANDY-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5527; SANDY-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5528; SANDY-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5529; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5530; SANDY-NEXT: retq # sched: [1:1.00] 5531; 5532; HASWELL-LABEL: test_psrld: 5533; HASWELL: # %bb.0: 5534; HASWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5535; HASWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5536; HASWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5537; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5538; HASWELL-NEXT: retq # sched: [7:1.00] 5539; 5540; BROADWELL-LABEL: test_psrld: 5541; BROADWELL: # %bb.0: 5542; BROADWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5543; BROADWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5544; BROADWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5545; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5546; BROADWELL-NEXT: retq # sched: [7:1.00] 
5547; 5548; SKYLAKE-LABEL: test_psrld: 5549; SKYLAKE: # %bb.0: 5550; SKYLAKE-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5551; SKYLAKE-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5552; SKYLAKE-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5553; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5554; SKYLAKE-NEXT: retq # sched: [7:1.00] 5555; 5556; SKX-LABEL: test_psrld: 5557; SKX: # %bb.0: 5558; SKX-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5559; SKX-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5560; SKX-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5561; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5562; SKX-NEXT: retq # sched: [7:1.00] 5563; 5564; BTVER2-LABEL: test_psrld: 5565; BTVER2: # %bb.0: 5566; BTVER2-NEXT: psrld %mm1, %mm0 # sched: [1:0.50] 5567; BTVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5568; BTVER2-NEXT: psrld $7, %mm0 # sched: [1:0.50] 5569; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5570; BTVER2-NEXT: retq # sched: [4:1.00] 5571; 5572; ZNVER1-LABEL: test_psrld: 5573; ZNVER1: # %bb.0: 5574; ZNVER1-NEXT: psrld %mm1, %mm0 # sched: [1:0.25] 5575; ZNVER1-NEXT: psrld (%rdi), %mm0 # sched: [8:0.50] 5576; ZNVER1-NEXT: psrld $7, %mm0 # sched: [1:0.25] 5577; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5578; ZNVER1-NEXT: retq # sched: [1:0.50] 5579 %1 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a0, x86_mmx %a1) 5580 %2 = load x86_mmx, x86_mmx *%a2, align 8 5581 %3 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %1, x86_mmx %2) 5582 %4 = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %3, i32 7) 5583 %5 = bitcast x86_mmx %4 to i64 5584 ret i64 %5 5585} 5586declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone 5587declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone 5588 5589define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5590; GENERIC-LABEL: test_psrlq: 5591; GENERIC: # %bb.0: 5592; GENERIC-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5593; GENERIC-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5594; GENERIC-NEXT: psrlq $7, 
%mm0 # sched: [1:1.00] 5595; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5596; GENERIC-NEXT: retq # sched: [1:1.00] 5597; 5598; ATOM-LABEL: test_psrlq: 5599; ATOM: # %bb.0: 5600; ATOM-NEXT: psrlq %mm1, %mm0 # sched: [2:1.00] 5601; ATOM-NEXT: psrlq (%rdi), %mm0 # sched: [3:1.50] 5602; ATOM-NEXT: psrlq $7, %mm0 # sched: [1:0.50] 5603; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5604; ATOM-NEXT: retq # sched: [79:39.50] 5605; 5606; SLM-LABEL: test_psrlq: 5607; SLM: # %bb.0: 5608; SLM-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5609; SLM-NEXT: psrlq (%rdi), %mm0 # sched: [4:1.00] 5610; SLM-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5611; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5612; SLM-NEXT: retq # sched: [4:1.00] 5613; 5614; SANDY-LABEL: test_psrlq: 5615; SANDY: # %bb.0: 5616; SANDY-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5617; SANDY-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5618; SANDY-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5619; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5620; SANDY-NEXT: retq # sched: [1:1.00] 5621; 5622; HASWELL-LABEL: test_psrlq: 5623; HASWELL: # %bb.0: 5624; HASWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5625; HASWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5626; HASWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5627; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5628; HASWELL-NEXT: retq # sched: [7:1.00] 5629; 5630; BROADWELL-LABEL: test_psrlq: 5631; BROADWELL: # %bb.0: 5632; BROADWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5633; BROADWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5634; BROADWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5635; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5636; BROADWELL-NEXT: retq # sched: [7:1.00] 5637; 5638; SKYLAKE-LABEL: test_psrlq: 5639; SKYLAKE: # %bb.0: 5640; SKYLAKE-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5641; SKYLAKE-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5642; SKYLAKE-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5643; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5644; SKYLAKE-NEXT: 
retq # sched: [7:1.00] 5645; 5646; SKX-LABEL: test_psrlq: 5647; SKX: # %bb.0: 5648; SKX-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5649; SKX-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5650; SKX-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5651; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5652; SKX-NEXT: retq # sched: [7:1.00] 5653; 5654; BTVER2-LABEL: test_psrlq: 5655; BTVER2: # %bb.0: 5656; BTVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:0.50] 5657; BTVER2-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5658; BTVER2-NEXT: psrlq $7, %mm0 # sched: [1:0.50] 5659; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5660; BTVER2-NEXT: retq # sched: [4:1.00] 5661; 5662; ZNVER1-LABEL: test_psrlq: 5663; ZNVER1: # %bb.0: 5664; ZNVER1-NEXT: psrlq %mm1, %mm0 # sched: [1:0.25] 5665; ZNVER1-NEXT: psrlq (%rdi), %mm0 # sched: [8:0.50] 5666; ZNVER1-NEXT: psrlq $7, %mm0 # sched: [1:0.25] 5667; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5668; ZNVER1-NEXT: retq # sched: [1:0.50] 5669 %1 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a0, x86_mmx %a1) 5670 %2 = load x86_mmx, x86_mmx *%a2, align 8 5671 %3 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %1, x86_mmx %2) 5672 %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %3, i32 7) 5673 %5 = bitcast x86_mmx %4 to i64 5674 ret i64 %5 5675} 5676declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone 5677declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone 5678 5679define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5680; GENERIC-LABEL: test_psrlw: 5681; GENERIC: # %bb.0: 5682; GENERIC-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5683; GENERIC-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5684; GENERIC-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5685; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5686; GENERIC-NEXT: retq # sched: [1:1.00] 5687; 5688; ATOM-LABEL: test_psrlw: 5689; ATOM: # %bb.0: 5690; ATOM-NEXT: psrlw %mm1, %mm0 # sched: [2:1.00] 5691; ATOM-NEXT: psrlw (%rdi), %mm0 # sched: [3:1.50] 5692; ATOM-NEXT: psrlw 
$7, %mm0 # sched: [1:0.50] 5693; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5694; ATOM-NEXT: retq # sched: [79:39.50] 5695; 5696; SLM-LABEL: test_psrlw: 5697; SLM: # %bb.0: 5698; SLM-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5699; SLM-NEXT: psrlw (%rdi), %mm0 # sched: [4:1.00] 5700; SLM-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5701; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5702; SLM-NEXT: retq # sched: [4:1.00] 5703; 5704; SANDY-LABEL: test_psrlw: 5705; SANDY: # %bb.0: 5706; SANDY-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5707; SANDY-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5708; SANDY-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5709; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5710; SANDY-NEXT: retq # sched: [1:1.00] 5711; 5712; HASWELL-LABEL: test_psrlw: 5713; HASWELL: # %bb.0: 5714; HASWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5715; HASWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5716; HASWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5717; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5718; HASWELL-NEXT: retq # sched: [7:1.00] 5719; 5720; BROADWELL-LABEL: test_psrlw: 5721; BROADWELL: # %bb.0: 5722; BROADWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5723; BROADWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5724; BROADWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5725; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5726; BROADWELL-NEXT: retq # sched: [7:1.00] 5727; 5728; SKYLAKE-LABEL: test_psrlw: 5729; SKYLAKE: # %bb.0: 5730; SKYLAKE-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5731; SKYLAKE-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5732; SKYLAKE-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5733; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5734; SKYLAKE-NEXT: retq # sched: [7:1.00] 5735; 5736; SKX-LABEL: test_psrlw: 5737; SKX: # %bb.0: 5738; SKX-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5739; SKX-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5740; SKX-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5741; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5742; SKX-NEXT: retq # 
sched: [7:1.00] 5743; 5744; BTVER2-LABEL: test_psrlw: 5745; BTVER2: # %bb.0: 5746; BTVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:0.50] 5747; BTVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5748; BTVER2-NEXT: psrlw $7, %mm0 # sched: [1:0.50] 5749; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5750; BTVER2-NEXT: retq # sched: [4:1.00] 5751; 5752; ZNVER1-LABEL: test_psrlw: 5753; ZNVER1: # %bb.0: 5754; ZNVER1-NEXT: psrlw %mm1, %mm0 # sched: [1:0.25] 5755; ZNVER1-NEXT: psrlw (%rdi), %mm0 # sched: [8:0.50] 5756; ZNVER1-NEXT: psrlw $7, %mm0 # sched: [1:0.25] 5757; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5758; ZNVER1-NEXT: retq # sched: [1:0.50] 5759 %1 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a0, x86_mmx %a1) 5760 %2 = load x86_mmx, x86_mmx *%a2, align 8 5761 %3 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %1, x86_mmx %2) 5762 %4 = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %3, i32 7) 5763 %5 = bitcast x86_mmx %4 to i64 5764 ret i64 %5 5765} 5766declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone 5767declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone 5768 5769define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5770; GENERIC-LABEL: test_psubb: 5771; GENERIC: # %bb.0: 5772; GENERIC-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] 5773; GENERIC-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] 5774; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5775; GENERIC-NEXT: retq # sched: [1:1.00] 5776; 5777; ATOM-LABEL: test_psubb: 5778; ATOM: # %bb.0: 5779; ATOM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5780; ATOM-NEXT: psubb (%rdi), %mm0 # sched: [1:1.00] 5781; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5782; ATOM-NEXT: retq # sched: [79:39.50] 5783; 5784; SLM-LABEL: test_psubb: 5785; SLM: # %bb.0: 5786; SLM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5787; SLM-NEXT: psubb (%rdi), %mm0 # sched: [4:1.00] 5788; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5789; SLM-NEXT: retq # sched: [4:1.00] 5790; 5791; SANDY-LABEL: test_psubb: 
5792; SANDY: # %bb.0: 5793; SANDY-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] 5794; SANDY-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] 5795; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5796; SANDY-NEXT: retq # sched: [1:1.00] 5797; 5798; HASWELL-LABEL: test_psubb: 5799; HASWELL: # %bb.0: 5800; HASWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5801; HASWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] 5802; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5803; HASWELL-NEXT: retq # sched: [7:1.00] 5804; 5805; BROADWELL-LABEL: test_psubb: 5806; BROADWELL: # %bb.0: 5807; BROADWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5808; BROADWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] 5809; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5810; BROADWELL-NEXT: retq # sched: [7:1.00] 5811; 5812; SKYLAKE-LABEL: test_psubb: 5813; SKYLAKE: # %bb.0: 5814; SKYLAKE-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5815; SKYLAKE-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] 5816; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5817; SKYLAKE-NEXT: retq # sched: [7:1.00] 5818; 5819; SKX-LABEL: test_psubb: 5820; SKX: # %bb.0: 5821; SKX-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5822; SKX-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] 5823; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5824; SKX-NEXT: retq # sched: [7:1.00] 5825; 5826; BTVER2-LABEL: test_psubb: 5827; BTVER2: # %bb.0: 5828; BTVER2-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5829; BTVER2-NEXT: psubb (%rdi), %mm0 # sched: [6:1.00] 5830; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5831; BTVER2-NEXT: retq # sched: [4:1.00] 5832; 5833; ZNVER1-LABEL: test_psubb: 5834; ZNVER1: # %bb.0: 5835; ZNVER1-NEXT: psubb %mm1, %mm0 # sched: [1:0.25] 5836; ZNVER1-NEXT: psubb (%rdi), %mm0 # sched: [8:0.50] 5837; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5838; ZNVER1-NEXT: retq # sched: [1:0.50] 5839 %1 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a0, x86_mmx %a1) 5840 %2 = load x86_mmx, x86_mmx *%a2, align 8 5841 %3 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx 
%1, x86_mmx %2) 5842 %4 = bitcast x86_mmx %3 to i64 5843 ret i64 %4 5844} 5845declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone 5846 5847define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5848; GENERIC-LABEL: test_psubd: 5849; GENERIC: # %bb.0: 5850; GENERIC-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] 5851; GENERIC-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] 5852; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5853; GENERIC-NEXT: retq # sched: [1:1.00] 5854; 5855; ATOM-LABEL: test_psubd: 5856; ATOM: # %bb.0: 5857; ATOM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5858; ATOM-NEXT: psubd (%rdi), %mm0 # sched: [1:1.00] 5859; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5860; ATOM-NEXT: retq # sched: [79:39.50] 5861; 5862; SLM-LABEL: test_psubd: 5863; SLM: # %bb.0: 5864; SLM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5865; SLM-NEXT: psubd (%rdi), %mm0 # sched: [4:1.00] 5866; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5867; SLM-NEXT: retq # sched: [4:1.00] 5868; 5869; SANDY-LABEL: test_psubd: 5870; SANDY: # %bb.0: 5871; SANDY-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] 5872; SANDY-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] 5873; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5874; SANDY-NEXT: retq # sched: [1:1.00] 5875; 5876; HASWELL-LABEL: test_psubd: 5877; HASWELL: # %bb.0: 5878; HASWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5879; HASWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] 5880; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5881; HASWELL-NEXT: retq # sched: [7:1.00] 5882; 5883; BROADWELL-LABEL: test_psubd: 5884; BROADWELL: # %bb.0: 5885; BROADWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5886; BROADWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] 5887; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5888; BROADWELL-NEXT: retq # sched: [7:1.00] 5889; 5890; SKYLAKE-LABEL: test_psubd: 5891; SKYLAKE: # %bb.0: 5892; SKYLAKE-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5893; SKYLAKE-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] 5894; 
SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5895; SKYLAKE-NEXT: retq # sched: [7:1.00] 5896; 5897; SKX-LABEL: test_psubd: 5898; SKX: # %bb.0: 5899; SKX-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5900; SKX-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] 5901; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5902; SKX-NEXT: retq # sched: [7:1.00] 5903; 5904; BTVER2-LABEL: test_psubd: 5905; BTVER2: # %bb.0: 5906; BTVER2-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5907; BTVER2-NEXT: psubd (%rdi), %mm0 # sched: [6:1.00] 5908; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5909; BTVER2-NEXT: retq # sched: [4:1.00] 5910; 5911; ZNVER1-LABEL: test_psubd: 5912; ZNVER1: # %bb.0: 5913; ZNVER1-NEXT: psubd %mm1, %mm0 # sched: [1:0.25] 5914; ZNVER1-NEXT: psubd (%rdi), %mm0 # sched: [8:0.50] 5915; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5916; ZNVER1-NEXT: retq # sched: [1:0.50] 5917 %1 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a0, x86_mmx %a1) 5918 %2 = load x86_mmx, x86_mmx *%a2, align 8 5919 %3 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %1, x86_mmx %2) 5920 %4 = bitcast x86_mmx %3 to i64 5921 ret i64 %4 5922} 5923declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone 5924 5925define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5926; GENERIC-LABEL: test_psubq: 5927; GENERIC: # %bb.0: 5928; GENERIC-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] 5929; GENERIC-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] 5930; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5931; GENERIC-NEXT: retq # sched: [1:1.00] 5932; 5933; ATOM-LABEL: test_psubq: 5934; ATOM: # %bb.0: 5935; ATOM-NEXT: psubq %mm1, %mm0 # sched: [2:1.00] 5936; ATOM-NEXT: psubq (%rdi), %mm0 # sched: [3:1.50] 5937; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5938; ATOM-NEXT: retq # sched: [79:39.50] 5939; 5940; SLM-LABEL: test_psubq: 5941; SLM: # %bb.0: 5942; SLM-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5943; SLM-NEXT: psubq (%rdi), %mm0 # sched: [4:1.00] 5944; SLM-NEXT: movq %mm0, %rax # sched: 
[1:0.50] 5945; SLM-NEXT: retq # sched: [4:1.00] 5946; 5947; SANDY-LABEL: test_psubq: 5948; SANDY: # %bb.0: 5949; SANDY-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] 5950; SANDY-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] 5951; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5952; SANDY-NEXT: retq # sched: [1:1.00] 5953; 5954; HASWELL-LABEL: test_psubq: 5955; HASWELL: # %bb.0: 5956; HASWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5957; HASWELL-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] 5958; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5959; HASWELL-NEXT: retq # sched: [7:1.00] 5960; 5961; BROADWELL-LABEL: test_psubq: 5962; BROADWELL: # %bb.0: 5963; BROADWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5964; BROADWELL-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] 5965; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5966; BROADWELL-NEXT: retq # sched: [7:1.00] 5967; 5968; SKYLAKE-LABEL: test_psubq: 5969; SKYLAKE: # %bb.0: 5970; SKYLAKE-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5971; SKYLAKE-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] 5972; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5973; SKYLAKE-NEXT: retq # sched: [7:1.00] 5974; 5975; SKX-LABEL: test_psubq: 5976; SKX: # %bb.0: 5977; SKX-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5978; SKX-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] 5979; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5980; SKX-NEXT: retq # sched: [7:1.00] 5981; 5982; BTVER2-LABEL: test_psubq: 5983; BTVER2: # %bb.0: 5984; BTVER2-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5985; BTVER2-NEXT: psubq (%rdi), %mm0 # sched: [6:1.00] 5986; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5987; BTVER2-NEXT: retq # sched: [4:1.00] 5988; 5989; ZNVER1-LABEL: test_psubq: 5990; ZNVER1: # %bb.0: 5991; ZNVER1-NEXT: psubq %mm1, %mm0 # sched: [1:0.25] 5992; ZNVER1-NEXT: psubq (%rdi), %mm0 # sched: [8:0.50] 5993; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5994; ZNVER1-NEXT: retq # sched: [1:0.50] 5995 %1 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a0, x86_mmx %a1) 5996 %2 = 
load x86_mmx, x86_mmx *%a2, align 8 5997 %3 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %1, x86_mmx %2) 5998 %4 = bitcast x86_mmx %3 to i64 5999 ret i64 %4 6000} 6001declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone 6002 6003define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6004; GENERIC-LABEL: test_psubsb: 6005; GENERIC: # %bb.0: 6006; GENERIC-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] 6007; GENERIC-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] 6008; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6009; GENERIC-NEXT: retq # sched: [1:1.00] 6010; 6011; ATOM-LABEL: test_psubsb: 6012; ATOM: # %bb.0: 6013; ATOM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6014; ATOM-NEXT: psubsb (%rdi), %mm0 # sched: [1:1.00] 6015; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6016; ATOM-NEXT: retq # sched: [79:39.50] 6017; 6018; SLM-LABEL: test_psubsb: 6019; SLM: # %bb.0: 6020; SLM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6021; SLM-NEXT: psubsb (%rdi), %mm0 # sched: [4:1.00] 6022; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6023; SLM-NEXT: retq # sched: [4:1.00] 6024; 6025; SANDY-LABEL: test_psubsb: 6026; SANDY: # %bb.0: 6027; SANDY-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] 6028; SANDY-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] 6029; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6030; SANDY-NEXT: retq # sched: [1:1.00] 6031; 6032; HASWELL-LABEL: test_psubsb: 6033; HASWELL: # %bb.0: 6034; HASWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6035; HASWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50] 6036; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6037; HASWELL-NEXT: retq # sched: [7:1.00] 6038; 6039; BROADWELL-LABEL: test_psubsb: 6040; BROADWELL: # %bb.0: 6041; BROADWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6042; BROADWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50] 6043; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6044; BROADWELL-NEXT: retq # sched: [7:1.00] 6045; 6046; SKYLAKE-LABEL: test_psubsb: 6047; SKYLAKE: # %bb.0: 6048; 
SKYLAKE-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] 6049; SKYLAKE-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] 6050; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6051; SKYLAKE-NEXT: retq # sched: [7:1.00] 6052; 6053; SKX-LABEL: test_psubsb: 6054; SKX: # %bb.0: 6055; SKX-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] 6056; SKX-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] 6057; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6058; SKX-NEXT: retq # sched: [7:1.00] 6059; 6060; BTVER2-LABEL: test_psubsb: 6061; BTVER2: # %bb.0: 6062; BTVER2-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6063; BTVER2-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] 6064; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6065; BTVER2-NEXT: retq # sched: [4:1.00] 6066; 6067; ZNVER1-LABEL: test_psubsb: 6068; ZNVER1: # %bb.0: 6069; ZNVER1-NEXT: psubsb %mm1, %mm0 # sched: [1:0.25] 6070; ZNVER1-NEXT: psubsb (%rdi), %mm0 # sched: [8:0.50] 6071; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6072; ZNVER1-NEXT: retq # sched: [1:0.50] 6073 %1 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a0, x86_mmx %a1) 6074 %2 = load x86_mmx, x86_mmx *%a2, align 8 6075 %3 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %1, x86_mmx %2) 6076 %4 = bitcast x86_mmx %3 to i64 6077 ret i64 %4 6078} 6079declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone 6080 6081define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6082; GENERIC-LABEL: test_psubsw: 6083; GENERIC: # %bb.0: 6084; GENERIC-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] 6085; GENERIC-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] 6086; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6087; GENERIC-NEXT: retq # sched: [1:1.00] 6088; 6089; ATOM-LABEL: test_psubsw: 6090; ATOM: # %bb.0: 6091; ATOM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6092; ATOM-NEXT: psubsw (%rdi), %mm0 # sched: [1:1.00] 6093; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6094; ATOM-NEXT: retq # sched: [79:39.50] 6095; 6096; SLM-LABEL: test_psubsw: 6097; SLM: # %bb.0: 6098; SLM-NEXT: 
psubsw %mm1, %mm0 # sched: [1:0.50] 6099; SLM-NEXT: psubsw (%rdi), %mm0 # sched: [4:1.00] 6100; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6101; SLM-NEXT: retq # sched: [4:1.00] 6102; 6103; SANDY-LABEL: test_psubsw: 6104; SANDY: # %bb.0: 6105; SANDY-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] 6106; SANDY-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] 6107; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6108; SANDY-NEXT: retq # sched: [1:1.00] 6109; 6110; HASWELL-LABEL: test_psubsw: 6111; HASWELL: # %bb.0: 6112; HASWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6113; HASWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50] 6114; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6115; HASWELL-NEXT: retq # sched: [7:1.00] 6116; 6117; BROADWELL-LABEL: test_psubsw: 6118; BROADWELL: # %bb.0: 6119; BROADWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6120; BROADWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50] 6121; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6122; BROADWELL-NEXT: retq # sched: [7:1.00] 6123; 6124; SKYLAKE-LABEL: test_psubsw: 6125; SKYLAKE: # %bb.0: 6126; SKYLAKE-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] 6127; SKYLAKE-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] 6128; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6129; SKYLAKE-NEXT: retq # sched: [7:1.00] 6130; 6131; SKX-LABEL: test_psubsw: 6132; SKX: # %bb.0: 6133; SKX-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] 6134; SKX-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] 6135; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6136; SKX-NEXT: retq # sched: [7:1.00] 6137; 6138; BTVER2-LABEL: test_psubsw: 6139; BTVER2: # %bb.0: 6140; BTVER2-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6141; BTVER2-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] 6142; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6143; BTVER2-NEXT: retq # sched: [4:1.00] 6144; 6145; ZNVER1-LABEL: test_psubsw: 6146; ZNVER1: # %bb.0: 6147; ZNVER1-NEXT: psubsw %mm1, %mm0 # sched: [1:0.25] 6148; ZNVER1-NEXT: psubsw (%rdi), %mm0 # sched: [8:0.50] 6149; ZNVER1-NEXT: movq 
%mm0, %rax # sched: [2:1.00] 6150; ZNVER1-NEXT: retq # sched: [1:0.50] 6151 %1 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a0, x86_mmx %a1) 6152 %2 = load x86_mmx, x86_mmx *%a2, align 8 6153 %3 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %1, x86_mmx %2) 6154 %4 = bitcast x86_mmx %3 to i64 6155 ret i64 %4 6156} 6157declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone 6158 6159define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6160; GENERIC-LABEL: test_psubusb: 6161; GENERIC: # %bb.0: 6162; GENERIC-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] 6163; GENERIC-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] 6164; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6165; GENERIC-NEXT: retq # sched: [1:1.00] 6166; 6167; ATOM-LABEL: test_psubusb: 6168; ATOM: # %bb.0: 6169; ATOM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6170; ATOM-NEXT: psubusb (%rdi), %mm0 # sched: [1:1.00] 6171; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6172; ATOM-NEXT: retq # sched: [79:39.50] 6173; 6174; SLM-LABEL: test_psubusb: 6175; SLM: # %bb.0: 6176; SLM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6177; SLM-NEXT: psubusb (%rdi), %mm0 # sched: [4:1.00] 6178; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6179; SLM-NEXT: retq # sched: [4:1.00] 6180; 6181; SANDY-LABEL: test_psubusb: 6182; SANDY: # %bb.0: 6183; SANDY-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] 6184; SANDY-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] 6185; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6186; SANDY-NEXT: retq # sched: [1:1.00] 6187; 6188; HASWELL-LABEL: test_psubusb: 6189; HASWELL: # %bb.0: 6190; HASWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6191; HASWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50] 6192; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6193; HASWELL-NEXT: retq # sched: [7:1.00] 6194; 6195; BROADWELL-LABEL: test_psubusb: 6196; BROADWELL: # %bb.0: 6197; BROADWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6198; BROADWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50] 
6199; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6200; BROADWELL-NEXT: retq # sched: [7:1.00] 6201; 6202; SKYLAKE-LABEL: test_psubusb: 6203; SKYLAKE: # %bb.0: 6204; SKYLAKE-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] 6205; SKYLAKE-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] 6206; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6207; SKYLAKE-NEXT: retq # sched: [7:1.00] 6208; 6209; SKX-LABEL: test_psubusb: 6210; SKX: # %bb.0: 6211; SKX-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] 6212; SKX-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] 6213; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6214; SKX-NEXT: retq # sched: [7:1.00] 6215; 6216; BTVER2-LABEL: test_psubusb: 6217; BTVER2: # %bb.0: 6218; BTVER2-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6219; BTVER2-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] 6220; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6221; BTVER2-NEXT: retq # sched: [4:1.00] 6222; 6223; ZNVER1-LABEL: test_psubusb: 6224; ZNVER1: # %bb.0: 6225; ZNVER1-NEXT: psubusb %mm1, %mm0 # sched: [1:0.25] 6226; ZNVER1-NEXT: psubusb (%rdi), %mm0 # sched: [8:0.50] 6227; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6228; ZNVER1-NEXT: retq # sched: [1:0.50] 6229 %1 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a0, x86_mmx %a1) 6230 %2 = load x86_mmx, x86_mmx *%a2, align 8 6231 %3 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %1, x86_mmx %2) 6232 %4 = bitcast x86_mmx %3 to i64 6233 ret i64 %4 6234} 6235declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone 6236 6237define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6238; GENERIC-LABEL: test_psubusw: 6239; GENERIC: # %bb.0: 6240; GENERIC-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] 6241; GENERIC-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] 6242; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6243; GENERIC-NEXT: retq # sched: [1:1.00] 6244; 6245; ATOM-LABEL: test_psubusw: 6246; ATOM: # %bb.0: 6247; ATOM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6248; ATOM-NEXT: psubusw 
(%rdi), %mm0 # sched: [1:1.00] 6249; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6250; ATOM-NEXT: retq # sched: [79:39.50] 6251; 6252; SLM-LABEL: test_psubusw: 6253; SLM: # %bb.0: 6254; SLM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6255; SLM-NEXT: psubusw (%rdi), %mm0 # sched: [4:1.00] 6256; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6257; SLM-NEXT: retq # sched: [4:1.00] 6258; 6259; SANDY-LABEL: test_psubusw: 6260; SANDY: # %bb.0: 6261; SANDY-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] 6262; SANDY-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] 6263; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6264; SANDY-NEXT: retq # sched: [1:1.00] 6265; 6266; HASWELL-LABEL: test_psubusw: 6267; HASWELL: # %bb.0: 6268; HASWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6269; HASWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50] 6270; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6271; HASWELL-NEXT: retq # sched: [7:1.00] 6272; 6273; BROADWELL-LABEL: test_psubusw: 6274; BROADWELL: # %bb.0: 6275; BROADWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6276; BROADWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50] 6277; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6278; BROADWELL-NEXT: retq # sched: [7:1.00] 6279; 6280; SKYLAKE-LABEL: test_psubusw: 6281; SKYLAKE: # %bb.0: 6282; SKYLAKE-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] 6283; SKYLAKE-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] 6284; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6285; SKYLAKE-NEXT: retq # sched: [7:1.00] 6286; 6287; SKX-LABEL: test_psubusw: 6288; SKX: # %bb.0: 6289; SKX-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] 6290; SKX-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] 6291; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6292; SKX-NEXT: retq # sched: [7:1.00] 6293; 6294; BTVER2-LABEL: test_psubusw: 6295; BTVER2: # %bb.0: 6296; BTVER2-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6297; BTVER2-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] 6298; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6299; BTVER2-NEXT: retq # 
sched: [4:1.00] 6300; 6301; ZNVER1-LABEL: test_psubusw: 6302; ZNVER1: # %bb.0: 6303; ZNVER1-NEXT: psubusw %mm1, %mm0 # sched: [1:0.25] 6304; ZNVER1-NEXT: psubusw (%rdi), %mm0 # sched: [8:0.50] 6305; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6306; ZNVER1-NEXT: retq # sched: [1:0.50] 6307 %1 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a0, x86_mmx %a1) 6308 %2 = load x86_mmx, x86_mmx *%a2, align 8 6309 %3 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %1, x86_mmx %2) 6310 %4 = bitcast x86_mmx %3 to i64 6311 ret i64 %4 6312} 6313declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone 6314 6315define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6316; GENERIC-LABEL: test_psubw: 6317; GENERIC: # %bb.0: 6318; GENERIC-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] 6319; GENERIC-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] 6320; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6321; GENERIC-NEXT: retq # sched: [1:1.00] 6322; 6323; ATOM-LABEL: test_psubw: 6324; ATOM: # %bb.0: 6325; ATOM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6326; ATOM-NEXT: psubw (%rdi), %mm0 # sched: [1:1.00] 6327; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6328; ATOM-NEXT: retq # sched: [79:39.50] 6329; 6330; SLM-LABEL: test_psubw: 6331; SLM: # %bb.0: 6332; SLM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6333; SLM-NEXT: psubw (%rdi), %mm0 # sched: [4:1.00] 6334; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6335; SLM-NEXT: retq # sched: [4:1.00] 6336; 6337; SANDY-LABEL: test_psubw: 6338; SANDY: # %bb.0: 6339; SANDY-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] 6340; SANDY-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] 6341; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6342; SANDY-NEXT: retq # sched: [1:1.00] 6343; 6344; HASWELL-LABEL: test_psubw: 6345; HASWELL: # %bb.0: 6346; HASWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6347; HASWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] 6348; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6349; HASWELL-NEXT: retq # sched: [7:1.00] 
6350; 6351; BROADWELL-LABEL: test_psubw: 6352; BROADWELL: # %bb.0: 6353; BROADWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6354; BROADWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] 6355; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6356; BROADWELL-NEXT: retq # sched: [7:1.00] 6357; 6358; SKYLAKE-LABEL: test_psubw: 6359; SKYLAKE: # %bb.0: 6360; SKYLAKE-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6361; SKYLAKE-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] 6362; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6363; SKYLAKE-NEXT: retq # sched: [7:1.00] 6364; 6365; SKX-LABEL: test_psubw: 6366; SKX: # %bb.0: 6367; SKX-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6368; SKX-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] 6369; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6370; SKX-NEXT: retq # sched: [7:1.00] 6371; 6372; BTVER2-LABEL: test_psubw: 6373; BTVER2: # %bb.0: 6374; BTVER2-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6375; BTVER2-NEXT: psubw (%rdi), %mm0 # sched: [6:1.00] 6376; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6377; BTVER2-NEXT: retq # sched: [4:1.00] 6378; 6379; ZNVER1-LABEL: test_psubw: 6380; ZNVER1: # %bb.0: 6381; ZNVER1-NEXT: psubw %mm1, %mm0 # sched: [1:0.25] 6382; ZNVER1-NEXT: psubw (%rdi), %mm0 # sched: [8:0.50] 6383; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6384; ZNVER1-NEXT: retq # sched: [1:0.50] 6385 %1 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a0, x86_mmx %a1) 6386 %2 = load x86_mmx, x86_mmx *%a2, align 8 6387 %3 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %1, x86_mmx %2) 6388 %4 = bitcast x86_mmx %3 to i64 6389 ret i64 %4 6390} 6391declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone 6392 6393define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6394; GENERIC-LABEL: test_punpckhbw: 6395; GENERIC: # %bb.0: 6396; GENERIC-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6397; GENERIC-NEXT: punpckhbw (%rdi), %mm0 # mm0 = 
mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6398; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6399; GENERIC-NEXT: retq # sched: [1:1.00] 6400; 6401; ATOM-LABEL: test_punpckhbw: 6402; ATOM: # %bb.0: 6403; ATOM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] 6404; ATOM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] 6405; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6406; ATOM-NEXT: retq # sched: [79:39.50] 6407; 6408; SLM-LABEL: test_punpckhbw: 6409; SLM: # %bb.0: 6410; SLM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6411; SLM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [4:1.00] 6412; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6413; SLM-NEXT: retq # sched: [4:1.00] 6414; 6415; SANDY-LABEL: test_punpckhbw: 6416; SANDY: # %bb.0: 6417; SANDY-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6418; SANDY-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6419; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6420; SANDY-NEXT: retq # sched: [1:1.00] 6421; 6422; HASWELL-LABEL: test_punpckhbw: 6423; HASWELL: # %bb.0: 6424; HASWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6425; HASWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6426; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6427; HASWELL-NEXT: retq # sched: [7:1.00] 6428; 6429; BROADWELL-LABEL: test_punpckhbw: 6430; BROADWELL: # %bb.0: 6431; BROADWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6432; BROADWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = 
mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6433; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6434; BROADWELL-NEXT: retq # sched: [7:1.00] 6435; 6436; SKYLAKE-LABEL: test_punpckhbw: 6437; SKYLAKE: # %bb.0: 6438; SKYLAKE-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6439; SKYLAKE-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6440; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6441; SKYLAKE-NEXT: retq # sched: [7:1.00] 6442; 6443; SKX-LABEL: test_punpckhbw: 6444; SKX: # %bb.0: 6445; SKX-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6446; SKX-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6447; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6448; SKX-NEXT: retq # sched: [7:1.00] 6449; 6450; BTVER2-LABEL: test_punpckhbw: 6451; BTVER2: # %bb.0: 6452; BTVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] 6453; BTVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6454; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6455; BTVER2-NEXT: retq # sched: [4:1.00] 6456; 6457; ZNVER1-LABEL: test_punpckhbw: 6458; ZNVER1: # %bb.0: 6459; ZNVER1-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.25] 6460; ZNVER1-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [8:0.50] 6461; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6462; ZNVER1-NEXT: retq # sched: [1:0.50] 6463 %1 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a0, x86_mmx %a1) 6464 %2 = load x86_mmx, x86_mmx *%a2, align 8 6465 %3 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %1, x86_mmx %2) 6466 %4 = bitcast x86_mmx %3 to i64 6467 
ret i64 %4 6468} 6469declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone 6470 6471define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6472; GENERIC-LABEL: test_punpckhdq: 6473; GENERIC: # %bb.0: 6474; GENERIC-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6475; GENERIC-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6476; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6477; GENERIC-NEXT: retq # sched: [1:1.00] 6478; 6479; ATOM-LABEL: test_punpckhdq: 6480; ATOM: # %bb.0: 6481; ATOM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] 6482; ATOM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] 6483; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6484; ATOM-NEXT: retq # sched: [79:39.50] 6485; 6486; SLM-LABEL: test_punpckhdq: 6487; SLM: # %bb.0: 6488; SLM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6489; SLM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [4:1.00] 6490; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6491; SLM-NEXT: retq # sched: [4:1.00] 6492; 6493; SANDY-LABEL: test_punpckhdq: 6494; SANDY: # %bb.0: 6495; SANDY-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6496; SANDY-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6497; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6498; SANDY-NEXT: retq # sched: [1:1.00] 6499; 6500; HASWELL-LABEL: test_punpckhdq: 6501; HASWELL: # %bb.0: 6502; HASWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6503; HASWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6504; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6505; HASWELL-NEXT: retq # sched: [7:1.00] 6506; 6507; BROADWELL-LABEL: test_punpckhdq: 6508; BROADWELL: # %bb.0: 6509; BROADWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6510; BROADWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6511; 
BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6512; BROADWELL-NEXT: retq # sched: [7:1.00] 6513; 6514; SKYLAKE-LABEL: test_punpckhdq: 6515; SKYLAKE: # %bb.0: 6516; SKYLAKE-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6517; SKYLAKE-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6518; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6519; SKYLAKE-NEXT: retq # sched: [7:1.00] 6520; 6521; SKX-LABEL: test_punpckhdq: 6522; SKX: # %bb.0: 6523; SKX-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6524; SKX-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6525; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6526; SKX-NEXT: retq # sched: [7:1.00] 6527; 6528; BTVER2-LABEL: test_punpckhdq: 6529; BTVER2: # %bb.0: 6530; BTVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] 6531; BTVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6532; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6533; BTVER2-NEXT: retq # sched: [4:1.00] 6534; 6535; ZNVER1-LABEL: test_punpckhdq: 6536; ZNVER1: # %bb.0: 6537; ZNVER1-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.25] 6538; ZNVER1-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [8:0.50] 6539; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6540; ZNVER1-NEXT: retq # sched: [1:0.50] 6541 %1 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a0, x86_mmx %a1) 6542 %2 = load x86_mmx, x86_mmx *%a2, align 8 6543 %3 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %1, x86_mmx %2) 6544 %4 = bitcast x86_mmx %3 to i64 6545 ret i64 %4 6546} 6547declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone 6548 6549define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6550; GENERIC-LABEL: test_punpckhwd: 6551; GENERIC: # %bb.0: 6552; GENERIC-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6553; GENERIC-NEXT: punpckhwd (%rdi), %mm0 # mm0 = 
mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6554; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6555; GENERIC-NEXT: retq # sched: [1:1.00] 6556; 6557; ATOM-LABEL: test_punpckhwd: 6558; ATOM: # %bb.0: 6559; ATOM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] 6560; ATOM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] 6561; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6562; ATOM-NEXT: retq # sched: [79:39.50] 6563; 6564; SLM-LABEL: test_punpckhwd: 6565; SLM: # %bb.0: 6566; SLM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6567; SLM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00] 6568; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6569; SLM-NEXT: retq # sched: [4:1.00] 6570; 6571; SANDY-LABEL: test_punpckhwd: 6572; SANDY: # %bb.0: 6573; SANDY-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6574; SANDY-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6575; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6576; SANDY-NEXT: retq # sched: [1:1.00] 6577; 6578; HASWELL-LABEL: test_punpckhwd: 6579; HASWELL: # %bb.0: 6580; HASWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6581; HASWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6582; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6583; HASWELL-NEXT: retq # sched: [7:1.00] 6584; 6585; BROADWELL-LABEL: test_punpckhwd: 6586; BROADWELL: # %bb.0: 6587; BROADWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6588; BROADWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6589; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6590; BROADWELL-NEXT: retq # sched: [7:1.00] 6591; 6592; SKYLAKE-LABEL: test_punpckhwd: 6593; SKYLAKE: # %bb.0: 6594; SKYLAKE-NEXT: punpckhwd %mm1, %mm0 # mm0 = 
mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6595; SKYLAKE-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6596; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6597; SKYLAKE-NEXT: retq # sched: [7:1.00] 6598; 6599; SKX-LABEL: test_punpckhwd: 6600; SKX: # %bb.0: 6601; SKX-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6602; SKX-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6603; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6604; SKX-NEXT: retq # sched: [7:1.00] 6605; 6606; BTVER2-LABEL: test_punpckhwd: 6607; BTVER2: # %bb.0: 6608; BTVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] 6609; BTVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6610; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6611; BTVER2-NEXT: retq # sched: [4:1.00] 6612; 6613; ZNVER1-LABEL: test_punpckhwd: 6614; ZNVER1: # %bb.0: 6615; ZNVER1-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25] 6616; ZNVER1-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50] 6617; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6618; ZNVER1-NEXT: retq # sched: [1:0.50] 6619 %1 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a0, x86_mmx %a1) 6620 %2 = load x86_mmx, x86_mmx *%a2, align 8 6621 %3 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %1, x86_mmx %2) 6622 %4 = bitcast x86_mmx %3 to i64 6623 ret i64 %4 6624} 6625declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone 6626 6627define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6628; GENERIC-LABEL: test_punpcklbw: 6629; GENERIC: # %bb.0: 6630; GENERIC-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6631; GENERIC-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6632; GENERIC-NEXT: movq 
%mm0, %rax # sched: [2:1.00] 6633; GENERIC-NEXT: retq # sched: [1:1.00] 6634; 6635; ATOM-LABEL: test_punpcklbw: 6636; ATOM: # %bb.0: 6637; ATOM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6638; ATOM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] 6639; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6640; ATOM-NEXT: retq # sched: [79:39.50] 6641; 6642; SLM-LABEL: test_punpcklbw: 6643; SLM: # %bb.0: 6644; SLM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6645; SLM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00] 6646; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6647; SLM-NEXT: retq # sched: [4:1.00] 6648; 6649; SANDY-LABEL: test_punpcklbw: 6650; SANDY: # %bb.0: 6651; SANDY-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6652; SANDY-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6653; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6654; SANDY-NEXT: retq # sched: [1:1.00] 6655; 6656; HASWELL-LABEL: test_punpcklbw: 6657; HASWELL: # %bb.0: 6658; HASWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6659; HASWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6660; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6661; HASWELL-NEXT: retq # sched: [7:1.00] 6662; 6663; BROADWELL-LABEL: test_punpcklbw: 6664; BROADWELL: # %bb.0: 6665; BROADWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6666; BROADWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6667; BROADWELL-NEXT: movq 
%mm0, %rax # sched: [1:1.00] 6668; BROADWELL-NEXT: retq # sched: [7:1.00] 6669; 6670; SKYLAKE-LABEL: test_punpcklbw: 6671; SKYLAKE: # %bb.0: 6672; SKYLAKE-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6673; SKYLAKE-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6674; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6675; SKYLAKE-NEXT: retq # sched: [7:1.00] 6676; 6677; SKX-LABEL: test_punpcklbw: 6678; SKX: # %bb.0: 6679; SKX-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6680; SKX-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6681; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6682; SKX-NEXT: retq # sched: [7:1.00] 6683; 6684; BTVER2-LABEL: test_punpcklbw: 6685; BTVER2: # %bb.0: 6686; BTVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] 6687; BTVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6688; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6689; BTVER2-NEXT: retq # sched: [4:1.00] 6690; 6691; ZNVER1-LABEL: test_punpcklbw: 6692; ZNVER1: # %bb.0: 6693; ZNVER1-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25] 6694; ZNVER1-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50] 6695; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6696; ZNVER1-NEXT: retq # sched: [1:0.50] 6697 %1 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a0, x86_mmx %a1) 6698 %2 = load x86_mmx, x86_mmx *%a2, align 8 6699 %3 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %1, x86_mmx %2) 6700 %4 = bitcast x86_mmx %3 to i64 6701 ret i64 %4 6702} 6703declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone 
; test_punpckldq: calls the llvm.x86.mmx.punpckldq intrinsic twice, first on
; the two register arguments (%a0, %a1) and then on that result together with
; a value loaded from %a2. The expected assembly for each -mcpu RUN line shows
; a register-register punpckldq followed by a punpckldq with a (%rdi) memory
; operand, each carrying the "sched:" annotation produced by -print-schedule.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py.
define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_punpckldq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; GENERIC-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_punpckldq:
; ATOM: # %bb.0:
; ATOM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; ATOM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00]
; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_punpckldq:
; SLM: # %bb.0:
; SLM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; SLM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [4:1.00]
; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_punpckldq:
; SANDY: # %bb.0:
; SANDY-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; SANDY-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpckldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; HASWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_punpckldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; BROADWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_punpckldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; SKYLAKE-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpckldq:
; SKX: # %bb.0:
; SKX-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; SKX-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_punpckldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
; BTVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_punpckldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.25]
; ZNVER1-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [8:0.50]
; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a0, x86_mmx %a1)
  ; The loaded value feeds the second call; the assertions above expect it to
  ; appear as the (%rdi) memory operand of the second punpckldq.
  %2 = load x86_mmx, x86_mmx *%a2, align 8
  %3 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %1, x86_mmx %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

; test_punpcklwd: same shape as the other tests in this file, but for the
; llvm.x86.mmx.punpcklwd intrinsic: one call on the register arguments
; (%a0, %a1), one call combining that result with a value loaded from %a2,
; then the result is bitcast to i64 and returned. The expected assembly for
; each -mcpu RUN line is a register-register punpcklwd, a punpcklwd with a
; (%rdi) memory operand, a movq into %rax and a retq, each with its "sched:"
; annotation.
define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_punpcklwd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; GENERIC-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_punpcklwd:
; ATOM: # %bb.0:
; ATOM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; ATOM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00]
; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_punpcklwd:
; SLM: # %bb.0:
; SLM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; SLM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [4:1.00]
; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_punpcklwd:
; SANDY: # %bb.0:
; SANDY-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; SANDY-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpcklwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; HASWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_punpcklwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; BROADWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_punpcklwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; SKYLAKE-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpcklwd:
; SKX: # %bb.0:
; SKX-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; SKX-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_punpcklwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
; BTVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_punpcklwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.25]
; ZNVER1-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [8:0.50]
; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a0, x86_mmx %a1)
  ; The loaded value feeds the second call; the assertions above expect it to
  ; appear as the (%rdi) memory operand of the second punpcklwd.
  %2 = load x86_mmx, x86_mmx *%a2, align 8
  %3 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %1, x86_mmx %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

; test_pxor: calls the llvm.x86.mmx.pxor intrinsic on the register arguments
; (%a0, %a1), then again on that result and a value loaded from %a2, and
; returns the final value bitcast to i64. The expected assembly for each
; -mcpu RUN line is a register-register pxor, a pxor with a (%rdi) memory
; operand, a movq into %rax and a retq, each with its "sched:" annotation.
define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_pxor:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pxor:
; ATOM: # %bb.0:
; ATOM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT: pxor (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_pxor:
; SLM: # %bb.0:
; SLM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT: pxor (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pxor:
; SANDY: # %bb.0:
; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pxor:
; HASWELL: # %bb.0:
; HASWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
; HASWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pxor:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
; BROADWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pxor:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; SKYLAKE-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pxor:
; SKX: # %bb.0:
; SKX-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_pxor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: pxor %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT: pxor (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a0, x86_mmx %a1)
  ; The loaded value feeds the second call; the assertions above expect it to
  ; appear as the (%rdi) memory operand of the second pxor.
  %2 = load x86_mmx, x86_mmx *%a2, align 8
  %3 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %1, x86_mmx %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone