1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY 9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE 10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL 11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE 12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL 13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE 14; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE 15; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | 
FileCheck %s --check-prefixes=CHECK,SKX-SSE 16; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX 17; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE 18; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 19; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE 20; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 21 22define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 23; GENERIC-LABEL: test_addsubpd: 24; GENERIC: # %bb.0: 25; GENERIC-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] 26; GENERIC-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] 27; GENERIC-NEXT: retq # sched: [1:1.00] 28; 29; ATOM-LABEL: test_addsubpd: 30; ATOM: # %bb.0: 31; ATOM-NEXT: addsubpd %xmm1, %xmm0 # sched: [6:3.00] 32; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [7:3.50] 33; ATOM-NEXT: retq # sched: [79:39.50] 34; 35; SLM-LABEL: test_addsubpd: 36; SLM: # %bb.0: 37; SLM-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] 38; SLM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:1.00] 39; SLM-NEXT: retq # sched: [4:1.00] 40; 41; SANDY-SSE-LABEL: test_addsubpd: 42; SANDY-SSE: # %bb.0: 43; SANDY-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] 44; SANDY-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] 45; SANDY-SSE-NEXT: retq # sched: [1:1.00] 46; 47; SANDY-LABEL: test_addsubpd: 48; SANDY: # %bb.0: 49; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 50; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 51; SANDY-NEXT: retq # sched: [1:1.00] 52; 53; HASWELL-SSE-LABEL: test_addsubpd: 54; HASWELL-SSE: # %bb.0: 55; HASWELL-SSE-NEXT: addsubpd 
%xmm1, %xmm0 # sched: [3:1.00] 56; HASWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] 57; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 58; 59; HASWELL-LABEL: test_addsubpd: 60; HASWELL: # %bb.0: 61; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 62; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 63; HASWELL-NEXT: retq # sched: [7:1.00] 64; 65; BROADWELL-SSE-LABEL: test_addsubpd: 66; BROADWELL-SSE: # %bb.0: 67; BROADWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] 68; BROADWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00] 69; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 70; 71; BROADWELL-LABEL: test_addsubpd: 72; BROADWELL: # %bb.0: 73; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 74; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 75; BROADWELL-NEXT: retq # sched: [7:1.00] 76; 77; SKYLAKE-SSE-LABEL: test_addsubpd: 78; SKYLAKE-SSE: # %bb.0: 79; SKYLAKE-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50] 80; SKYLAKE-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50] 81; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 82; 83; SKYLAKE-LABEL: test_addsubpd: 84; SKYLAKE: # %bb.0: 85; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 86; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 87; SKYLAKE-NEXT: retq # sched: [7:1.00] 88; 89; SKX-SSE-LABEL: test_addsubpd: 90; SKX-SSE: # %bb.0: 91; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50] 92; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50] 93; SKX-SSE-NEXT: retq # sched: [7:1.00] 94; 95; SKX-LABEL: test_addsubpd: 96; SKX: # %bb.0: 97; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 98; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 99; SKX-NEXT: retq # sched: [7:1.00] 100; 101; BTVER2-SSE-LABEL: test_addsubpd: 102; BTVER2-SSE: # %bb.0: 103; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] 104; BTVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00] 105; BTVER2-SSE-NEXT: retq # 
sched: [4:1.00] 106; 107; BTVER2-LABEL: test_addsubpd: 108; BTVER2: # %bb.0: 109; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 110; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 111; BTVER2-NEXT: retq # sched: [4:1.00] 112; 113; ZNVER1-SSE-LABEL: test_addsubpd: 114; ZNVER1-SSE: # %bb.0: 115; ZNVER1-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] 116; ZNVER1-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00] 117; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 118; 119; ZNVER1-LABEL: test_addsubpd: 120; ZNVER1: # %bb.0: 121; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 122; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 123; ZNVER1-NEXT: retq # sched: [1:0.50] 124 %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) 125 %2 = load <2 x double>, <2 x double> *%a2, align 16 126 %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2) 127 ret <2 x double> %3 128} 129declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone 130 131define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 132; GENERIC-LABEL: test_addsubps: 133; GENERIC: # %bb.0: 134; GENERIC-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] 135; GENERIC-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] 136; GENERIC-NEXT: retq # sched: [1:1.00] 137; 138; ATOM-LABEL: test_addsubps: 139; ATOM: # %bb.0: 140; ATOM-NEXT: addsubps %xmm1, %xmm0 # sched: [5:5.00] 141; ATOM-NEXT: addsubps (%rdi), %xmm0 # sched: [5:5.00] 142; ATOM-NEXT: retq # sched: [79:39.50] 143; 144; SLM-LABEL: test_addsubps: 145; SLM: # %bb.0: 146; SLM-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] 147; SLM-NEXT: addsubps (%rdi), %xmm0 # sched: [6:1.00] 148; SLM-NEXT: retq # sched: [4:1.00] 149; 150; SANDY-SSE-LABEL: test_addsubps: 151; SANDY-SSE: # %bb.0: 152; SANDY-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] 153; SANDY-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] 
154; SANDY-SSE-NEXT: retq # sched: [1:1.00] 155; 156; SANDY-LABEL: test_addsubps: 157; SANDY: # %bb.0: 158; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 159; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 160; SANDY-NEXT: retq # sched: [1:1.00] 161; 162; HASWELL-SSE-LABEL: test_addsubps: 163; HASWELL-SSE: # %bb.0: 164; HASWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] 165; HASWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] 166; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 167; 168; HASWELL-LABEL: test_addsubps: 169; HASWELL: # %bb.0: 170; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 171; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 172; HASWELL-NEXT: retq # sched: [7:1.00] 173; 174; BROADWELL-SSE-LABEL: test_addsubps: 175; BROADWELL-SSE: # %bb.0: 176; BROADWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] 177; BROADWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00] 178; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 179; 180; BROADWELL-LABEL: test_addsubps: 181; BROADWELL: # %bb.0: 182; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 183; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 184; BROADWELL-NEXT: retq # sched: [7:1.00] 185; 186; SKYLAKE-SSE-LABEL: test_addsubps: 187; SKYLAKE-SSE: # %bb.0: 188; SKYLAKE-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50] 189; SKYLAKE-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50] 190; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 191; 192; SKYLAKE-LABEL: test_addsubps: 193; SKYLAKE: # %bb.0: 194; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 195; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 196; SKYLAKE-NEXT: retq # sched: [7:1.00] 197; 198; SKX-SSE-LABEL: test_addsubps: 199; SKX-SSE: # %bb.0: 200; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50] 201; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50] 202; SKX-SSE-NEXT: retq # sched: [7:1.00] 203; 204; SKX-LABEL: 
test_addsubps: 205; SKX: # %bb.0: 206; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 207; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 208; SKX-NEXT: retq # sched: [7:1.00] 209; 210; BTVER2-SSE-LABEL: test_addsubps: 211; BTVER2-SSE: # %bb.0: 212; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] 213; BTVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00] 214; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 215; 216; BTVER2-LABEL: test_addsubps: 217; BTVER2: # %bb.0: 218; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 219; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 220; BTVER2-NEXT: retq # sched: [4:1.00] 221; 222; ZNVER1-SSE-LABEL: test_addsubps: 223; ZNVER1-SSE: # %bb.0: 224; ZNVER1-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] 225; ZNVER1-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00] 226; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 227; 228; ZNVER1-LABEL: test_addsubps: 229; ZNVER1: # %bb.0: 230; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 231; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 232; ZNVER1-NEXT: retq # sched: [1:0.50] 233 %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) 234 %2 = load <4 x float>, <4 x float> *%a2, align 16 235 %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2) 236 ret <4 x float> %3 237} 238declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone 239 240define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 241; GENERIC-LABEL: test_haddpd: 242; GENERIC: # %bb.0: 243; GENERIC-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] 244; GENERIC-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] 245; GENERIC-NEXT: retq # sched: [1:1.00] 246; 247; ATOM-LABEL: test_haddpd: 248; ATOM: # %bb.0: 249; ATOM-NEXT: haddpd %xmm1, %xmm0 # sched: [8:4.00] 250; ATOM-NEXT: haddpd (%rdi), %xmm0 # sched: [9:4.50] 251; ATOM-NEXT: retq # sched: 
[79:39.50] 252; 253; SLM-LABEL: test_haddpd: 254; SLM: # %bb.0: 255; SLM-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00] 256; SLM-NEXT: haddpd (%rdi), %xmm0 # sched: [6:1.00] 257; SLM-NEXT: retq # sched: [4:1.00] 258; 259; SANDY-SSE-LABEL: test_haddpd: 260; SANDY-SSE: # %bb.0: 261; SANDY-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] 262; SANDY-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] 263; SANDY-SSE-NEXT: retq # sched: [1:1.00] 264; 265; SANDY-LABEL: test_haddpd: 266; SANDY: # %bb.0: 267; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 268; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] 269; SANDY-NEXT: retq # sched: [1:1.00] 270; 271; HASWELL-SSE-LABEL: test_haddpd: 272; HASWELL-SSE: # %bb.0: 273; HASWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] 274; HASWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] 275; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 276; 277; HASWELL-LABEL: test_haddpd: 278; HASWELL: # %bb.0: 279; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 280; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] 281; HASWELL-NEXT: retq # sched: [7:1.00] 282; 283; BROADWELL-SSE-LABEL: test_haddpd: 284; BROADWELL-SSE: # %bb.0: 285; BROADWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] 286; BROADWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [10:2.00] 287; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 288; 289; BROADWELL-LABEL: test_haddpd: 290; BROADWELL: # %bb.0: 291; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 292; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00] 293; BROADWELL-NEXT: retq # sched: [7:1.00] 294; 295; SKYLAKE-SSE-LABEL: test_haddpd: 296; SKYLAKE-SSE: # %bb.0: 297; SKYLAKE-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00] 298; SKYLAKE-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00] 299; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 300; 301; SKYLAKE-LABEL: test_haddpd: 302; SKYLAKE: # %bb.0: 303; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] 
304; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 305; SKYLAKE-NEXT: retq # sched: [7:1.00] 306; 307; SKX-SSE-LABEL: test_haddpd: 308; SKX-SSE: # %bb.0: 309; SKX-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00] 310; SKX-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00] 311; SKX-SSE-NEXT: retq # sched: [7:1.00] 312; 313; SKX-LABEL: test_haddpd: 314; SKX: # %bb.0: 315; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] 316; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 317; SKX-NEXT: retq # sched: [7:1.00] 318; 319; BTVER2-SSE-LABEL: test_haddpd: 320; BTVER2-SSE: # %bb.0: 321; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00] 322; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [8:1.00] 323; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 324; 325; BTVER2-LABEL: test_haddpd: 326; BTVER2: # %bb.0: 327; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 328; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 329; BTVER2-NEXT: retq # sched: [4:1.00] 330; 331; ZNVER1-SSE-LABEL: test_haddpd: 332; ZNVER1-SSE: # %bb.0: 333; ZNVER1-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [100:0.25] 334; ZNVER1-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [100:0.25] 335; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 336; 337; ZNVER1-LABEL: test_haddpd: 338; ZNVER1: # %bb.0: 339; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25] 340; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] 341; ZNVER1-NEXT: retq # sched: [1:0.50] 342 %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) 343 %2 = load <2 x double>, <2 x double> *%a2, align 16 344 %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2) 345 ret <2 x double> %3 346} 347declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone 348 349define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 350; GENERIC-LABEL: test_haddps: 351; GENERIC: # %bb.0: 352; GENERIC-NEXT: haddps 
%xmm1, %xmm0 # sched: [5:2.00] 353; GENERIC-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] 354; GENERIC-NEXT: retq # sched: [1:1.00] 355; 356; ATOM-LABEL: test_haddps: 357; ATOM: # %bb.0: 358; ATOM-NEXT: haddps %xmm1, %xmm0 # sched: [8:4.00] 359; ATOM-NEXT: haddps (%rdi), %xmm0 # sched: [9:4.50] 360; ATOM-NEXT: retq # sched: [79:39.50] 361; 362; SLM-LABEL: test_haddps: 363; SLM: # %bb.0: 364; SLM-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00] 365; SLM-NEXT: haddps (%rdi), %xmm0 # sched: [6:1.00] 366; SLM-NEXT: retq # sched: [4:1.00] 367; 368; SANDY-SSE-LABEL: test_haddps: 369; SANDY-SSE: # %bb.0: 370; SANDY-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] 371; SANDY-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] 372; SANDY-SSE-NEXT: retq # sched: [1:1.00] 373; 374; SANDY-LABEL: test_haddps: 375; SANDY: # %bb.0: 376; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 377; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] 378; SANDY-NEXT: retq # sched: [1:1.00] 379; 380; HASWELL-SSE-LABEL: test_haddps: 381; HASWELL-SSE: # %bb.0: 382; HASWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] 383; HASWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] 384; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 385; 386; HASWELL-LABEL: test_haddps: 387; HASWELL: # %bb.0: 388; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 389; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] 390; HASWELL-NEXT: retq # sched: [7:1.00] 391; 392; BROADWELL-SSE-LABEL: test_haddps: 393; BROADWELL-SSE: # %bb.0: 394; BROADWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] 395; BROADWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [10:2.00] 396; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 397; 398; BROADWELL-LABEL: test_haddps: 399; BROADWELL: # %bb.0: 400; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 401; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00] 402; BROADWELL-NEXT: retq # sched: [7:1.00] 403; 404; SKYLAKE-SSE-LABEL: test_haddps: 
405; SKYLAKE-SSE: # %bb.0: 406; SKYLAKE-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00] 407; SKYLAKE-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00] 408; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 409; 410; SKYLAKE-LABEL: test_haddps: 411; SKYLAKE: # %bb.0: 412; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] 413; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 414; SKYLAKE-NEXT: retq # sched: [7:1.00] 415; 416; SKX-SSE-LABEL: test_haddps: 417; SKX-SSE: # %bb.0: 418; SKX-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00] 419; SKX-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00] 420; SKX-SSE-NEXT: retq # sched: [7:1.00] 421; 422; SKX-LABEL: test_haddps: 423; SKX: # %bb.0: 424; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] 425; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 426; SKX-NEXT: retq # sched: [7:1.00] 427; 428; BTVER2-SSE-LABEL: test_haddps: 429; BTVER2-SSE: # %bb.0: 430; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00] 431; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [8:1.00] 432; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 433; 434; BTVER2-LABEL: test_haddps: 435; BTVER2: # %bb.0: 436; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 437; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 438; BTVER2-NEXT: retq # sched: [4:1.00] 439; 440; ZNVER1-SSE-LABEL: test_haddps: 441; ZNVER1-SSE: # %bb.0: 442; ZNVER1-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [100:0.25] 443; ZNVER1-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [100:0.25] 444; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 445; 446; ZNVER1-LABEL: test_haddps: 447; ZNVER1: # %bb.0: 448; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25] 449; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25] 450; ZNVER1-NEXT: retq # sched: [1:0.50] 451 %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) 452 %2 = load <4 x float>, <4 x float> *%a2, align 16 453 %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, 
<4 x float> %2) 454 ret <4 x float> %3 455} 456declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 457 458define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 459; GENERIC-LABEL: test_hsubpd: 460; GENERIC: # %bb.0: 461; GENERIC-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] 462; GENERIC-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] 463; GENERIC-NEXT: retq # sched: [1:1.00] 464; 465; ATOM-LABEL: test_hsubpd: 466; ATOM: # %bb.0: 467; ATOM-NEXT: hsubpd %xmm1, %xmm0 # sched: [8:4.00] 468; ATOM-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:4.50] 469; ATOM-NEXT: retq # sched: [79:39.50] 470; 471; SLM-LABEL: test_hsubpd: 472; SLM: # %bb.0: 473; SLM-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00] 474; SLM-NEXT: hsubpd (%rdi), %xmm0 # sched: [6:1.00] 475; SLM-NEXT: retq # sched: [4:1.00] 476; 477; SANDY-SSE-LABEL: test_hsubpd: 478; SANDY-SSE: # %bb.0: 479; SANDY-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] 480; SANDY-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] 481; SANDY-SSE-NEXT: retq # sched: [1:1.00] 482; 483; SANDY-LABEL: test_hsubpd: 484; SANDY: # %bb.0: 485; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 486; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] 487; SANDY-NEXT: retq # sched: [1:1.00] 488; 489; HASWELL-SSE-LABEL: test_hsubpd: 490; HASWELL-SSE: # %bb.0: 491; HASWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] 492; HASWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] 493; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 494; 495; HASWELL-LABEL: test_hsubpd: 496; HASWELL: # %bb.0: 497; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 498; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] 499; HASWELL-NEXT: retq # sched: [7:1.00] 500; 501; BROADWELL-SSE-LABEL: test_hsubpd: 502; BROADWELL-SSE: # %bb.0: 503; BROADWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] 504; BROADWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [10:2.00] 505; 
BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 506; 507; BROADWELL-LABEL: test_hsubpd: 508; BROADWELL: # %bb.0: 509; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 510; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00] 511; BROADWELL-NEXT: retq # sched: [7:1.00] 512; 513; SKYLAKE-SSE-LABEL: test_hsubpd: 514; SKYLAKE-SSE: # %bb.0: 515; SKYLAKE-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00] 516; SKYLAKE-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00] 517; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 518; 519; SKYLAKE-LABEL: test_hsubpd: 520; SKYLAKE: # %bb.0: 521; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] 522; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 523; SKYLAKE-NEXT: retq # sched: [7:1.00] 524; 525; SKX-SSE-LABEL: test_hsubpd: 526; SKX-SSE: # %bb.0: 527; SKX-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00] 528; SKX-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00] 529; SKX-SSE-NEXT: retq # sched: [7:1.00] 530; 531; SKX-LABEL: test_hsubpd: 532; SKX: # %bb.0: 533; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] 534; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 535; SKX-NEXT: retq # sched: [7:1.00] 536; 537; BTVER2-SSE-LABEL: test_hsubpd: 538; BTVER2-SSE: # %bb.0: 539; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00] 540; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [8:1.00] 541; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 542; 543; BTVER2-LABEL: test_hsubpd: 544; BTVER2: # %bb.0: 545; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 546; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 547; BTVER2-NEXT: retq # sched: [4:1.00] 548; 549; ZNVER1-SSE-LABEL: test_hsubpd: 550; ZNVER1-SSE: # %bb.0: 551; ZNVER1-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [100:0.25] 552; ZNVER1-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [100:0.25] 553; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 554; 555; ZNVER1-LABEL: test_hsubpd: 556; ZNVER1: # %bb.0: 557; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 
# sched: [100:0.25] 558; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] 559; ZNVER1-NEXT: retq # sched: [1:0.50] 560 %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) 561 %2 = load <2 x double>, <2 x double> *%a2, align 16 562 %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2) 563 ret <2 x double> %3 564} 565declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 566 567define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 568; GENERIC-LABEL: test_hsubps: 569; GENERIC: # %bb.0: 570; GENERIC-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] 571; GENERIC-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] 572; GENERIC-NEXT: retq # sched: [1:1.00] 573; 574; ATOM-LABEL: test_hsubps: 575; ATOM: # %bb.0: 576; ATOM-NEXT: hsubps %xmm1, %xmm0 # sched: [8:4.00] 577; ATOM-NEXT: hsubps (%rdi), %xmm0 # sched: [9:4.50] 578; ATOM-NEXT: retq # sched: [79:39.50] 579; 580; SLM-LABEL: test_hsubps: 581; SLM: # %bb.0: 582; SLM-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00] 583; SLM-NEXT: hsubps (%rdi), %xmm0 # sched: [6:1.00] 584; SLM-NEXT: retq # sched: [4:1.00] 585; 586; SANDY-SSE-LABEL: test_hsubps: 587; SANDY-SSE: # %bb.0: 588; SANDY-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] 589; SANDY-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] 590; SANDY-SSE-NEXT: retq # sched: [1:1.00] 591; 592; SANDY-LABEL: test_hsubps: 593; SANDY: # %bb.0: 594; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 595; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] 596; SANDY-NEXT: retq # sched: [1:1.00] 597; 598; HASWELL-SSE-LABEL: test_hsubps: 599; HASWELL-SSE: # %bb.0: 600; HASWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] 601; HASWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] 602; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 603; 604; HASWELL-LABEL: test_hsubps: 605; HASWELL: # %bb.0: 606; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: 
[5:2.00] 607; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] 608; HASWELL-NEXT: retq # sched: [7:1.00] 609; 610; BROADWELL-SSE-LABEL: test_hsubps: 611; BROADWELL-SSE: # %bb.0: 612; BROADWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] 613; BROADWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [10:2.00] 614; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 615; 616; BROADWELL-LABEL: test_hsubps: 617; BROADWELL: # %bb.0: 618; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] 619; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00] 620; BROADWELL-NEXT: retq # sched: [7:1.00] 621; 622; SKYLAKE-SSE-LABEL: test_hsubps: 623; SKYLAKE-SSE: # %bb.0: 624; SKYLAKE-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00] 625; SKYLAKE-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00] 626; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 627; 628; SKYLAKE-LABEL: test_hsubps: 629; SKYLAKE: # %bb.0: 630; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] 631; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 632; SKYLAKE-NEXT: retq # sched: [7:1.00] 633; 634; SKX-SSE-LABEL: test_hsubps: 635; SKX-SSE: # %bb.0: 636; SKX-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00] 637; SKX-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00] 638; SKX-SSE-NEXT: retq # sched: [7:1.00] 639; 640; SKX-LABEL: test_hsubps: 641; SKX: # %bb.0: 642; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] 643; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] 644; SKX-NEXT: retq # sched: [7:1.00] 645; 646; BTVER2-SSE-LABEL: test_hsubps: 647; BTVER2-SSE: # %bb.0: 648; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00] 649; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [8:1.00] 650; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 651; 652; BTVER2-LABEL: test_hsubps: 653; BTVER2: # %bb.0: 654; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 655; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 656; BTVER2-NEXT: retq # sched: [4:1.00] 657; 658; 
ZNVER1-SSE-LABEL: test_hsubps: 659; ZNVER1-SSE: # %bb.0: 660; ZNVER1-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [100:0.25] 661; ZNVER1-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [100:0.25] 662; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 663; 664; ZNVER1-LABEL: test_hsubps: 665; ZNVER1: # %bb.0: 666; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25] 667; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25] 668; ZNVER1-NEXT: retq # sched: [1:0.50] 669 %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 670 %2 = load <4 x float>, <4 x float> *%a2, align 16 671 %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2) 672 ret <4 x float> %3 673} 674declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 675 676define <16 x i8> @test_lddqu(i8* %a0) { 677; GENERIC-LABEL: test_lddqu: 678; GENERIC: # %bb.0: 679; GENERIC-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] 680; GENERIC-NEXT: retq # sched: [1:1.00] 681; 682; ATOM-LABEL: test_lddqu: 683; ATOM: # %bb.0: 684; ATOM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.50] 685; ATOM-NEXT: nop # sched: [1:0.50] 686; ATOM-NEXT: nop # sched: [1:0.50] 687; ATOM-NEXT: retq # sched: [79:39.50] 688; 689; SLM-LABEL: test_lddqu: 690; SLM: # %bb.0: 691; SLM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.00] 692; SLM-NEXT: retq # sched: [4:1.00] 693; 694; SANDY-SSE-LABEL: test_lddqu: 695; SANDY-SSE: # %bb.0: 696; SANDY-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] 697; SANDY-SSE-NEXT: retq # sched: [1:1.00] 698; 699; SANDY-LABEL: test_lddqu: 700; SANDY: # %bb.0: 701; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] 702; SANDY-NEXT: retq # sched: [1:1.00] 703; 704; HASWELL-SSE-LABEL: test_lddqu: 705; HASWELL-SSE: # %bb.0: 706; HASWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] 707; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 708; 709; HASWELL-LABEL: test_lddqu: 710; HASWELL: # %bb.0: 711; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] 712; HASWELL-NEXT: 
retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_lddqu:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_lddqu:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_lddqu:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_lddqu:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_lddqu:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_lddqu:
; SKX: # %bb.0:
; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_lddqu:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_lddqu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_lddqu:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_lddqu:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly

; MONITOR, reached through the @llvm.x86.sse3.monitor intrinsic (one pointer
; and two i32 operands). Every run below expects the same shape of code: the
; pointer is copied into %rax with leaq, %a1 into %ecx with movl, then a
; single `monitor` and the return, each carrying the `# sched:` annotation
; that llc prints under -print-schedule for that CPU.
; The assertion lines are autogenerated (see the note at the top of the
; file); regenerate them with the update script instead of editing by hand.
define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
; GENERIC-LABEL: test_monitor:
; GENERIC: # %bb.0:
; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: monitor # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_monitor:
; ATOM: # %bb.0:
; ATOM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
; ATOM-NEXT: movl %esi, %ecx # sched: [1:0.50]
; ATOM-NEXT: monitor # sched: [45:22.50]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_monitor:
; SLM: # %bb.0:
; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50]
; SLM-NEXT: monitor # sched: [100:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_monitor:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; SANDY-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33]
; SANDY-SSE-NEXT: monitor # sched: [100:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_monitor:
; SANDY: # %bb.0:
; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33]
; SANDY-NEXT: monitor # sched: [100:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_monitor:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; HASWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
; HASWELL-SSE-NEXT: monitor # sched: [100:0.25]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_monitor:
; HASWELL: # %bb.0:
; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
; HASWELL-NEXT: monitor # sched: [100:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_monitor:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; BROADWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
; BROADWELL-SSE-NEXT: monitor # sched: [100:0.25]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_monitor:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
; BROADWELL-NEXT: monitor # sched: [100:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_monitor:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; SKYLAKE-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: monitor # sched: [100:0.25]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_monitor:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25]
; SKYLAKE-NEXT: monitor # sched: [100:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_monitor:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; SKX-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
; SKX-SSE-NEXT: monitor # sched: [100:0.25]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_monitor:
; SKX: # %bb.0:
; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25]
; SKX-NEXT: monitor # sched: [100:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_monitor:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
; BTVER2-SSE-NEXT: monitor # sched: [100:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_monitor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
; BTVER2-NEXT: monitor # sched: [100:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_monitor:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
; ZNVER1-SSE-NEXT: monitor # sched: [100:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_monitor:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: monitor # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
  ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32)

; MOVDDUP in both operand forms: %1 splats element 0 of the register argument
; %a0 and %3 splats element 0 of the vector loaded from %a1 (both shuffles
; use a zeroinitializer mask). The two splats are then combined so that each
; run is expected to contain one register-form and one memory-form
; (v)movddup; the order of the two and the register assignment differ per CPU.
define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; GENERIC-LABEL: test_movddup:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movddup:
; ATOM: # %bb.0:
; ATOM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; ATOM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movddup:
; SLM: # %bb.0:
; SLM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
; SLM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SLM-NEXT: subpd %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movddup:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; SANDY-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movddup:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movddup:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movddup:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movddup:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movddup:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movddup:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movddup:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movddup:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movddup:
; SKX: # %bb.0:
; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movddup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movddup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movddup:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movddup:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
; ZNVER1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
  %2 = load <2 x double>, <2 x double> *%a1, align 16
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
  %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
  ret <2 x double> %4
}

; MOVSHDUP in both operand forms: the <1,1,3,3> shuffle is applied once to
; the register argument %a0 and once to the vector loaded from %a1, and the
; two results are summed with fadd. Each run is expected to contain one
; register-form and one memory-form (v)movshdup followed by (v)addps.
define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_movshdup:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movshdup:
; ATOM: # %bb.0:
; ATOM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; ATOM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movshdup:
; SLM: # %bb.0:
; SLM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
; SLM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movshdup:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movshdup:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movshdup:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movshdup:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movshdup:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movshdup:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movshdup:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movshdup:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movshdup:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movshdup:
; SKX: # %bb.0:
; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movshdup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movshdup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movshdup:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movshdup:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %2 = load <4 x float>, <4 x float> *%a1, align 16
  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}

; MOVSLDUP in both operand forms: the <0,0,2,2> shuffle is applied once to
; the register argument %a0 and once to the vector loaded from %a1, and the
; two results are summed with fadd. Each run is expected to contain one
; register-form and one memory-form (v)movsldup followed by (v)addps.
;
; NOTE(review): in the ZNVER1-SSE run both movsldup forms are annotated
; [100:0.25], whereas the same run reports [1:0.50] / [8:0.50] for movshdup
; and the VEX znver1 run reports [1:0.50] / [8:0.50] for vmovsldup. This
; looks like a gap in the znver1 scheduling model for the non-VEX MOVSLDUP
; forms, not an intended value - TODO confirm. If so, fix the model and
; regenerate the assertions; do not hand-edit the expected numbers here.
define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; GENERIC-LABEL: test_movsldup:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movsldup:
; ATOM: # %bb.0:
; ATOM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; ATOM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movsldup:
; SLM: # %bb.0:
; SLM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
; SLM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movsldup:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movsldup:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movsldup:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movsldup:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movsldup:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movsldup:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movsldup:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movsldup:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movsldup:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movsldup:
; SKX: # %bb.0:
; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movsldup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movsldup:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movsldup:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25]
; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25]
; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movsldup:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %2 = load <4 x float>, <4 x float> *%a1, align 16
  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}

; MWAIT, reached through the @llvm.x86.sse3.mwait intrinsic (two i32
; operands). Every run below expects %a0 to be copied into %ecx and %a1 into
; %eax with movl, then a single `mwait` and the return, each carrying the
; `# sched:` annotation printed for that CPU.
define void @test_mwait(i32 %a0, i32 %a1) {
; GENERIC-LABEL: test_mwait:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33]
; GENERIC-NEXT: mwait # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_mwait:
; ATOM: # %bb.0:
; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50]
; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50]
; ATOM-NEXT: mwait # sched: [46:23.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_mwait:
; SLM: # %bb.0:
; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50]
; SLM-NEXT: movl %esi, %eax # sched: [1:0.50]
; SLM-NEXT: mwait # sched: [100:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_mwait:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33]
; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33]
; SANDY-SSE-NEXT: mwait # sched: [100:0.33]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_mwait:
; SANDY: # %bb.0:
; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33]
; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33]
; SANDY-NEXT: mwait # sched: [100:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_mwait:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
; HASWELL-SSE-NEXT: mwait # sched: [20:2.50]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_mwait:
; HASWELL: # %bb.0:
; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
; HASWELL-NEXT: mwait # sched: [20:2.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_mwait:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_mwait:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
; BROADWELL-NEXT: mwait # sched: [100:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_mwait:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_mwait:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25]
; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: mwait # sched: [20:2.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_mwait:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
; SKX-SSE-NEXT: mwait # sched: [20:2.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_mwait:
; SKX: # %bb.0:
; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25]
; SKX-NEXT: movl %esi, %eax # sched: [1:0.25]
; SKX-NEXT: mwait # sched: [20:2.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_mwait:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50]
; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
; BTVER2-SSE-NEXT: mwait # sched: [100:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_mwait:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50]
; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.50]
; BTVER2-NEXT: mwait # sched: [100:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_mwait:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
; ZNVER1-SSE-NEXT: mwait # sched: [100:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_mwait:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: mwait # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
  ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32)