; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-pc-linux -mattr=mmx < %s | FileCheck %s

; Regression test: the MMX packed-subtract opcodes used to be flagged as
; commutative, which is wrong because subtraction is order-sensitive.
; Each function below verifies that the "Two-Address instruction pass" never
; swaps the operands of a packed-subtract instruction: the value returned by
; @getFirstParam must remain the destination operand (%mm0) and the value
; returned by @getSecondParam must remain the source operand (%mm1).

; Opaque operand producers. Only element 0 (the i64) of each result is used.
declare { i64, double } @getFirstParam()
declare { i64, double } @getSecondParam()

; MMX packed-subtract intrinsics exercised by this file, in test order.
declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

; psubb: packed subtract viewed as <8 x i8>, first - second.
define i64 @test_psubb() {
; CHECK-LABEL: test_psubb:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubb %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  %first.ret = tail call { i64, double } @getFirstParam()
  %lhs.i64 = extractvalue { i64, double } %first.ret, 0
  %second.ret = tail call { i64, double } @getSecondParam()
  %rhs.i64 = extractvalue { i64, double } %second.ret, 0
  %lhs.v1i64 = insertelement <1 x i64> undef, i64 %lhs.i64, i32 0
  %rhs.v1i64 = insertelement <1 x i64> undef, i64 %rhs.i64, i32 0
  %lhs.lanes = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
  %lhs.mmx = bitcast <8 x i8> %lhs.lanes to x86_mmx
  %rhs.lanes = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
  %rhs.mmx = bitcast <8 x i8> %rhs.lanes to x86_mmx
  %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
  %diff.lanes = bitcast x86_mmx %diff.mmx to <8 x i8>
  %diff.v1i64 = bitcast <8 x i8> %diff.lanes to <1 x i64>
  %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
  ret i64 %diff.i64
}

; psubw: packed subtract viewed as <4 x i16>, first - second.
define i64 @test_psubw() {
; CHECK-LABEL: test_psubw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubw %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  %first.ret = tail call { i64, double } @getFirstParam()
  %lhs.i64 = extractvalue { i64, double } %first.ret, 0
  %second.ret = tail call { i64, double } @getSecondParam()
  %rhs.i64 = extractvalue { i64, double } %second.ret, 0
  %lhs.v1i64 = insertelement <1 x i64> undef, i64 %lhs.i64, i32 0
  %rhs.v1i64 = insertelement <1 x i64> undef, i64 %rhs.i64, i32 0
  %lhs.lanes = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
  %lhs.mmx = bitcast <4 x i16> %lhs.lanes to x86_mmx
  %rhs.lanes = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
  %rhs.mmx = bitcast <4 x i16> %rhs.lanes to x86_mmx
  %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
  %diff.lanes = bitcast x86_mmx %diff.mmx to <4 x i16>
  %diff.v1i64 = bitcast <4 x i16> %diff.lanes to <1 x i64>
  %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
  ret i64 %diff.i64
}

; psubd: packed subtract viewed as <2 x i32>, first - second.
define i64 @test_psubd() {
; CHECK-LABEL: test_psubd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubd %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  %first.ret = tail call { i64, double } @getFirstParam()
  %lhs.i64 = extractvalue { i64, double } %first.ret, 0
  %second.ret = tail call { i64, double } @getSecondParam()
  %rhs.i64 = extractvalue { i64, double } %second.ret, 0
  %lhs.v1i64 = insertelement <1 x i64> undef, i64 %lhs.i64, i32 0
  %rhs.v1i64 = insertelement <1 x i64> undef, i64 %rhs.i64, i32 0
  %lhs.lanes = bitcast <1 x i64> %lhs.v1i64 to <2 x i32>
  %lhs.mmx = bitcast <2 x i32> %lhs.lanes to x86_mmx
  %rhs.lanes = bitcast <1 x i64> %rhs.v1i64 to <2 x i32>
  %rhs.mmx = bitcast <2 x i32> %rhs.lanes to x86_mmx
  %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
  %diff.lanes = bitcast x86_mmx %diff.mmx to <2 x i32>
  %diff.v1i64 = bitcast <2 x i32> %diff.lanes to <1 x i64>
  %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
  ret i64 %diff.i64
}

; psubsb: signed-saturating packed subtract viewed as <8 x i8>, first - second.
define i64 @test_psubsb() {
; CHECK-LABEL: test_psubsb:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubsb %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  %first.ret = tail call { i64, double } @getFirstParam()
  %lhs.i64 = extractvalue { i64, double } %first.ret, 0
  %second.ret = tail call { i64, double } @getSecondParam()
  %rhs.i64 = extractvalue { i64, double } %second.ret, 0
  %lhs.v1i64 = insertelement <1 x i64> undef, i64 %lhs.i64, i32 0
  %rhs.v1i64 = insertelement <1 x i64> undef, i64 %rhs.i64, i32 0
  %lhs.lanes = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
  %lhs.mmx = bitcast <8 x i8> %lhs.lanes to x86_mmx
  %rhs.lanes = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
  %rhs.mmx = bitcast <8 x i8> %rhs.lanes to x86_mmx
  %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
  %diff.lanes = bitcast x86_mmx %diff.mmx to <8 x i8>
  %diff.v1i64 = bitcast <8 x i8> %diff.lanes to <1 x i64>
  %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
  ret i64 %diff.i64
}

; psubsw: signed-saturating packed subtract viewed as <4 x i16>, first - second.
define i64 @test_psubswv() {
; CHECK-LABEL: test_psubswv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubsw %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  %first.ret = tail call { i64, double } @getFirstParam()
  %lhs.i64 = extractvalue { i64, double } %first.ret, 0
  %second.ret = tail call { i64, double } @getSecondParam()
  %rhs.i64 = extractvalue { i64, double } %second.ret, 0
  %lhs.v1i64 = insertelement <1 x i64> undef, i64 %lhs.i64, i32 0
  %rhs.v1i64 = insertelement <1 x i64> undef, i64 %rhs.i64, i32 0
  %lhs.lanes = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
  %lhs.mmx = bitcast <4 x i16> %lhs.lanes to x86_mmx
  %rhs.lanes = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
  %rhs.mmx = bitcast <4 x i16> %rhs.lanes to x86_mmx
  %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
  %diff.lanes = bitcast x86_mmx %diff.mmx to <4 x i16>
  %diff.v1i64 = bitcast <4 x i16> %diff.lanes to <1 x i64>
  %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
  ret i64 %diff.i64
}

; psubusb: unsigned-saturating packed subtract viewed as <8 x i8>, first - second.
define i64 @test_psubusbv() {
; CHECK-LABEL: test_psubusbv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubusb %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  %first.ret = tail call { i64, double } @getFirstParam()
  %lhs.i64 = extractvalue { i64, double } %first.ret, 0
  %second.ret = tail call { i64, double } @getSecondParam()
  %rhs.i64 = extractvalue { i64, double } %second.ret, 0
  %lhs.v1i64 = insertelement <1 x i64> undef, i64 %lhs.i64, i32 0
  %rhs.v1i64 = insertelement <1 x i64> undef, i64 %rhs.i64, i32 0
  %lhs.lanes = bitcast <1 x i64> %lhs.v1i64 to <8 x i8>
  %lhs.mmx = bitcast <8 x i8> %lhs.lanes to x86_mmx
  %rhs.lanes = bitcast <1 x i64> %rhs.v1i64 to <8 x i8>
  %rhs.mmx = bitcast <8 x i8> %rhs.lanes to x86_mmx
  %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
  %diff.lanes = bitcast x86_mmx %diff.mmx to <8 x i8>
  %diff.v1i64 = bitcast <8 x i8> %diff.lanes to <1 x i64>
  %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
  ret i64 %diff.i64
}

; psubusw: unsigned-saturating packed subtract viewed as <4 x i16>, first - second.
define i64 @test_psubuswv() {
; CHECK-LABEL: test_psubuswv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    callq getFirstParam
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    callq getSecondParam
; CHECK-NEXT:    movq %rbx, %mm0
; CHECK-NEXT:    movq %rax, %mm1
; CHECK-NEXT:    psubusw %mm1, %mm0
; CHECK-NEXT:    movq %mm0, %rax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
  %first.ret = tail call { i64, double } @getFirstParam()
  %lhs.i64 = extractvalue { i64, double } %first.ret, 0
  %second.ret = tail call { i64, double } @getSecondParam()
  %rhs.i64 = extractvalue { i64, double } %second.ret, 0
  %lhs.v1i64 = insertelement <1 x i64> undef, i64 %lhs.i64, i32 0
  %rhs.v1i64 = insertelement <1 x i64> undef, i64 %rhs.i64, i32 0
  %lhs.lanes = bitcast <1 x i64> %lhs.v1i64 to <4 x i16>
  %lhs.mmx = bitcast <4 x i16> %lhs.lanes to x86_mmx
  %rhs.lanes = bitcast <1 x i64> %rhs.v1i64 to <4 x i16>
  %rhs.mmx = bitcast <4 x i16> %rhs.lanes to x86_mmx
  %diff.mmx = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %lhs.mmx, x86_mmx %rhs.mmx) nounwind
  %diff.lanes = bitcast x86_mmx %diff.mmx to <4 x i16>
  %diff.v1i64 = bitcast <4 x i16> %diff.lanes to <1 x i64>
  %diff.i64 = extractelement <1 x i64> %diff.v1i64, i32 0
  ret i64 %diff.i64
}