; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64

define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfadd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfadd (%eax), %mm0
; X32-NEXT:    pfadd (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfadd:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfadd (%rsi), %mm0
; X64-NEXT:    pfadd (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx)

define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsub:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsub (%eax), %mm0
; X32-NEXT:    pfsubr (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsub:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsub (%rsi), %mm0
; X64-NEXT:    pfsubr (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx)

define void @commute_m_pfsubr(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsubr:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsubr (%eax), %mm0
; X32-NEXT:    pfsub (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsubr:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsubr (%rsi), %mm0
; X64-NEXT:    pfsub (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx)

define void @commute_m_pfmul(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmul:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfmul (%eax), %mm0
; X32-NEXT:    pfmul (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmul:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfmul (%rsi), %mm0
; X64-NEXT:    pfmul (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx)

; PFMAX can't commute without fast-math.
define void @commute_m_pfmax(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmax:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmax (%eax), %mm0
; X32-NEXT:    pfmax %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmax:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmax (%rsi), %mm0
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx)

; PFMIN can't commute without fast-math.
define void @commute_m_pfmin(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmin:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmin (%eax), %mm0
; X32-NEXT:    pfmin %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmin:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmin (%rsi), %mm0
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx)

define void @commute_m_pfcmpeq(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfcmpeq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfcmpeq (%eax), %mm0
; X32-NEXT:    pfcmpeq (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfcmpeq:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfcmpeq (%rsi), %mm0
; X64-NEXT:    pfcmpeq (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx)

define void @commute_m_pavgusb(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pavgusb:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pavgusb (%eax), %mm0
; X32-NEXT:    pavgusb (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pavgusb:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pavgusb (%rsi), %mm0
; X64-NEXT:    pavgusb (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx)

define void @commute_m_pmulhrw(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pmulhrw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pmulhrw (%eax), %mm0
; X32-NEXT:    pmulhrw (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pmulhrw:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pmulhrw (%rsi), %mm0
; X64-NEXT:    pmulhrw (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx)