; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s

; Stack-folding tests for MMX conversions. Each function places an inline-asm
; "nop" that clobbers every other register of the relevant class, so the
; incoming argument has to be spilled around it. The expected assembly then
; shows the reload being folded into the memory operand of the instruction
; under test (the "Folded Reload" annotation).

define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvtpd2pi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) {
; CHECK-LABEL: stack_fold_cvtpi2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtpi2pd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) {
; CHECK-LABEL: stack_fold_cvtpi2ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtpi2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvtps2pi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvtps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone

define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_cvttpd2pi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvttpd2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_cvttps2pi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    cvttps2pi {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 16-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone

; TODO stack_fold_movd_load

; padd forces execution on mmx
; The inline asm below clobbers every general-purpose register, so the
; extracted i32 must live on the stack across it. The expected code stores it
; straight from %mm0 into the spill slot with movd (a folded spill) and reloads
; it into %eax afterwards.
define i32 @stack_fold_movd_store(x86_mmx %a0) nounwind {
; CHECK-LABEL: stack_fold_movd_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    paddb %mm0, %mm0
; CHECK-NEXT:    movd %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
  %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0)
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i32 0
  %4 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %3
}

; TODO stack_fold_movq_load

; padd forces execution on mmx
define i64 @stack_fold_movq_store(x86_mmx %a0) nounwind {
; CHECK-LABEL: stack_fold_movq_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    paddb %mm0, %mm0
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
  %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a0)
  %2 = bitcast x86_mmx %1 to i64
  %3 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i64 %2
}

; Unary MMX operations: the inline-asm nop clobbers %mm1-%mm7, the single
; argument in %mm0 is spilled around it, and the reload is expected to be
; folded into the instruction's memory operand.

define x86_mmx @stack_fold_pabsb(x86_mmx %a0) {
; CHECK-LABEL: stack_fold_pabsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %a0) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pabsd(x86_mmx %a0) {
; CHECK-LABEL: stack_fold_pabsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %a0) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pabsw(x86_mmx %a0) {
; CHECK-LABEL: stack_fold_pabsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %a0) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

; Binary MMX operations: the inline-asm nop clobbers %mm2-%mm7, the second
; argument in %mm1 is spilled around it, and the reload is expected to be
; folded as the memory source operand while %mm0 stays in a register.

define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_packssdw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    packssdw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_packsswb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    packsswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_packuswb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    packuswb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_paddb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_paddd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_paddq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_paddsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_paddsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_paddusb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_paddusw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_paddw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_palignr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    palignr $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a, x86_mmx %b, i8 1) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pand:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pand {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pandn:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pandn {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pavgb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pavgb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pavgw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pavgw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pcmpeqb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpeqb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pcmpeqd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pcmpeqw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpeqw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pcmpgtb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpgtb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pcmpgtd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpgtd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pcmpgtw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpgtw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_phaddd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_phaddd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phaddd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_phaddsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phaddsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_phaddw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phaddw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_phsubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phsubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_phsubsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phsubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_phsubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phsubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

; TODO stack_fold_pinsrw

define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pmaddubsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaddubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pmaddwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaddwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pmaxsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaxsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pmaxub:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaxub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pminsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pminsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pminub:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pminub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pmulhrsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmulhrsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pmulhuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmulhuw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pmulhw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmulhw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop",
"=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 798 %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone 799 ret x86_mmx %2 800} 801declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone 802 803define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) { 804; CHECK-LABEL: stack_fold_pmullw: 805; CHECK: # %bb.0: 806; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 807; CHECK-NEXT: #APP 808; CHECK-NEXT: nop 809; CHECK-NEXT: #NO_APP 810; CHECK-NEXT: pmullw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 811; CHECK-NEXT: movq2dq %mm0, %xmm0 812; CHECK-NEXT: retq 813 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 814 %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone 815 ret x86_mmx %2 816} 817declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone 818 819define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) { 820; CHECK-LABEL: stack_fold_pmuludq: 821; CHECK: # %bb.0: 822; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 823; CHECK-NEXT: #APP 824; CHECK-NEXT: nop 825; CHECK-NEXT: #NO_APP 826; CHECK-NEXT: pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 827; CHECK-NEXT: movq2dq %mm0, %xmm0 828; CHECK-NEXT: retq 829 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 830 %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone 831 ret x86_mmx %2 832} 833declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone 834 835define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) { 836; CHECK-LABEL: stack_fold_por: 837; CHECK: # %bb.0: 838; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 839; CHECK-NEXT: #APP 840; CHECK-NEXT: nop 841; CHECK-NEXT: #NO_APP 842; CHECK-NEXT: por {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 843; CHECK-NEXT: movq2dq %mm0, %xmm0 844; CHECK-NEXT: retq 845 
%1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 846 %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone 847 ret x86_mmx %2 848} 849declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone 850 851define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) { 852; CHECK-LABEL: stack_fold_psadbw: 853; CHECK: # %bb.0: 854; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 855; CHECK-NEXT: #APP 856; CHECK-NEXT: nop 857; CHECK-NEXT: #NO_APP 858; CHECK-NEXT: psadbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 859; CHECK-NEXT: movq2dq %mm0, %xmm0 860; CHECK-NEXT: retq 861 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 862 %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone 863 ret x86_mmx %2 864} 865declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone 866 867define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) { 868; CHECK-LABEL: stack_fold_pshufb: 869; CHECK: # %bb.0: 870; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 871; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 872; CHECK-NEXT: #APP 873; CHECK-NEXT: nop 874; CHECK-NEXT: #NO_APP 875; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Reload 876; CHECK-NEXT: pshufb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 877; CHECK-NEXT: movq2dq %mm0, %xmm0 878; CHECK-NEXT: retq 879 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 880 %2 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a, x86_mmx %b) nounwind readnone 881 ret x86_mmx %2 882} 883declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone 884 885define x86_mmx @stack_fold_pshufw(x86_mmx %a) { 886; CHECK-LABEL: stack_fold_pshufw: 887; CHECK: # %bb.0: 888; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 889; CHECK-NEXT: #APP 890; 
CHECK-NEXT: nop 891; CHECK-NEXT: #NO_APP 892; CHECK-NEXT: pshufw $1, {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 893; CHECK-NEXT: # mm0 = mem[1,0,0,0] 894; CHECK-NEXT: movq2dq %mm0, %xmm0 895; CHECK-NEXT: retq 896 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 897 %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone 898 ret x86_mmx %2 899} 900declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone 901 902define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) { 903; CHECK-LABEL: stack_fold_psignb: 904; CHECK: # %bb.0: 905; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 906; CHECK-NEXT: #APP 907; CHECK-NEXT: nop 908; CHECK-NEXT: #NO_APP 909; CHECK-NEXT: psignb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 910; CHECK-NEXT: movq2dq %mm0, %xmm0 911; CHECK-NEXT: retq 912 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 913 %2 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) nounwind readnone 914 ret x86_mmx %2 915} 916declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone 917 918define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) { 919; CHECK-LABEL: stack_fold_psignd: 920; CHECK: # %bb.0: 921; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 922; CHECK-NEXT: #APP 923; CHECK-NEXT: nop 924; CHECK-NEXT: #NO_APP 925; CHECK-NEXT: psignd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 926; CHECK-NEXT: movq2dq %mm0, %xmm0 927; CHECK-NEXT: retq 928 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 929 %2 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) nounwind readnone 930 ret x86_mmx %2 931} 932declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone 933 934define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) { 935; CHECK-LABEL: stack_fold_psignw: 936; 
CHECK: # %bb.0: 937; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 938; CHECK-NEXT: #APP 939; CHECK-NEXT: nop 940; CHECK-NEXT: #NO_APP 941; CHECK-NEXT: psignw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 942; CHECK-NEXT: movq2dq %mm0, %xmm0 943; CHECK-NEXT: retq 944 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 945 %2 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) nounwind readnone 946 ret x86_mmx %2 947} 948declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone 949 950define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) { 951; CHECK-LABEL: stack_fold_pslld: 952; CHECK: # %bb.0: 953; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 954; CHECK-NEXT: #APP 955; CHECK-NEXT: nop 956; CHECK-NEXT: #NO_APP 957; CHECK-NEXT: pslld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 958; CHECK-NEXT: movq2dq %mm0, %xmm0 959; CHECK-NEXT: retq 960 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 961 %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone 962 ret x86_mmx %2 963} 964declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone 965 966define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) { 967; CHECK-LABEL: stack_fold_psllq: 968; CHECK: # %bb.0: 969; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 970; CHECK-NEXT: #APP 971; CHECK-NEXT: nop 972; CHECK-NEXT: #NO_APP 973; CHECK-NEXT: psllq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 974; CHECK-NEXT: movq2dq %mm0, %xmm0 975; CHECK-NEXT: retq 976 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 977 %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone 978 ret x86_mmx %2 979} 980declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone 981 982define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) 
{ 983; CHECK-LABEL: stack_fold_psllw: 984; CHECK: # %bb.0: 985; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 986; CHECK-NEXT: #APP 987; CHECK-NEXT: nop 988; CHECK-NEXT: #NO_APP 989; CHECK-NEXT: psllw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 990; CHECK-NEXT: movq2dq %mm0, %xmm0 991; CHECK-NEXT: retq 992 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 993 %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone 994 ret x86_mmx %2 995} 996declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone 997 998define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) { 999; CHECK-LABEL: stack_fold_psrad: 1000; CHECK: # %bb.0: 1001; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1002; CHECK-NEXT: #APP 1003; CHECK-NEXT: nop 1004; CHECK-NEXT: #NO_APP 1005; CHECK-NEXT: psrad {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 1006; CHECK-NEXT: movq2dq %mm0, %xmm0 1007; CHECK-NEXT: retq 1008 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1009 %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone 1010 ret x86_mmx %2 1011} 1012declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone 1013 1014define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) { 1015; CHECK-LABEL: stack_fold_psraw: 1016; CHECK: # %bb.0: 1017; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1018; CHECK-NEXT: #APP 1019; CHECK-NEXT: nop 1020; CHECK-NEXT: #NO_APP 1021; CHECK-NEXT: psraw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload 1022; CHECK-NEXT: movq2dq %mm0, %xmm0 1023; CHECK-NEXT: retq 1024 %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() 1025 %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone 1026 ret x86_mmx %2 1027} 1028declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone 
; Stack-folding tests for MMX intrinsics, psrld through pxor.
;
; Every function uses the same recipe: an inline-asm "nop" that produces an
; x86_mmx value under the "=y" constraint and lists mm2-mm7 as clobbers is
; placed ahead of the intrinsic call. The autogenerated assertions expect
; %mm1 to be stored to a stack slot before the asm and then read directly
; from that slot by the instruction under test ("Folded Reload"), followed by
; movq2dq into %xmm0 and retq.
;
; The assertion lines are produced by utils/update_llc_test_checks.py; rerun
; the script instead of editing them by hand.

; psrld: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psrld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psrld {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

; psrlq: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psrlq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psrlq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

; psrlw: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psrlw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psrlw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

; psubb: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psubb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

; psubd: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

; psubq: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psubq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

; psubsb: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psubsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubsb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

; psubsw: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psubsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubsw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

; psubusb: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psubusb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

; psubusw: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psubusw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubusw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

; psubw: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_psubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

; punpckhbw: folded from the stack slot; the extra assertion pins the decoded
; interleave of elements 4-7 of %mm0 and of the memory operand.
define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_punpckhbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckhbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

; punpckhdq: folded from the stack slot; the extra assertion pins the decoded
; interleave of element 1 of %mm0 and of the memory operand.
define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_punpckhdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckhdq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    # mm0 = mm0[1],mem[1]
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

; punpckhwd: folded from the stack slot; the extra assertion pins the decoded
; interleave of elements 2-3 of %mm0 and of the memory operand.
define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_punpckhwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckhwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    # mm0 = mm0[2],mem[2],mm0[3],mem[3]
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

; punpcklbw: folded from the stack slot; the extra assertion pins the decoded
; interleave of elements 0-3 of %mm0 and of the memory operand.
define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_punpcklbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

; punpckldq: folded from the stack slot; the extra assertion pins the decoded
; interleave of element 0 of %mm0 and of the memory operand.
define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_punpckldq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    # mm0 = mm0[0],mem[0]
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

; punpcklwd: folded from the stack slot; the extra assertion pins the decoded
; interleave of elements 0-1 of %mm0 and of the memory operand.
define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_punpcklwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    # mm0 = mm0[0],mem[0],mm0[1],mem[1]
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

; pxor: %mm1 is spilled around the asm and folded as the memory operand.
define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
; CHECK-LABEL: stack_fold_pxor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pxor {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
; CHECK-NEXT:    movq2dq %mm0, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
  %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
  ret x86_mmx %2
}
declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone