//===---------------------------------------------------------------------===//
// Random ideas for the X86 backend: MMX-specific stuff.
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//

This:

#include <mmintrin.h>

__v2si qux(int A) {
  return (__v2si){ 0, A };
}

is compiled into:

_qux:
        subl    $28, %esp
        movl    32(%esp), %eax
        movd    %eax, %mm0
        movq    %mm0, (%esp)
        movl    (%esp), %eax
        movl    %eax, 20(%esp)
        movq    %mm0, 8(%esp)
        movl    12(%esp), %eax
        movl    %eax, 16(%esp)
        movq    16(%esp), %mm0
        addl    $28, %esp
        ret

Yuck!

GCC gives us:

_qux:
        subl    $12, %esp
        movl    16(%esp), %eax
        movl    20(%esp), %edx
        movl    $0, (%eax)
        movl    %edx, 4(%eax)
        addl    $12, %esp
        ret     $4

//===---------------------------------------------------------------------===//

We generate crappy code for this:

__m64 t() {
  return _mm_cvtsi32_si64(1);
}

_t:
        subl    $12, %esp
        movl    $1, %eax
        movd    %eax, %mm0
        movq    %mm0, (%esp)
        movl    (%esp), %eax
        movl    4(%esp), %edx
        addl    $12, %esp
        ret

The extra stack traffic is covered in the previous entry. But the other reason
is we are not smart about materializing constants in MMX registers. With -m64
we get:

        movl    $1, %eax
        movd    %eax, %mm0
        movd    %mm0, %rax
        ret

We should be using a constantpool load instead:
        movq    LC0(%rip), %rax