; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
;
; The inline asm clobbers every xmm register except the ones holding live arguments
; (xmm2-xmm15 for two-operand tests, xmm3-xmm15 when a third operand is live,
; xmm1-xmm15 for one-operand tests), so the arguments must be spilled across it.

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps
  ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd
  ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps
  ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ret <4 x float> %2
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd
  ;CHECK: blendvpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps
  ;CHECK: blendvps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd
  ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps
  ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd
  ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si64_int
  ;CHECK: cvtsd2siq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2ss

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss_int
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

; The int-to-fp tests below clobber every GPR instead, so the integer
; argument is the value that must be spilled and reloaded.

define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

define double @stack_fold_cvtss2sd(float %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fpext float %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd_int
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x float> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define i32 @stack_fold_cvttsd2si(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd
  ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divss_int
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  %1 = extractelement <4 x float> %a0, i32 1
  %2 = bitcast float %1 to i32
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps
  ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_hsubpd
  ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_hsubps
  ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

; TODO stack_fold_insertps

define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_maxpd
  ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double>
@llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 641 ret <2 x double> %2 642} 643declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 644 645define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { 646 ;CHECK-LABEL: stack_fold_maxps 647 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 648 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 649 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 650 ret <4 x float> %2 651} 652declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 653 654define double @stack_fold_maxsd(double %a0, double %a1) { 655 ;CHECK-LABEL: stack_fold_maxsd 656 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 657 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 658 %2 = fcmp ogt double %a0, %a1 659 %3 = select i1 %2, double %a0, double %a1 660 ret double %3 661} 662 663define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { 664 ;CHECK-LABEL: stack_fold_maxsd_int 665 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 666 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 667 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 668 ret <2 x double> %2 669} 670declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 671 672define float @stack_fold_maxss(float %a0, float %a1) { 673 ;CHECK-LABEL: stack_fold_maxss 674 ;CHECK: maxss {{-?[0-9]*}}(%rsp), 
{{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 675 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 676 %2 = fcmp ogt float %a0, %a1 677 %3 = select i1 %2, float %a0, float %a1 678 ret float %3 679} 680 681define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { 682 ;CHECK-LABEL: stack_fold_maxss_int 683 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 684 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 685 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 686 ret <4 x float> %2 687} 688declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 689 690define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { 691 ;CHECK-LABEL: stack_fold_minpd 692 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 693 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 694 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 695 ret <2 x double> %2 696} 697declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 698 699define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { 700 ;CHECK-LABEL: stack_fold_minps 701 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 702 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 703 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 704 
ret <4 x float> %2 705} 706declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 707 708define double @stack_fold_minsd(double %a0, double %a1) { 709 ;CHECK-LABEL: stack_fold_minsd 710 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 711 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 712 %2 = fcmp olt double %a0, %a1 713 %3 = select i1 %2, double %a0, double %a1 714 ret double %3 715} 716 717define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 718 ;CHECK-LABEL: stack_fold_minsd_int 719 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 720 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 721 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 722 ret <2 x double> %2 723} 724declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 725 726define float @stack_fold_minss(float %a0, float %a1) { 727 ;CHECK-LABEL: stack_fold_minss 728 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 729 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 730 %2 = fcmp olt float %a0, %a1 731 %3 = select i1 %2, float %a0, float %a1 732 ret float %3 733} 734 735define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { 736 ;CHECK-LABEL: stack_fold_minss_int 737 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 738 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 739 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 740 ret <4 x float> %2 741} 742declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 743 744define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 745 ;CHECK-LABEL: stack_fold_movddup 746 ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 747 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 748 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 749 ret <2 x double> %2 750} 751; TODO stack_fold_movhpd (load / store) 752; TODO stack_fold_movhps (load / store) 753 754; TODO stack_fold_movlpd (load / store) 755; TODO stack_fold_movlps (load / store) 756 757define <4 x float> @stack_fold_movshdup(<4 x float> %a0) { 758 ;CHECK-LABEL: stack_fold_movshdup 759 ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 760 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 761 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 762 ret <4 x float> %2 763} 764 765define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 766 ;CHECK-LABEL: stack_fold_movsldup 767 ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 768 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 769 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, 
i32 2, i32 2> 770 ret <4 x float> %2 771} 772 773define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 774 ;CHECK-LABEL: stack_fold_mulpd 775 ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 776 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 777 %2 = fmul <2 x double> %a0, %a1 778 ret <2 x double> %2 779} 780 781define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 782 ;CHECK-LABEL: stack_fold_mulps 783 ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 784 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 785 %2 = fmul <4 x float> %a0, %a1 786 ret <4 x float> %2 787} 788 789define double @stack_fold_mulsd(double %a0, double %a1) { 790 ;CHECK-LABEL: stack_fold_mulsd 791 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 792 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 793 %2 = fmul double %a0, %a1 794 ret double %2 795} 796 797define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 798 ;CHECK-LABEL: stack_fold_mulsd_int 799 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 800 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 801 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) 802 ret <2 x double> %2 803} 804declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 805 806define 
float @stack_fold_mulss(float %a0, float %a1) { 807 ;CHECK-LABEL: stack_fold_mulss 808 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 809 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 810 %2 = fmul float %a0, %a1 811 ret float %2 812} 813 814define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 815 ;CHECK-LABEL: stack_fold_mulss_int 816 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 817 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 818 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) 819 ret <4 x float> %2 820} 821declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 822 823define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 824 ;CHECK-LABEL: stack_fold_orpd 825 ;CHECK: orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 826 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 827 %2 = bitcast <2 x double> %a0 to <2 x i64> 828 %3 = bitcast <2 x double> %a1 to <2 x i64> 829 %4 = or <2 x i64> %2, %3 830 %5 = bitcast <2 x i64> %4 to <2 x double> 831 ; fadd forces execution domain 832 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 833 ret <2 x double> %6 834} 835 836define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 837 ;CHECK-LABEL: stack_fold_orps 838 ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 839 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 840 %2 = bitcast <4 x float> %a0 to <2 x i64> 841 %3 = bitcast <4 x float> %a1 to <2 x i64> 842 %4 = or <2 x i64> %2, %3 843 %5 = bitcast <2 x i64> %4 to <4 x float> 844 ; fadd forces execution domain 845 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 846 ret <4 x float> %6 847} 848 849; TODO stack_fold_rcpps 850 851define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 852 ;CHECK-LABEL: stack_fold_rcpps_int 853 ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 854 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 855 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 856 ret <4 x float> %2 857} 858declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 859 860; TODO stack_fold_rcpss 861; TODO stack_fold_rcpss_int 862 863define <2 x double> @stack_fold_roundpd(<2 x double> %a0) { 864 ;CHECK-LABEL: stack_fold_roundpd 865 ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 866 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 867 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) 868 ret <2 x double> %2 869} 870declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 871 872define <4 x float> @stack_fold_roundps(<4 x float> %a0) { 873 ;CHECK-LABEL: stack_fold_roundps 874 ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 875 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 876 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) 877 ret <4 x float> %2 878} 879declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 880 881; TODO stack_fold_roundsd 882; TODO stack_fold_roundsd_int 883 884; TODO stack_fold_roundss 885; TODO stack_fold_roundss_int 886 887; TODO stack_fold_rsqrtps 888 889define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) { 890 ;CHECK-LABEL: stack_fold_rsqrtps_int 891 ;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 892 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 893 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 894 ret <4 x float> %2 895} 896declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 897 898; TODO stack_fold_rsqrtss 899; TODO stack_fold_rsqrtss_int 900 901define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) { 902 ;CHECK-LABEL: stack_fold_shufpd 903 ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 904 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 905 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 906 ret <2 x double> %2 907} 908 909define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) { 910 ;CHECK-LABEL: stack_fold_shufps 911 ;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 912 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 913 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7> 914 ret <4 x float> %2 915} 916 917define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) { 918 ;CHECK-LABEL: stack_fold_sqrtpd 919 ;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 920 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 921 %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) 922 ret <2 x double> %2 923} 924declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 925 926define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) { 927 ;CHECK-LABEL: stack_fold_sqrtps 928 ;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 929 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 930 %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) 931 ret <4 x float> %2 932} 933declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone 934 935; TODO stack_fold_sqrtsd 936declare double @llvm.sqrt.f64(double) nounwind readnone 937 938; TODO stack_fold_sqrtsd_int 939declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 940 941; TODO stack_fold_sqrtss 942declare float @llvm.sqrt.f32(float) nounwind readnone 943 944; TODO stack_fold_sqrtss_int 945declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone 946 947define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) { 948 ;CHECK-LABEL: stack_fold_subpd 949 ;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 950 %1 = tail 
call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 951 %2 = fsub <2 x double> %a0, %a1 952 ret <2 x double> %2 953} 954 955define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) { 956 ;CHECK-LABEL: stack_fold_subps 957 ;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 958 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 959 %2 = fsub <4 x float> %a0, %a1 960 ret <4 x float> %2 961} 962 963define double @stack_fold_subsd(double %a0, double %a1) { 964 ;CHECK-LABEL: stack_fold_subsd 965 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 966 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 967 %2 = fsub double %a0, %a1 968 ret double %2 969} 970 971define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) { 972 ;CHECK-LABEL: stack_fold_subsd_int 973 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 974 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 975 %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) 976 ret <2 x double> %2 977} 978declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone 979 980define float @stack_fold_subss(float %a0, float %a1) { 981 ;CHECK-LABEL: stack_fold_subss 982 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 983 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 984 %2 = fsub float %a0, %a1 985 ret float %2 986} 987 988define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) { 989 ;CHECK-LABEL: stack_fold_subss_int 990 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 991 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 992 %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) 993 ret <4 x float> %2 994} 995declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone 996 997define i32 @stack_fold_ucomisd(double %a0, double %a1) { 998 ;CHECK-LABEL: stack_fold_ucomisd 999 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1000 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1001 %2 = fcmp ueq double %a0, %a1 1002 %3 = select i1 %2, i32 1, i32 -1 1003 ret i32 %3 1004} 1005 1006define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) { 1007 ;CHECK-LABEL: stack_fold_ucomisd_int 1008 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1009 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1010 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) 1011 ret i32 %2 1012} 1013declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone 1014 1015define i32 @stack_fold_ucomiss(float %a0, float %a1) { 1016 ;CHECK-LABEL: stack_fold_ucomiss 1017 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), 
{{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1018 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1019 %2 = fcmp ueq float %a0, %a1 1020 %3 = select i1 %2, i32 1, i32 -1 1021 ret i32 %3 1022} 1023 1024define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) { 1025 ;CHECK-LABEL: stack_fold_ucomiss_int 1026 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1027 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1028 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 1029 ret i32 %2 1030} 1031declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 1032 1033define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) { 1034 ;CHECK-LABEL: stack_fold_unpckhpd 1035 ;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1036 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1037 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> 1038 ret <2 x double> %2 1039} 1040 1041define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) { 1042 ;CHECK-LABEL: stack_fold_unpckhps 1043 ;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1044 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1045 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1046 ret <4 x float> %2 1047} 1048 1049define <2 x double> 
@stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) { 1050 ;CHECK-LABEL: stack_fold_unpcklpd 1051 ;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1052 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1053 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> 1054 ret <2 x double> %2 1055} 1056 1057define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) { 1058 ;CHECK-LABEL: stack_fold_unpcklps 1059 ;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1060 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1061 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1062 ret <4 x float> %2 1063} 1064 1065define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) { 1066 ;CHECK-LABEL: stack_fold_xorpd 1067 ;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1068 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1069 %2 = bitcast <2 x double> %a0 to <2 x i64> 1070 %3 = bitcast <2 x double> %a1 to <2 x i64> 1071 %4 = xor <2 x i64> %2, %3 1072 %5 = bitcast <2 x i64> %4 to <2 x double> 1073 ; fadd forces execution domain 1074 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 1075 ret <2 x double> %6 1076} 1077 1078define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) { 1079 ;CHECK-LABEL: stack_fold_xorps 1080 ;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1081 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1082 %2 = bitcast <4 x float> %a0 to <2 x i64> 1083 %3 = bitcast <4 x float> %a1 to <2 x i64> 1084 %4 = xor <2 x i64> %2, %3 1085 %5 = bitcast <2 x i64> %4 to <4 x float> 1086 ; fadd forces execution domain 1087 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 1088 ret <4 x float> %6 1089} 1090