; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps
  ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd
  ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps
  ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ret <4 x float> %2
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd
  ;CHECK: blendvpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps
  ;CHECK: blendvps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd
  ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps
  ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd
  ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si64_int
  ;CHECK: cvtsd2siq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define float @stack_fold_cvtsd2ss(double %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc double %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss_int
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

define double @stack_fold_cvtss2sd(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fpext float %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd_int
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x float> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define i32 @stack_fold_cvttsd2si(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd
  ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divss_int
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  %1 = extractelement <4 x float> %a0, i32 1
  %2 = bitcast float %1 to i32
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0,
<2 x double> %a1) 609 ret <2 x double> %2 610} 611declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone 612 613define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) { 614 ;CHECK-LABEL: stack_fold_haddps 615 ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 616 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 617 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) 618 ret <4 x float> %2 619} 620declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 621 622define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) { 623 ;CHECK-LABEL: stack_fold_hsubpd 624 ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 625 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 626 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) 627 ret <2 x double> %2 628} 629declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 630 631define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { 632 ;CHECK-LABEL: stack_fold_hsubps 633 ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 634 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 635 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 636 ret <4 x float> %2 637} 638declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 639 640define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x 
float> %a1) { 641 ;CHECK-LABEL: stack_fold_insertps 642 ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 643 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3] 644 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 645 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 646 ret <4 x float> %2 647} 648declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 649 650define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { 651 ;CHECK-LABEL: stack_fold_maxpd 652 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 653 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 654 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 655 ret <2 x double> %2 656} 657declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 658 659define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { 660 ;CHECK-LABEL: stack_fold_maxps 661 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 662 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 663 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 664 ret <4 x float> %2 665} 666declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 667 668define double @stack_fold_maxsd(double %a0, double %a1) { 669 ;CHECK-LABEL: stack_fold_maxsd 670 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 671 %1 = tail call <2 
x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 672 %2 = fcmp ogt double %a0, %a1 673 %3 = select i1 %2, double %a0, double %a1 674 ret double %3 675} 676 677define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { 678 ;CHECK-LABEL: stack_fold_maxsd_int 679 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 680 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 681 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 682 ret <2 x double> %2 683} 684declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 685 686define float @stack_fold_maxss(float %a0, float %a1) { 687 ;CHECK-LABEL: stack_fold_maxss 688 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 689 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 690 %2 = fcmp ogt float %a0, %a1 691 %3 = select i1 %2, float %a0, float %a1 692 ret float %3 693} 694 695define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { 696 ;CHECK-LABEL: stack_fold_maxss_int 697 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 698 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 699 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 700 ret <4 x float> %2 701} 702declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 703 704define <2 x double> @stack_fold_minpd(<2 x double> %a0, 
<2 x double> %a1) { 705 ;CHECK-LABEL: stack_fold_minpd 706 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 707 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 708 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 709 ret <2 x double> %2 710} 711declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 712 713define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { 714 ;CHECK-LABEL: stack_fold_minps 715 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 716 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 717 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 718 ret <4 x float> %2 719} 720declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 721 722define double @stack_fold_minsd(double %a0, double %a1) { 723 ;CHECK-LABEL: stack_fold_minsd 724 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 725 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 726 %2 = fcmp olt double %a0, %a1 727 %3 = select i1 %2, double %a0, double %a1 728 ret double %3 729} 730 731define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 732 ;CHECK-LABEL: stack_fold_minsd_int 733 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 734 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 
735 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 736 ret <2 x double> %2 737} 738declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 739 740define float @stack_fold_minss(float %a0, float %a1) { 741 ;CHECK-LABEL: stack_fold_minss 742 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 743 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 744 %2 = fcmp olt float %a0, %a1 745 %3 = select i1 %2, float %a0, float %a1 746 ret float %3 747} 748 749define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { 750 ;CHECK-LABEL: stack_fold_minss_int 751 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 753 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 754 ret <4 x float> %2 755} 756declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 757 758define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 759 ;CHECK-LABEL: stack_fold_movddup 760 ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 761 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 762 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 763 ret <2 x double> %2 764} 765; TODO stack_fold_movhpd (load / store) 766; TODO stack_fold_movhps (load / store) 767 768; TODO stack_fold_movlpd (load / store) 769; TODO stack_fold_movlps (load / store) 770 771define <4 x float> @stack_fold_movshdup(<4 x 
float> %a0) { 772 ;CHECK-LABEL: stack_fold_movshdup 773 ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 774 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 775 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 776 ret <4 x float> %2 777} 778 779define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 780 ;CHECK-LABEL: stack_fold_movsldup 781 ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 782 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 783 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 784 ret <4 x float> %2 785} 786 787define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 788 ;CHECK-LABEL: stack_fold_mulpd 789 ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 790 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 791 %2 = fmul <2 x double> %a0, %a1 792 ret <2 x double> %2 793} 794 795define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 796 ;CHECK-LABEL: stack_fold_mulps 797 ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 798 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 799 %2 = fmul <4 x float> %a0, %a1 800 ret <4 x float> %2 801} 802 803define double @stack_fold_mulsd(double %a0, double %a1) { 804 ;CHECK-LABEL: stack_fold_mulsd 805 ;CHECK: 
mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 806 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 807 %2 = fmul double %a0, %a1 808 ret double %2 809} 810 811define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 812 ;CHECK-LABEL: stack_fold_mulsd_int 813 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 814 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 815 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) 816 ret <2 x double> %2 817} 818declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 819 820define float @stack_fold_mulss(float %a0, float %a1) { 821 ;CHECK-LABEL: stack_fold_mulss 822 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 823 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 824 %2 = fmul float %a0, %a1 825 ret float %2 826} 827 828define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 829 ;CHECK-LABEL: stack_fold_mulss_int 830 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 831 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 832 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) 833 ret <4 x float> %2 834} 835declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 836 837define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 
x double> %a1) { 838 ;CHECK-LABEL: stack_fold_orpd 839 ;CHECK: orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 840 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 841 %2 = bitcast <2 x double> %a0 to <2 x i64> 842 %3 = bitcast <2 x double> %a1 to <2 x i64> 843 %4 = or <2 x i64> %2, %3 844 %5 = bitcast <2 x i64> %4 to <2 x double> 845 ; fadd forces execution domain 846 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 847 ret <2 x double> %6 848} 849 850define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 851 ;CHECK-LABEL: stack_fold_orps 852 ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 853 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 854 %2 = bitcast <4 x float> %a0 to <2 x i64> 855 %3 = bitcast <4 x float> %a1 to <2 x i64> 856 %4 = or <2 x i64> %2, %3 857 %5 = bitcast <2 x i64> %4 to <4 x float> 858 ; fadd forces execution domain 859 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 860 ret <4 x float> %6 861} 862 863; TODO stack_fold_rcpps 864 865define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 866 ;CHECK-LABEL: stack_fold_rcpps_int 867 ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 868 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 869 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 870 ret <4 x float> %2 871} 872declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 873 874; TODO stack_fold_rcpss 875; TODO stack_fold_rcpss_int 876 877define <2 x double> 
@stack_fold_roundpd(<2 x double> %a0) { 878 ;CHECK-LABEL: stack_fold_roundpd 879 ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 880 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 881 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) 882 ret <2 x double> %2 883} 884declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 885 886define <4 x float> @stack_fold_roundps(<4 x float> %a0) { 887 ;CHECK-LABEL: stack_fold_roundps 888 ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 889 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 890 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) 891 ret <4 x float> %2 892} 893declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 894 895define double @stack_fold_roundsd(double %a0) optsize { 896 ;CHECK-LABEL: stack_fold_roundsd 897 ;CHECK: roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 898 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 899 %2 = call double @llvm.floor.f64(double %a0) 900 ret double %2 901} 902declare double @llvm.floor.f64(double) nounwind readnone 903 904; TODO stack_fold_roundsd_int 905declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 906 907define float @stack_fold_roundss(float %a0) minsize { 908 ;CHECK-LABEL: stack_fold_roundss 909 ;CHECK: roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 910 %1 = tail 
call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 911 %2 = call float @llvm.floor.f32(float %a0) 912 ret float %2 913} 914declare float @llvm.floor.f32(float) nounwind readnone 915 916; TODO stack_fold_roundss_int 917declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 918 919; TODO stack_fold_rsqrtps 920 921define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) { 922 ;CHECK-LABEL: stack_fold_rsqrtps_int 923 ;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 924 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 925 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 926 ret <4 x float> %2 927} 928declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 929 930; TODO stack_fold_rsqrtss 931; TODO stack_fold_rsqrtss_int 932 933define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) { 934 ;CHECK-LABEL: stack_fold_shufpd 935 ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 936 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 937 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 938 ret <2 x double> %2 939} 940 941define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) { 942 ;CHECK-LABEL: stack_fold_shufps 943 ;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 944 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 945 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7> 946 ret <4 x float> %2 947} 948 949define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) { 950 ;CHECK-LABEL: stack_fold_sqrtpd 951 ;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 952 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 953 %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) 954 ret <2 x double> %2 955} 956declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 957 958define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) { 959 ;CHECK-LABEL: stack_fold_sqrtps 960 ;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 961 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 962 %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) 963 ret <4 x float> %2 964} 965declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone 966 967define double @stack_fold_sqrtsd(double %a0) optsize { 968 ;CHECK-LABEL: stack_fold_sqrtsd 969 ;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 970 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 971 %2 = call double @llvm.sqrt.f64(double %a0) 972 ret double %2 973} 974declare double @llvm.sqrt.f64(double) nounwind readnone 975 976; TODO stack_fold_sqrtsd_int 977declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 978 
define float @stack_fold_sqrtss(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_sqrtss
  ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call float @llvm.sqrt.f32(float %a0)
  ret float %2
}
declare float @llvm.sqrt.f32(float) nounwind readnone

; TODO stack_fold_sqrtss_int
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_subpd
  ;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_subps
  ;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_subsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_subsd
  ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_subsd_int
  ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_subss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_subss
  ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fsub float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_subss_int
  ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @stack_fold_ucomisd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_ucomisd
  ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ueq double %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_ucomisd_int
  ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @stack_fold_ucomiss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_ucomiss
  ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp ueq float %a0, %a1
  %3 = select i1 %2, i32 1, i32 -1
  ret i32 %3
}

define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_ucomiss_int
  ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_unpckhpd
  ;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_unpckhps
  ;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklpd
  ;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ; fadd forces execution domain
  %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
  ret <2 x double> %3
}

define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_unpcklps
  ;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}

define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_xorpd
  ;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_xorps
  ;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}