; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
;
; NOTE(review): each test's inline asm clobbers every register that is not
; holding a live argument: xmm2-xmm15 for two-operand fp tests, xmm3-xmm15 for
; the three-operand blendv tests, xmm1-xmm15 for one-operand tests, and all
; general-purpose registers for the int-to-fp / extractps tests. The argument
; therefore cannot stay in a register across the asm and must be spilled, so
; the tested instruction should consume it directly from the stack slot
; (the "Folded Reload" the CHECK lines match).

define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addpd
  ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addps
  ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_addsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_addsd
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsd_int
  ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_addss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_addss
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fadd float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addss_int
  ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_addsubpd
  ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_addsubps
  ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andnpd
  ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ; fadd forces execution domain
  %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
  ret <2 x double> %7
}

define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andnps
  ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
  %5 = and <2 x i64> %4, %3
  %6 = bitcast <2 x i64> %5 to <4 x float>
  ; fadd forces execution domain
  %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %7
}

define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_andpd
  ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <2 x double> %a0 to <2 x i64>
  %3 = bitcast <2 x double> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <2 x double>
  ; fadd forces execution domain
  %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
  ret <2 x double> %6
}

define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_andps
  ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x float> %a0 to <2 x i64>
  %3 = bitcast <4 x float> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, %3
  %5 = bitcast <2 x i64> %4 to <4 x float>
  ; fadd forces execution domain
  %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %6
}

define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_blendpd
  ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_blendps
  ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
  ret <4 x float> %2
}

define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
  ;CHECK-LABEL: stack_fold_blendvpd
  ;CHECK: blendvpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
  ;CHECK-LABEL: stack_fold_blendvps
  ;CHECK: blendvps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmppd
  ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpps
  ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @stack_fold_cmpsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq double %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_cmpsd_int
  ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @stack_fold_cmpss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_cmpss
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fcmp oeq float %a0, %a1
  %3 = zext i1 %2 to i32
  ret i32 %3
}

define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_cmpss_int
  ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

; TODO stack_fold_comisd

define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_comisd_int
  ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

; TODO stack_fold_comiss

define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_comiss_int
  ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd
  ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sitofp <2 x i32> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @stack_fold_cvtdq2pd_int(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2pd_int
  ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sitofp <4 x i32> %a0 to <4 x float>
  ret <4 x float> %2
}

define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2dq
  ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc <2 x double> %a0 to <2 x float>
  ret <2 x float> %2
}

define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2dq
  ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd
  ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %3 = fpext <2 x float> %2 to <2 x double>
  ret <2 x double> %3
}

define <2 x double> @stack_fold_cvtps2pd_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtps2pd_int
  ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

; TODO stack_fold_cvtsd2si

define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

; TODO stack_fold_cvtsd2si64

define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvtsd2si64_int
  ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define float @stack_fold_cvtsd2ss(double %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptrunc double %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsd2ss_int
  ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2sd
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2sd_int
  ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642sd
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642sd_int
  ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtsi2ss
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i32 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi2ss_int
  ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtsi642ss
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sitofp i64 %a0 to float
  ret float %2
}

define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
  ;CHECK-LABEL: stack_fold_cvtsi642ss_int
  ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

define double @stack_fold_cvtss2sd(float %a0) minsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fpext float %a0 to double
  ret double %2
}

define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
  ;CHECK-LABEL: stack_fold_cvtss2sd_int
  ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si

define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

; TODO stack_fold_cvtss2si64

define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvtss2si64_int
  ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi <4 x float> %a0 to <4 x i32>
  ret <4 x i32> %2
}

define i32 @stack_fold_cvttsd2si(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define i64 @stack_fold_cvttsd2si64(double %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi double %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
  ;CHECK-LABEL: stack_fold_cvttsd2si64_int
  ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i32 @stack_fold_cvttss2si(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i32
  ret i32 %2
}

define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %2
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone

define i64 @stack_fold_cvttss2si64(float %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fptosi float %a0 to i64
  ret i64 %2
}

define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_cvttss2si64_int
  ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
  ret i64 %2
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divpd
  ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <2 x double> %a0, %a1
  ret <2 x double> %2
}

define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divps
  ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv <4 x float> %a0, %a1
  ret <4 x float> %2
}

define double @stack_fold_divsd(double %a0, double %a1) {
  ;CHECK-LABEL: stack_fold_divsd
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv double %a0, %a1
  ret double %2
}

define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_divsd_int
  ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define float @stack_fold_divss(float %a0, float %a1) {
  ;CHECK-LABEL: stack_fold_divss
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = fdiv float %a0, %a1
  ret float %2
}

define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_divss_int
  ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_dppd
  ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_dpps
  ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

; NOTE(review): unlike the other tests this one checks a fold in the opposite
; direction — the extract result is spilled to the stack and reloaded into a GPR.
define i32 @stack_fold_extractps(<4 x float> %a0) {
  ;CHECK-LABEL: stack_fold_extractps
  ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
  ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
  %1 = extractelement <4 x float> %a0, i32 1
  %2 = bitcast float %1 to i32
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_haddpd
  ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %2
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
  ;CHECK-LABEL: stack_fold_haddps
  ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %2
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
  ;CHECK-LABEL: stack_fold_hsubpd
  ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect
"nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 644 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) 645 ret <2 x double> %2 646} 647declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 648 649define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { 650 ;CHECK-LABEL: stack_fold_hsubps 651 ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 652 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 653 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 654 ret <4 x float> %2 655} 656declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 657 658define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { 659 ;CHECK-LABEL: stack_fold_insertps 660 ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 661 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3] 662 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 663 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 664 ret <4 x float> %2 665} 666declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 667 668define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { 669 ;CHECK-LABEL: stack_fold_maxpd 670 ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 671 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 672 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 673 ret <2 x double> %2 674} 675declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 676 677define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { 678 ;CHECK-LABEL: stack_fold_maxps 679 ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 680 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 681 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 682 ret <4 x float> %2 683} 684declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 685 686define double @stack_fold_maxsd(double %a0, double %a1) { 687 ;CHECK-LABEL: stack_fold_maxsd 688 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 689 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 690 %2 = fcmp ogt double %a0, %a1 691 %3 = select i1 %2, double %a0, double %a1 692 ret double %3 693} 694 695define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { 696 ;CHECK-LABEL: stack_fold_maxsd_int 697 ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 698 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 699 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 700 ret <2 x double> %2 701} 702declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind 
readnone 703 704define float @stack_fold_maxss(float %a0, float %a1) { 705 ;CHECK-LABEL: stack_fold_maxss 706 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 707 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 708 %2 = fcmp ogt float %a0, %a1 709 %3 = select i1 %2, float %a0, float %a1 710 ret float %3 711} 712 713define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { 714 ;CHECK-LABEL: stack_fold_maxss_int 715 ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 716 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 717 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 718 ret <4 x float> %2 719} 720declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 721 722define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { 723 ;CHECK-LABEL: stack_fold_minpd 724 ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 725 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 726 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 727 ret <2 x double> %2 728} 729declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 730 731define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { 732 ;CHECK-LABEL: stack_fold_minps 733 ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 734 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 735 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 736 ret <4 x float> %2 737} 738declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 739 740define double @stack_fold_minsd(double %a0, double %a1) { 741 ;CHECK-LABEL: stack_fold_minsd 742 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 743 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 744 %2 = fcmp olt double %a0, %a1 745 %3 = select i1 %2, double %a0, double %a1 746 ret double %3 747} 748 749define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 750 ;CHECK-LABEL: stack_fold_minsd_int 751 ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 753 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 754 ret <2 x double> %2 755} 756declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 757 758define float @stack_fold_minss(float %a0, float %a1) { 759 ;CHECK-LABEL: stack_fold_minss 760 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 761 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 762 %2 = fcmp olt float %a0, %a1 763 %3 = select i1 %2, float %a0, float %a1 764 ret float %3 765} 766 767define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { 768 ;CHECK-LABEL: 
stack_fold_minss_int 769 ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 770 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 771 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 772 ret <4 x float> %2 773} 774declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 775 776define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 777 ;CHECK-LABEL: stack_fold_movddup 778 ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 779 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 780 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 781 ret <2 x double> %2 782} 783; TODO stack_fold_movhpd (load / store) 784; TODO stack_fold_movhps (load / store) 785 786; TODO stack_fold_movlpd (load / store) 787; TODO stack_fold_movlps (load / store) 788 789define <4 x float> @stack_fold_movshdup(<4 x float> %a0) { 790 ;CHECK-LABEL: stack_fold_movshdup 791 ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 792 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 793 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 794 ret <4 x float> %2 795} 796 797define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 798 ;CHECK-LABEL: stack_fold_movsldup 799 ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 800 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 801 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 802 ret <4 x float> %2 803} 804 805define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 806 ;CHECK-LABEL: stack_fold_mulpd 807 ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 808 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 809 %2 = fmul <2 x double> %a0, %a1 810 ret <2 x double> %2 811} 812 813define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 814 ;CHECK-LABEL: stack_fold_mulps 815 ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 816 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 817 %2 = fmul <4 x float> %a0, %a1 818 ret <4 x float> %2 819} 820 821define double @stack_fold_mulsd(double %a0, double %a1) { 822 ;CHECK-LABEL: stack_fold_mulsd 823 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 824 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 825 %2 = fmul double %a0, %a1 826 ret double %2 827} 828 829define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 830 ;CHECK-LABEL: stack_fold_mulsd_int 831 ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 832 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 833 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) 834 ret <2 x double> %2 835} 836declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 837 838define float @stack_fold_mulss(float %a0, float %a1) { 839 ;CHECK-LABEL: stack_fold_mulss 840 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 841 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 842 %2 = fmul float %a0, %a1 843 ret float %2 844} 845 846define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 847 ;CHECK-LABEL: stack_fold_mulss_int 848 ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 849 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 850 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) 851 ret <4 x float> %2 852} 853declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 854 855define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 856 ;CHECK-LABEL: stack_fold_orpd 857 ;CHECK: orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 858 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 859 %2 = bitcast <2 x double> %a0 to <2 x i64> 860 %3 = bitcast <2 x double> %a1 to <2 x i64> 861 %4 = or <2 x i64> %2, %3 862 %5 = bitcast <2 x i64> %4 to <2 x double> 863 ; fadd forces execution domain 864 %6 = fadd <2 x double> %5, <double 0x0, double 
0x0> 865 ret <2 x double> %6 866} 867 868define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 869 ;CHECK-LABEL: stack_fold_orps 870 ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 871 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 872 %2 = bitcast <4 x float> %a0 to <2 x i64> 873 %3 = bitcast <4 x float> %a1 to <2 x i64> 874 %4 = or <2 x i64> %2, %3 875 %5 = bitcast <2 x i64> %4 to <4 x float> 876 ; fadd forces execution domain 877 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 878 ret <4 x float> %6 879} 880 881; TODO stack_fold_rcpps 882 883define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 884 ;CHECK-LABEL: stack_fold_rcpps_int 885 ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 886 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 887 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 888 ret <4 x float> %2 889} 890declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 891 892; TODO stack_fold_rcpss 893; TODO stack_fold_rcpss_int 894 895define <2 x double> @stack_fold_roundpd(<2 x double> %a0) { 896 ;CHECK-LABEL: stack_fold_roundpd 897 ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 898 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 899 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) 900 ret <2 x double> %2 901} 902declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 903 904define <4 x float> 
@stack_fold_roundps(<4 x float> %a0) { 905 ;CHECK-LABEL: stack_fold_roundps 906 ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 907 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 908 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) 909 ret <4 x float> %2 910} 911declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 912 913define double @stack_fold_roundsd(double %a0) optsize { 914 ;CHECK-LABEL: stack_fold_roundsd 915 ;CHECK: roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 916 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 917 %2 = call double @llvm.floor.f64(double %a0) 918 ret double %2 919} 920declare double @llvm.floor.f64(double) nounwind readnone 921 922; TODO stack_fold_roundsd_int 923declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 924 925define float @stack_fold_roundss(float %a0) minsize { 926 ;CHECK-LABEL: stack_fold_roundss 927 ;CHECK: roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 928 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 929 %2 = call float @llvm.floor.f32(float %a0) 930 ret float %2 931} 932declare float @llvm.floor.f32(float) nounwind readnone 933 934; TODO stack_fold_roundss_int 935declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 936 937; TODO stack_fold_rsqrtps 938 939define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) { 940 ;CHECK-LABEL: stack_fold_rsqrtps_int 941 
;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 942 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 943 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 944 ret <4 x float> %2 945} 946declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 947 948; TODO stack_fold_rsqrtss 949; TODO stack_fold_rsqrtss_int 950 951define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) { 952 ;CHECK-LABEL: stack_fold_shufpd 953 ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 954 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 955 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 956 ret <2 x double> %2 957} 958 959define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) { 960 ;CHECK-LABEL: stack_fold_shufps 961 ;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 962 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 963 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7> 964 ret <4 x float> %2 965} 966 967define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) { 968 ;CHECK-LABEL: stack_fold_sqrtpd 969 ;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 970 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 971 %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> 
%a0) 972 ret <2 x double> %2 973} 974declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 975 976define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) { 977 ;CHECK-LABEL: stack_fold_sqrtps 978 ;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 979 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 980 %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) 981 ret <4 x float> %2 982} 983declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone 984 985define double @stack_fold_sqrtsd(double %a0) optsize { 986 ;CHECK-LABEL: stack_fold_sqrtsd 987 ;CHECK: sqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 988 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 989 %2 = call double @llvm.sqrt.f64(double %a0) 990 ret double %2 991} 992declare double @llvm.sqrt.f64(double) nounwind readnone 993 994; TODO stack_fold_sqrtsd_int 995declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 996 997define float @stack_fold_sqrtss(float %a0) minsize { 998 ;CHECK-LABEL: stack_fold_sqrtss 999 ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1000 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1001 %2 = call float @llvm.sqrt.f32(float %a0) 1002 ret float %2 1003} 1004declare float @llvm.sqrt.f32(float) nounwind readnone 1005 1006; TODO stack_fold_sqrtss_int 1007declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone 1008 1009define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) { 
1010 ;CHECK-LABEL: stack_fold_subpd 1011 ;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1012 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1013 %2 = fsub <2 x double> %a0, %a1 1014 ret <2 x double> %2 1015} 1016 1017define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) { 1018 ;CHECK-LABEL: stack_fold_subps 1019 ;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1020 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1021 %2 = fsub <4 x float> %a0, %a1 1022 ret <4 x float> %2 1023} 1024 1025define double @stack_fold_subsd(double %a0, double %a1) { 1026 ;CHECK-LABEL: stack_fold_subsd 1027 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1028 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1029 %2 = fsub double %a0, %a1 1030 ret double %2 1031} 1032 1033define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) { 1034 ;CHECK-LABEL: stack_fold_subsd_int 1035 ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1036 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1037 %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) 1038 ret <2 x double> %2 1039} 1040declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone 1041 1042define float @stack_fold_subss(float %a0, float %a1) { 1043 ;CHECK-LABEL: stack_fold_subss 1044 
;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1045 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1046 %2 = fsub float %a0, %a1 1047 ret float %2 1048} 1049 1050define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) { 1051 ;CHECK-LABEL: stack_fold_subss_int 1052 ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1053 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1054 %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) 1055 ret <4 x float> %2 1056} 1057declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone 1058 1059define i32 @stack_fold_ucomisd(double %a0, double %a1) { 1060 ;CHECK-LABEL: stack_fold_ucomisd 1061 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1062 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1063 %2 = fcmp ueq double %a0, %a1 1064 %3 = select i1 %2, i32 1, i32 -1 1065 ret i32 %3 1066} 1067 1068define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) { 1069 ;CHECK-LABEL: stack_fold_ucomisd_int 1070 ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1071 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1072 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) 1073 ret i32 %2 1074} 1075declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone 
1076 1077define i32 @stack_fold_ucomiss(float %a0, float %a1) { 1078 ;CHECK-LABEL: stack_fold_ucomiss 1079 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1080 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1081 %2 = fcmp ueq float %a0, %a1 1082 %3 = select i1 %2, i32 1, i32 -1 1083 ret i32 %3 1084} 1085 1086define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) { 1087 ;CHECK-LABEL: stack_fold_ucomiss_int 1088 ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1089 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1090 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 1091 ret i32 %2 1092} 1093declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 1094 1095define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) { 1096 ;CHECK-LABEL: stack_fold_unpckhpd 1097 ;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1098 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1099 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> 1100 ; fadd forces execution domain 1101 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 1102 ret <2 x double> %3 1103} 1104 1105define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) { 1106 ;CHECK-LABEL: stack_fold_unpckhps 1107 ;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1108 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1109 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1110 ; fadd forces execution domain 1111 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 1112 ret <4 x float> %3 1113} 1114 1115define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) { 1116 ;CHECK-LABEL: stack_fold_unpcklpd 1117 ;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1118 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1119 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> 1120 ; fadd forces execution domain 1121 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 1122 ret <2 x double> %3 1123} 1124 1125define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) { 1126 ;CHECK-LABEL: stack_fold_unpcklps 1127 ;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1128 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1129 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1130 ; fadd forces execution domain 1131 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 1132 ret <4 x float> %3 1133} 1134 1135define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) { 1136 ;CHECK-LABEL: stack_fold_xorpd 1137 ;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1138 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1139 %2 = bitcast <2 x double> %a0 to <2 x i64> 1140 %3 = bitcast <2 x double> %a1 to <2 x i64> 1141 %4 = xor <2 x i64> %2, %3 1142 %5 = bitcast <2 x i64> %4 to <2 x double> 1143 ; fadd forces execution domain 1144 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 1145 ret <2 x double> %6 1146} 1147 1148define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) { 1149 ;CHECK-LABEL: stack_fold_xorps 1150 ;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1151 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1152 %2 = bitcast <4 x float> %a0 to <2 x i64> 1153 %3 = bitcast <4 x float> %a1 to <2 x i64> 1154 %4 = xor <2 x i64> %2, %3 1155 %5 = bitcast <2 x i64> %4 to <4 x float> 1156 ; fadd forces execution domain 1157 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 1158 ret <4 x float> %6 1159} 1160