1; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s 2 3target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 4target triple = "x86_64-unknown-unknown" 5 6; Stack reload folding tests. 7; 8; By including a nop call with sideeffects we can force a partial register spill of the 9; relevant registers and check that the reload is correctly folded into the instruction. 10 11define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) { 12 ;CHECK-LABEL: stack_fold_addpd 13 ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 15 %2 = fadd <2 x double> %a0, %a1 16 ret <2 x double> %2 17} 18 19define <4 x double> @stack_fold_addpd_ymm(<4 x double> %a0, <4 x double> %a1) { 20 ;CHECK-LABEL: stack_fold_addpd_ymm 21 ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 22 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 23 %2 = fadd <4 x double> %a0, %a1 24 ret <4 x double> %2 25} 26 27define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) { 28 ;CHECK-LABEL: stack_fold_addps 29 ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 30 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 31 %2 = fadd <4 x float> %a0, %a1 32 ret <4 x float> %2 33} 34 35define <8 x float> @stack_fold_addps_ymm(<8 x float> %a0, <8 x float> %a1) { 36 ;CHECK-LABEL: stack_fold_addps_ymm 37 ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 38 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 39 %2 = fadd <8 x float> %a0, %a1 40 ret <8 x float> %2 41} 42 43define double @stack_fold_addsd(double %a0, double %a1) { 44 ;CHECK-LABEL: stack_fold_addsd 45 ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 46 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 47 %2 = fadd double %a0, %a1 48 ret double %2 49} 50 51define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) { 52 ;CHECK-LABEL: stack_fold_addsd_int 53 ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 54 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 55 %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) 56 ret <2 x double> %2 57} 58declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone 59 60define float @stack_fold_addss(float %a0, float %a1) { 61 ;CHECK-LABEL: stack_fold_addss 62 ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 63 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 64 %2 = fadd float %a0, %a1 65 ret float %2 66} 67 68define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) { 69 ;CHECK-LABEL: stack_fold_addss_int 70 ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 71 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 72 %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) 73 ret <4 x float> %2 74} 75declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone 76 77define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) { 78 ;CHECK-LABEL: stack_fold_addsubpd 79 ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 80 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 81 %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) 82 ret <2 x double> %2 83} 84declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone 85 86define <4 x double> @stack_fold_addsubpd_ymm(<4 x double> %a0, <4 x double> %a1) { 87 ;CHECK-LABEL: stack_fold_addsubpd_ymm 88 ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 89 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 90 %2 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) 91 ret <4 x double> %2 92} 93declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 94 95define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) { 96 ;CHECK-LABEL: stack_fold_addsubps 97 ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 98 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 99 %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) 100 ret <4 x float> %2 101} 102declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone 103 104define <8 x float> @stack_fold_addsubps_ymm(<8 x float> %a0, <8 x float> %a1) { 105 ;CHECK-LABEL: stack_fold_addsubps_ymm 106 ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 107 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 108 %2 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) 109 ret <8 x float> %2 110} 111declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 112 113define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) { 114 ;CHECK-LABEL: stack_fold_andnpd 115 ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 116 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 117 %2 = bitcast <2 x double> %a0 to <2 x i64> 118 %3 = bitcast <2 x double> %a1 to <2 x i64> 119 %4 = xor <2 x i64> %2, <i64 -1, i64 -1> 120 %5 = and <2 x i64> %4, %3 121 %6 = bitcast <2 x i64> %5 to <2 x double> 122 ; fadd forces execution domain 123 %7 = fadd <2 x double> %6, <double 0x0, double 0x0> 124 ret <2 x double> %7 125} 126 127define <4 x double> @stack_fold_andnpd_ymm(<4 x double> %a0, <4 x double> %a1) { 128 ;CHECK-LABEL: stack_fold_andnpd_ymm 129 ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 130 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 131 %2 = bitcast <4 x double> %a0 to <4 x i64> 132 %3 = bitcast <4 x double> %a1 to <4 x i64> 133 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1> 134 %5 = and <4 x i64> %4, %3 135 %6 = bitcast <4 x i64> %5 to <4 x double> 136 ; fadd forces execution domain 137 %7 = fadd <4 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0> 138 ret <4 x double> %7 139} 140 141define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) { 142 ;CHECK-LABEL: stack_fold_andnps 143 ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 145 %2 = bitcast <4 x float> %a0 to <2 x i64> 146 %3 = bitcast <4 x float> %a1 to <2 x i64> 147 %4 = xor <2 x i64> %2, <i64 -1, i64 -1> 148 %5 = and <2 x i64> %4, %3 149 %6 = bitcast <2 x i64> %5 to <4 x float> 150 ; fadd forces execution domain 151 %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0> 152 ret <4 x float> %7 153} 154 155define <8 x float> @stack_fold_andnps_ymm(<8 x float> %a0, <8 x float> %a1) { 156 ;CHECK-LABEL: stack_fold_andnps_ymm 157 ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 158 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 159 %2 = bitcast <8 x float> %a0 to <4 x i64> 160 %3 = bitcast <8 x float> %a1 to <4 x i64> 161 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1> 162 %5 = and <4 x i64> %4, %3 163 %6 = bitcast <4 x i64> %5 to <8 x float> 164 ; fadd forces execution domain 165 %7 = fadd <8 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 166 ret <8 x float> %7 167} 168 169define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) { 170 ;CHECK-LABEL: stack_fold_andpd 171 ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 172 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 173 %2 = bitcast <2 x double> %a0 to <2 x i64> 174 %3 = bitcast <2 x double> %a1 to <2 x i64> 175 %4 = and <2 x i64> %2, %3 176 %5 = bitcast <2 x i64> %4 to <2 x double> 177 ; fadd forces execution domain 178 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 179 ret <2 x double> %6 180} 181 182define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) { 183 ;CHECK-LABEL: stack_fold_andpd_ymm 184 ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 185 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 186 %2 = bitcast <4 x double> %a0 to <4 x i64> 187 %3 = bitcast <4 x double> %a1 to <4 x i64> 188 %4 = and <4 x i64> %2, %3 189 %5 = bitcast <4 x i64> %4 to <4 x double> 190 ; fadd forces execution domain 191 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0> 192 ret <4 x double> %6 193} 194 195define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) { 196 ;CHECK-LABEL: stack_fold_andps 197 ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 198 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 199 %2 = bitcast <4 x float> %a0 to <2 x i64> 200 %3 = bitcast <4 x float> %a1 to <2 x i64> 201 %4 = and <2 x i64> %2, %3 202 %5 = bitcast <2 x i64> %4 to <4 x float> 203 ; fadd forces execution domain 204 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 205 ret <4 x float> %6 206} 207 208define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) { 209 ;CHECK-LABEL: stack_fold_andps_ymm 210 ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 211 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 212 %2 = bitcast <8 x float> %a0 to <4 x i64> 213 %3 = bitcast <8 x float> %a1 to <4 x i64> 214 %4 = and <4 x i64> %2, %3 215 %5 = bitcast <4 x i64> %4 to <8 x float> 216 ; fadd forces execution domain 217 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 218 ret <8 x float> %6 219} 220 221define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) { 222 ;CHECK-LABEL: stack_fold_blendpd 223 ;CHECK: vblendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 224 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 225 %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1 226 ret <2 x double> %2 227} 228 229define <4 x double> @stack_fold_blendpd_ymm(<4 x double> %a0, <4 x double> %a1) { 230 ;CHECK-LABEL: stack_fold_blendpd_ymm 231 ;CHECK: vblendpd $6, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 232 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 233 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x double> %a0, <4 x double> %a1 234 ret <4 x double> %2 235} 236 237define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) { 238 ;CHECK-LABEL: stack_fold_blendps 239 ;CHECK: vblendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 240 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 241 %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1 242 ret <4 x float> %2 243} 244 245define <8 x float> @stack_fold_blendps_ymm(<8 x float> %a0, <8 x float> %a1) { 246 ;CHECK-LABEL: stack_fold_blendps_ymm 247 ;CHECK: vblendps $102, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 248 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 249 %2 = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %a0, <8 x float> %a1 250 ret <8 x float> %2 251} 252 253define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) { 254 ;CHECK-LABEL: stack_fold_blendvpd 255 ;CHECK: vblendvpd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 256 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 257 %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0) 258 ret <2 x double> %2 259} 260declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone 261 262define <4 x double> @stack_fold_blendvpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %c) { 263 ;CHECK-LABEL: stack_fold_blendvpd_ymm 264 ;CHECK: vblendvpd {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 265 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 266 %2 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a1, <4 x double> %c, <4 x double> %a0) 267 ret <4 x double> %2 268} 269declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone 270 271define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) { 272 ;CHECK-LABEL: stack_fold_blendvps 273 ;CHECK: vblendvps {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 274 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 275 %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0) 276 ret <4 x float> %2 277} 278declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone 279 280define <8 x float> @stack_fold_blendvps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %c) { 281 ;CHECK-LABEL: stack_fold_blendvps_ymm 282 ;CHECK: vblendvps {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 283 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 284 %2 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a1, <8 x float> %c, <8 x float> %a0) 285 ret <8 x float> %2 286} 287declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone 288 289define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) { 290 ;CHECK-LABEL: stack_fold_cmppd 291 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 292 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 293 %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0) 294 ret <2 x double> %2 295} 296declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone 297 298define <4 x double> @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) { 299 ;CHECK-LABEL: stack_fold_cmppd_ymm 300 ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 301 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 302 %2 = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0) 303 ret <4 x double> %2 304} 305declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 306 307define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) { 308 ;CHECK-LABEL: stack_fold_cmpps 309 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 310 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 311 %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0) 312 ret <4 x float> %2 313} 314declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone 315 316define <8 x float> @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) { 317 ;CHECK-LABEL: stack_fold_cmpps_ymm 318 ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 319 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 320 %2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) 321 ret <8 x float> %2 322} 323declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 324 325define i32 @stack_fold_cmpsd(double %a0, double %a1) { 326 ;CHECK-LABEL: stack_fold_cmpsd 327 ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 328 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 329 %2 = fcmp oeq double %a0, %a1 330 %3 = zext i1 %2 to i32 331 ret i32 %3 332} 333 334define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) { 335 ;CHECK-LABEL: stack_fold_cmpsd_int 336 ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 337 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 338 %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0) 339 ret <2 x double> %2 340} 341declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone 342 343define i32 @stack_fold_cmpss(float %a0, float %a1) { 344 ;CHECK-LABEL: stack_fold_cmpss 345 ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 346 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 347 %2 = fcmp oeq float %a0, %a1 348 %3 = zext i1 %2 to i32 349 ret i32 %3 350} 351 352define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) { 353 ;CHECK-LABEL: stack_fold_cmpss_int 354 ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 355 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 356 %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0) 357 ret <4 x float> %2 358} 359declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone 360 361; TODO stack_fold_comisd 362 363define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) { 364 ;CHECK-LABEL: stack_fold_comisd_int 365 ;CHECK: vcomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 366 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 367 %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) 368 ret i32 %2 369} 370declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone 371 372; TODO stack_fold_comiss 373 374define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) { 375 ;CHECK-LABEL: stack_fold_comiss_int 376 ;CHECK: vcomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 377 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 378 %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) 379 ret i32 %2 380} 381declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone 382 383define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) { 384 ;CHECK-LABEL: stack_fold_cvtdq2pd 385 ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 386 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 387 %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) 388 ret <2 x double> %2 389} 390declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone 391 392define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) { 393 ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm 394 ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 395 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 396 %2 = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) 397 ret <4 x double> %2 398} 399declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone 400 401define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) { 402 ;CHECK-LABEL: stack_fold_cvtdq2ps 403 ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 404 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 405 %2 = sitofp <4 x i32> %a0 to <4 x float> 406 ret <4 x float> %2 407} 408 409define <8 x float> @stack_fold_cvtdq2ps_ymm(<8 x i32> %a0) { 410 ;CHECK-LABEL: stack_fold_cvtdq2ps_ymm 411 ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 412 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 413 %2 = sitofp <8 x i32> %a0 to <8 x float> 414 ret <8 x float> %2 415} 416 417define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) { 418 ;CHECK-LABEL: stack_fold_cvtpd2dq 419 ;CHECK: vcvtpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 420 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 421 %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) 422 ret <4 x i32> %2 423} 424declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone 425 426define <4 x i32> @stack_fold_cvtpd2dq_ymm(<4 x double> %a0) { 427 ;CHECK-LABEL: stack_fold_cvtpd2dq_ymm 428 ;CHECK: vcvtpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 429 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 430 %2 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) 431 ret <4 x i32> %2 432} 433declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone 434 435define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) { 436 ;CHECK-LABEL: stack_fold_cvtpd2ps 437 ;CHECK: vcvtpd2psx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 438 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 439 %2 = fptrunc <2 x double> %a0 to <2 x float> 440 ret <2 x float> %2 441} 442 443define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) { 444 ;CHECK-LABEL: stack_fold_cvtpd2ps_ymm 445 ;CHECK: vcvtpd2psy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 446 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 447 %2 = fptrunc <4 x double> %a0 to <4 x float> 448 ret <4 x float> %2 449} 450 451define <4 x float> @stack_fold_cvtph2ps(<8 x i16> %a0) { 452 ;CHECK-LABEL: stack_fold_cvtph2ps 453 ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 454 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 455 %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) 456 ret <4 x float> %2 457} 458declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly 459 460define <8 x float> @stack_fold_cvtph2ps_ymm(<8 x i16> %a0) { 461 ;CHECK-LABEL: stack_fold_cvtph2ps_ymm 462 ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 463 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 464 %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) 465 ret <8 x float> %2 466} 467declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly 468 469define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) { 470 ;CHECK-LABEL: stack_fold_cvtps2dq 471 ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 472 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 473 %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) 474 ret <4 x i32> %2 475} 476declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone 477 478define <8 x i32> @stack_fold_cvtps2dq_ymm(<8 x float> %a0) { 479 ;CHECK-LABEL: stack_fold_cvtps2dq_ymm 480 ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 481 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 482 %2 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) 483 ret <8 x i32> %2 484} 485declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone 486 487define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) { 488 ;CHECK-LABEL: stack_fold_cvtps2pd 489 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 490 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 491 %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) 492 ret <2 x double> %2 493} 494declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone 495 496define <4 x double> @stack_fold_cvtps2pd_ymm(<4 x float> %a0) { 497 ;CHECK-LABEL: stack_fold_cvtps2pd_ymm 498 ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 499 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 500 %2 = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) 501 ret <4 x double> %2 502} 503declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone 504 505define <8 x i16> @stack_fold_cvtps2ph(<4 x float> %a0) { 506 ;CHECK-LABEL: stack_fold_cvtps2ph 507 ;CHECK: vcvtps2ph $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill 508 %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) 509 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 510 ret <8 x i16> %1 511} 512declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly 513 514define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) { 515 ;CHECK-LABEL: stack_fold_cvtps2ph_ymm 516 ;CHECK: vcvtps2ph $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill 517 %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) 518 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 519 ret <8 x i16> %1 520} 521declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly 522 523; TODO stack_fold_cvtsd2si 524 525define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) { 526 ;CHECK-LABEL: stack_fold_cvtsd2si_int 527 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 528 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 529 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) 530 ret i32 %2 531} 532declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone 533 534; TODO stack_fold_cvtsd2si64 535 536define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) { 537 ;CHECK-LABEL: stack_fold_cvtsd2si64_int 538 ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 540 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) 541 ret i64 %2 542} 543declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone 544 545; TODO stack_fold_cvtsd2ss 546 547define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) { 548 ;CHECK-LABEL: stack_fold_cvtsd2ss_int 549 ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 550 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 551 %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0) 552 ret <4 x float> %2 553} 554declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone 555 556define double @stack_fold_cvtsi2sd(i32 %a0) { 557 ;CHECK-LABEL: stack_fold_cvtsi2sd 558 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 559 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 560 %2 = sitofp i32 %a0 to double 561 ret double %2 562} 563 564define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) { 565 ;CHECK-LABEL: stack_fold_cvtsi2sd_int 566 ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 567 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 568 %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0) 569 ret <2 x double> %2 570} 571declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone 572 573define double @stack_fold_cvtsi642sd(i64 %a0) { 574 ;CHECK-LABEL: stack_fold_cvtsi642sd 575 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 576 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 577 %2 = sitofp i64 %a0 to double 578 ret double %2 579} 580 581define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) { 582 ;CHECK-LABEL: stack_fold_cvtsi642sd_int 583 ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 584 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 585 %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0) 586 ret <2 x double> %2 587} 588declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone 589 590define float @stack_fold_cvtsi2ss(i32 %a0) { 591 ;CHECK-LABEL: stack_fold_cvtsi2ss 592 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 593 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 594 %2 = sitofp i32 %a0 to float 595 ret float %2 596} 597 598define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) { 599 ;CHECK-LABEL: stack_fold_cvtsi2ss_int 600 ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 601 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 602 %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0) 603 ret <4 x float> %2 604} 605declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone 606 607define float @stack_fold_cvtsi642ss(i64 %a0) { 608 ;CHECK-LABEL: stack_fold_cvtsi642ss 609 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 610 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 611 %2 = sitofp i64 %a0 to float 612 ret float %2 613} 614 615define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) { 616 ;CHECK-LABEL: stack_fold_cvtsi642ss_int 617 ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 618 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 619 %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0) 620 ret <4 x float> %2 621} 622declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone 623 624; TODO stack_fold_cvtss2sd 625 626define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) { 627 ;CHECK-LABEL: stack_fold_cvtss2sd_int 628 ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 629 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 630 %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0) 631 ret <2 x double> %2 632} 633declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone 634 635; TODO stack_fold_cvtss2si 636 637define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) { 638 ;CHECK-LABEL: stack_fold_cvtss2si_int 639 ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 640 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 641 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 642 ret i32 %2 643} 644declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone 645 646; TODO stack_fold_cvtss2si64 647 648define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) { 649 ;CHECK-LABEL: stack_fold_cvtss2si64_int 650 ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 651 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 652 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) 653 ret i64 %2 654} 655declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 656 657define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) { 658 ;CHECK-LABEL: stack_fold_cvttpd2dq 659 ;CHECK: vcvttpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 660 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 661 %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) 662 ret <4 x i32> %2 663} 664declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone 665 666define <4 x i32> @stack_fold_cvttpd2dq_ymm(<4 x double> %a0) { 667 ;CHECK-LABEL: stack_fold_cvttpd2dq_ymm 668 ;CHECK: vcvttpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 669 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 670 %2 = fptosi <4 x double> %a0 to <4 x i32> 671 ret <4 x i32> %2 672} 673 674define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) { 675 ;CHECK-LABEL: stack_fold_cvttps2dq 676 ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 677 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 678 %2 = fptosi <4 x float> %a0 to <4 x i32> 679 ret <4 x i32> %2 680} 681 682define <8 x i32> @stack_fold_cvttps2dq_ymm(<8 x float> %a0) { 683 ;CHECK-LABEL: stack_fold_cvttps2dq_ymm 684 ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 685 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 686 %2 = fptosi <8 x float> %a0 to <8 x i32> 687 ret <8 x i32> %2 688} 689 690define i32 @stack_fold_cvttsd2si(double %a0) { 691 ;CHECK-LABEL: stack_fold_cvttsd2si 692 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload 693 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 694 %2 = fptosi double %a0 to i32 695 ret i32 %2 696} 697 698define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) { 699 ;CHECK-LABEL: stack_fold_cvttsd2si_int 700 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 701 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 702 %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) 703 ret i32 %2 704} 705declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone 706 707define i64 @stack_fold_cvttsd2si64(double %a0) { 708 ;CHECK-LABEL: stack_fold_cvttsd2si64 709 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload 710 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 711 %2 = fptosi double %a0 to i64 712 ret i64 %2 713} 714 715define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) { 716 ;CHECK-LABEL: stack_fold_cvttsd2si64_int 717 ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 718 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 719 %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) 720 ret i64 %2 721} 722declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone 723 724define i32 @stack_fold_cvttss2si(float %a0) { 725 ;CHECK-LABEL: stack_fold_cvttss2si 726 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload 727 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 728 %2 = fptosi float %a0 to i32 729 ret i32 %2 730} 731 732define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) { 733 ;CHECK-LABEL: stack_fold_cvttss2si_int 734 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload 735 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 736 %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 737 ret i32 %2 738} 739declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 740 741define i64 @stack_fold_cvttss2si64(float %a0) { 742 ;CHECK-LABEL: stack_fold_cvttss2si64 743 ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload 744 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 745 %2 = fptosi float %a0 to i64 746 ret i64 %2 747} 748 749define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) { 750 ;CHECK-LABEL: stack_fold_cvttss2si64_int 751 ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload 752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 753 %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) 754 ret i64 %2 755} 756declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone 757 758define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) { 759 ;CHECK-LABEL: stack_fold_divpd 760 ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 761 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 762 %2 = fdiv <2 x double> %a0, %a1 763 ret <2 x double> %2 764} 765 766define <4 x double> @stack_fold_divpd_ymm(<4 x double> %a0, <4 x double> %a1) { 767 ;CHECK-LABEL: stack_fold_divpd_ymm 768 ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 769 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 770 %2 = fdiv <4 x double> %a0, %a1 771 ret <4 x double> %2 772} 773 774define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) { 775 ;CHECK-LABEL: stack_fold_divps 776 ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 777 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 778 %2 = fdiv <4 x float> %a0, %a1 779 ret <4 x float> %2 780} 781 782define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) { 783 ;CHECK-LABEL: stack_fold_divps_ymm 784 ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 785 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 786 %2 = fdiv <8 x float> %a0, %a1 787 ret <8 x float> %2 788} 789 790define double @stack_fold_divsd(double %a0, double %a1) { 791 ;CHECK-LABEL: stack_fold_divsd 792 ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 793 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 794 %2 = fdiv double %a0, %a1 795 ret double %2 796} 797 798define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) { 799 ;CHECK-LABEL: stack_fold_divsd_int 800 ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 801 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 802 %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) 803 ret <2 x double> %2 804} 805declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone 806 807define float @stack_fold_divss(float %a0, float %a1) { 808 ;CHECK-LABEL: stack_fold_divss 809 ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 810 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 811 %2 = fdiv float %a0, %a1 812 ret float %2 813} 814 815define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) { 816 ;CHECK-LABEL: stack_fold_divss_int 817 ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 818 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 819 %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) 820 ret <4 x float> %2 821} 822declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone 823 824define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) { 825 ;CHECK-LABEL: stack_fold_dppd 826 ;CHECK: vdppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 827 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 828 %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) 829 ret <2 x double> %2 830} 831declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone 832 833define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) { 834 ;CHECK-LABEL: stack_fold_dpps 835 ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 836 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 837 %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) 838 ret <4 x float> %2 839} 840declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone 841 842define <8 x float> @stack_fold_dpps_ymm(<8 x float> %a0, <8 x float> %a1) { 843 ;CHECK-LABEL: stack_fold_dpps_ymm 844 ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 845 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 846 %2 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) 847 ret <8 x float> %2 848} 849declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 850 851define <4 x float> @stack_fold_extractf128(<8 x float> %a0, <8 x float> %a1) { 852 ;CHECK-LABEL: stack_fold_extractf128 853 ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill 854 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 855 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 856 ret <4 x float> %1 857} 858 859define i32 @stack_fold_extractps(<4 x float> %a0) { 860 ;CHECK-LABEL: stack_fold_extractps 861 ;CHECK: vextractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill 862 ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload 863 %1 = extractelement <4 x float> %a0, i32 1 864 %2 = bitcast float %1 to i32 865 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 866 ret i32 %2 867} 868 869define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) { 870 ;CHECK-LABEL: stack_fold_haddpd 871 ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 872 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 873 %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) 874 ret <2 x double> %2 875} 876declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone 877 878define <4 x double> @stack_fold_haddpd_ymm(<4 x double> %a0, <4 x double> %a1) { 879 ;CHECK-LABEL: stack_fold_haddpd_ymm 880 ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 881 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 882 %2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) 883 ret <4 x double> %2 884} 885declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone 886 887define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) { 888 ;CHECK-LABEL: stack_fold_haddps 889 ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 890 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 891 %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) 892 ret <4 x float> %2 893} 894declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 895 896define <8 x float> @stack_fold_haddps_ymm(<8 x float> %a0, <8 x float> %a1) { 897 ;CHECK-LABEL: stack_fold_haddps_ymm 898 ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 899 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 900 %2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) 901 ret <8 x float> %2 902} 903declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone 904 905define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) { 906 ;CHECK-LABEL: stack_fold_hsubpd 907 ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 908 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 909 %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) 910 ret <2 x double> %2 911} 912declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 913 914define <4 x double> @stack_fold_hsubpd_ymm(<4 x double> %a0, <4 x double> %a1) { 915 ;CHECK-LABEL: stack_fold_hsubpd_ymm 916 ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 917 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 918 %2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) 919 ret <4 x double> %2 920} 921declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 922 923define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { 924 ;CHECK-LABEL: stack_fold_hsubps 925 ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 926 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 927 %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) 928 ret <4 x float> %2 929} 930declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 931 932define <8 x float> @stack_fold_hsubps_ymm(<8 x float> %a0, <8 x float> %a1) { 933 ;CHECK-LABEL: stack_fold_hsubps_ymm 934 ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 935 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 936 %2 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) 937 ret <8 x float> %2 938} 939declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 940 941define <8 x float> @stack_fold_insertf128(<4 x float> %a0, <4 x float> %a1) { 942 ;CHECK-LABEL: stack_fold_insertf128 943 ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 944 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 945 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 946 ret <8 x float> %2 947} 948 949define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { 950 ;CHECK-LABEL: stack_fold_insertps 951 ;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 952 ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3] 953 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 954 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 955 ret <4 x float> %2 956} 957declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 958 959define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { 960 ;CHECK-LABEL: stack_fold_maxpd 961 ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 962 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 963 %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 964 ret <2 x double> %2 965} 966declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 967 968define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) { 969 ;CHECK-LABEL: stack_fold_maxpd_ymm 970 ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 971 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 972 %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) 973 ret <4 x double> %2 974} 975declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone 976 977define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) { 978 ;CHECK-LABEL: stack_fold_maxps 979 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 980 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 981 %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 982 ret <4 x float> %2 983} 984declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 985 986define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) { 987 ;CHECK-LABEL: stack_fold_maxps_ymm 988 ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 989 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 990 %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) 991 ret <8 x float> %2 992} 993declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 994 995define double @stack_fold_maxsd(double %a0, double %a1) { 996 ;CHECK-LABEL: stack_fold_maxsd 997 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 998 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 999 %2 = fcmp ogt double %a0, %a1 1000 %3 = select i1 %2, double %a0, double %a1 1001 ret double %3 1002} 1003 1004define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) { 1005 ;CHECK-LABEL: stack_fold_maxsd_int 1006 ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1007 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1008 %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 1009 ret <2 x double> %2 1010} 1011declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 1012 1013define float @stack_fold_maxss(float %a0, float %a1) { 1014 ;CHECK-LABEL: stack_fold_maxss 1015 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1016 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1017 %2 = fcmp ogt float %a0, %a1 1018 %3 = select i1 %2, float %a0, float %a1 1019 ret float %3 1020} 1021 1022define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) { 1023 ;CHECK-LABEL: stack_fold_maxss_int 1024 ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1025 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1026 %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 1027 ret <4 x float> %2 1028} 1029declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1030 1031define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) { 1032 ;CHECK-LABEL: stack_fold_minpd 1033 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1034 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1035 %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 1036 ret <2 x double> %2 1037} 1038declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 1039 1040define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1041 ;CHECK-LABEL: stack_fold_minpd_ymm 1042 ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1043 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1044 %2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) 1045 ret <4 x double> %2 1046} 1047declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 1048 1049define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) { 1050 ;CHECK-LABEL: stack_fold_minps 1051 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1052 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1053 %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1054 ret <4 x float> %2 1055} 1056declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1057 1058define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) { 1059 ;CHECK-LABEL: stack_fold_minps_ymm 1060 ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1061 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1062 %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) 1063 ret <8 x float> %2 1064} 1065declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 1066 1067define double @stack_fold_minsd(double %a0, double %a1) { 1068 ;CHECK-LABEL: stack_fold_minsd 1069 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1070 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1071 %2 = fcmp olt double %a0, %a1 1072 %3 = select i1 %2, double %a0, double %a1 1073 ret double %3 1074} 1075 1076define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) { 1077 ;CHECK-LABEL: stack_fold_minsd_int 1078 ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1079 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1080 %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 1081 ret <2 x double> %2 1082} 1083declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 1084 1085define float @stack_fold_minss(float %a0, float %a1) { 1086 ;CHECK-LABEL: stack_fold_minss 1087 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1088 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1089 %2 = fcmp olt float %a0, %a1 1090 %3 = select i1 %2, float %a0, float %a1 1091 ret float %3 1092} 1093 1094define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) { 1095 ;CHECK-LABEL: stack_fold_minss_int 1096 ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1097 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1098 %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 1099 ret <4 x float> %2 1100} 1101declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1102 1103define <2 x double> @stack_fold_movddup(<2 x double> %a0) { 1104 ;CHECK-LABEL: stack_fold_movddup 1105 ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1106 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1107 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0> 1108 ret <2 x double> %2 1109} 1110 1111define <4 x double> @stack_fold_movddup_ymm(<4 x double> %a0) { 1112 ;CHECK-LABEL: stack_fold_movddup_ymm 1113 ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1114 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1115 %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1116 ret <4 x double> %2 1117} 1118 1119; TODO stack_fold_movhpd (load / store) 1120; TODO stack_fold_movhps (load / store) 1121 1122; TODO stack_fold_movlpd (load / store) 1123; TODO stack_fold_movlps (load / store) 1124 1125define <4 x float> @stack_fold_movshdup(<4 x float> %a0) { 1126 ;CHECK-LABEL: stack_fold_movshdup 1127 ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1128 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1129 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 1130 ret <4 x float> %2 1131} 1132 1133define <8 x float> @stack_fold_movshdup_ymm(<8 x float> %a0) { 1134 ;CHECK-LABEL: stack_fold_movshdup_ymm 1135 ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1136 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1137 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 1138 ret <8 x float> %2 1139} 1140 1141define <4 x float> @stack_fold_movsldup(<4 x float> %a0) { 1142 ;CHECK-LABEL: stack_fold_movsldup 1143 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1145 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1146 ret <4 x float> %2 1147} 1148 1149define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) { 1150 ;CHECK-LABEL: stack_fold_movsldup_ymm 1151 ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1152 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1153 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 1154 ret <8 x float> %2 1155} 1156 1157define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) { 1158 ;CHECK-LABEL: stack_fold_mulpd 1159 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1160 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1161 %2 = fmul <2 x double> %a0, %a1 1162 ret <2 x double> %2 1163} 1164 1165define <4 x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1166 ;CHECK-LABEL: stack_fold_mulpd_ymm 1167 ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1168 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1169 %2 = fmul <4 x double> %a0, %a1 1170 ret <4 x double> %2 1171} 1172 1173define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) { 1174 ;CHECK-LABEL: stack_fold_mulps 1175 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1176 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1177 %2 = fmul <4 x float> %a0, %a1 1178 ret <4 x float> %2 1179} 1180 1181define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) { 1182 ;CHECK-LABEL: stack_fold_mulps_ymm 1183 ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1184 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1185 %2 = fmul <8 x float> %a0, %a1 1186 ret <8 x float> %2 1187} 1188 1189define double @stack_fold_mulsd(double %a0, double %a1) { 1190 ;CHECK-LABEL: stack_fold_mulsd 1191 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1192 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1193 %2 = fmul double %a0, %a1 1194 ret double %2 1195} 1196 1197define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) { 1198 ;CHECK-LABEL: stack_fold_mulsd_int 1199 ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1200 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1201 %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) 1202 ret <2 x double> %2 1203} 1204declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone 1205 1206define float @stack_fold_mulss(float %a0, float %a1) { 1207 ;CHECK-LABEL: stack_fold_mulss 1208 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1209 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1210 %2 = fmul float %a0, %a1 1211 ret float %2 1212} 1213 1214define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) { 1215 ;CHECK-LABEL: stack_fold_mulss_int 1216 ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1217 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1218 %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) 1219 ret <4 x float> %2 1220} 1221declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone 1222 1223define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) { 1224 ;CHECK-LABEL: stack_fold_orpd 1225 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1226 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1227 %2 = bitcast <2 x double> %a0 to <2 x i64> 1228 %3 = bitcast <2 x double> %a1 to <2 x i64> 1229 %4 = or <2 x i64> %2, %3 1230 %5 = bitcast <2 x i64> %4 to <2 x double> 1231 ; fadd forces execution domain 1232 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 1233 ret <2 x double> %6 1234} 1235 1236define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1237 ;CHECK-LABEL: stack_fold_orpd_ymm 1238 ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1239 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1240 %2 = bitcast <4 x double> %a0 to <4 x i64> 1241 %3 = bitcast <4 x double> %a1 to <4 x i64> 1242 %4 = or <4 x i64> %2, %3 1243 %5 = bitcast <4 x i64> %4 to <4 x double> 1244 ; fadd forces execution domain 1245 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0> 1246 ret <4 x double> %6 1247} 1248 1249define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) { 1250 ;CHECK-LABEL: stack_fold_orps 1251 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1252 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1253 %2 = bitcast <4 x float> %a0 to <2 x i64> 1254 %3 = bitcast <4 x float> %a1 to <2 x i64> 1255 %4 = or <2 x i64> %2, %3 1256 %5 = bitcast <2 x i64> %4 to <4 x float> 1257 ; fadd forces execution domain 1258 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 1259 ret <4 x float> %6 1260} 1261 1262define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) { 1263 ;CHECK-LABEL: stack_fold_orps_ymm 1264 ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1265 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1266 %2 = bitcast <8 x float> %a0 to <4 x i64> 1267 %3 = bitcast <8 x float> %a1 to <4 x i64> 1268 %4 = or <4 x i64> %2, %3 1269 %5 = bitcast <4 x i64> %4 to <8 x float> 1270 ; fadd forces execution domain 1271 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 1272 ret <8 x float> %6 1273} 1274 1275define <8 x float> @stack_fold_perm2f128(<8 x float> %a0, <8 x float> %a1) { 1276 ;CHECK-LABEL: stack_fold_perm2f128 1277 ;CHECK: vperm2f128 $33, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1278 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1279 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1280 ret <8 x float> %2 1281} 1282 1283define <2 x double> @stack_fold_permilpd(<2 x double> %a0) { 1284 ;CHECK-LABEL: stack_fold_permilpd 1285 ;CHECK: vpermilpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1286 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1287 %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0> 1288 ret <2 x double> %2 1289} 1290 1291define <4 x double> @stack_fold_permilpd_ymm(<4 x double> %a0) { 1292 ;CHECK-LABEL: stack_fold_permilpd_ymm 1293 ;CHECK: vpermilpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1294 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1295 %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> 1296 ret <4 x double> %2 1297} 1298 1299define <2 x double> @stack_fold_permilpdvar(<2 x double> %a0, <2 x i64> %a1) { 1300 ;CHECK-LABEL: stack_fold_permilpdvar 1301 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1302 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1303 %2 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) 1304 ret <2 x double> %2 1305} 1306declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone 1307 1308define <4 x double> @stack_fold_permilpdvar_ymm(<4 x double> %a0, <4 x i64> %a1) { 1309 ;CHECK-LABEL: stack_fold_permilpdvar_ymm 1310 ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1311 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1312 %2 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) 1313 ret <4 x double> %2 1314} 1315declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 1316 1317define <4 x float> @stack_fold_permilps(<4 x float> %a0) { 1318 ;CHECK-LABEL: stack_fold_permilps 1319 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1320 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1321 %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1322 ret <4 x float> %2 1323} 1324 1325define <8 x float> @stack_fold_permilps_ymm(<8 x float> %a0) { 1326 ;CHECK-LABEL: stack_fold_permilps_ymm 1327 ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1328 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1329 %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1330 ret <8 x float> %2 1331} 1332 1333define <4 x float> @stack_fold_permilpsvar(<4 x float> %a0, <4 x i32> %a1) { 1334 ;CHECK-LABEL: stack_fold_permilpsvar 1335 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1336 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1337 %2 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) 1338 ret <4 x float> %2 1339} 1340declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 1341 1342define <8 x float> @stack_fold_permilpsvar_ymm(<8 x float> %a0, <8 x i32> %a1) { 1343 ;CHECK-LABEL: stack_fold_permilpsvar_ymm 1344 ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1345 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1346 %2 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) 1347 ret <8 x float> %2 1348} 1349declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 1350 1351; TODO stack_fold_rcpps 1352 1353define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) { 1354 ;CHECK-LABEL: stack_fold_rcpps_int 1355 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1356 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1357 %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 1358 ret <4 x float> %2 1359} 1360declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1361 1362; TODO stack_fold_rcpps_ymm 1363 1364define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) { 1365 ;CHECK-LABEL: stack_fold_rcpps_ymm_int 1366 ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1367 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1368 %2 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) 1369 ret <8 x float> %2 1370} 1371declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone 1372 1373; TODO stack_fold_rcpss 1374 1375define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0) { 1376 ;CHECK-LABEL: stack_fold_rcpss_int 1377 ;CHECK: vrcpss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1378 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1379 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) 1380 ret <4 x float> %2 1381} 1382declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 1383 1384define <2 x double> @stack_fold_roundpd(<2 x double> %a0) { 1385 ;CHECK-LABEL: stack_fold_roundpd 1386 ;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1387 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1388 %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) 1389 ret <2 x double> %2 1390} 1391declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 1392 1393define <4 x double> @stack_fold_roundpd_ymm(<4 x double> %a0) { 1394 ;CHECK-LABEL: stack_fold_roundpd_ymm 1395 ;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1396 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1397 %2 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) 1398 ret <4 x double> %2 1399} 1400declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone 1401 1402define <4 x float> @stack_fold_roundps(<4 x float> %a0) { 1403 ;CHECK-LABEL: stack_fold_roundps 1404 ;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1405 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1406 %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) 1407 ret <4 x float> %2 1408} 1409declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 1410 1411define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) { 1412 ;CHECK-LABEL: stack_fold_roundps_ymm 1413 ;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1414 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1415 %2 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) 1416 ret <8 x float> %2 1417} 1418declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone 1419 1420define double @stack_fold_roundsd(double %a0) optsize { 1421 ;CHECK-LABEL: stack_fold_roundsd 1422 ;CHECK: vroundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1423 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1424 %2 = call double @llvm.floor.f64(double %a0) 1425 ret double %2 1426} 1427declare double @llvm.floor.f64(double) nounwind readnone 1428 1429; TODO stack_fold_roundsd_int 1430declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 1431 1432define float @stack_fold_roundss(float %a0) optsize { 1433 ;CHECK-LABEL: stack_fold_roundss 1434 ;CHECK: vroundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1435 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1436 %2 = call float @llvm.floor.f32(float %a0) 1437 ret float %2 1438} 1439declare float @llvm.floor.f32(float) nounwind readnone 1440 1441; TODO stack_fold_roundss_int 1442declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 1443 1444; TODO stack_fold_rsqrtps 1445 1446define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) { 1447 ;CHECK-LABEL: stack_fold_rsqrtps_int 1448 ;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1449 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1450 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 1451 ret <4 x float> %2 1452} 1453declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 1454 1455; TODO stack_fold_rsqrtps_ymm 1456 1457define <8 x float> @stack_fold_rsqrtps_ymm_int(<8 x float> %a0) { 1458 ;CHECK-LABEL: stack_fold_rsqrtps_ymm_int 1459 ;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1460 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1461 %2 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) 1462 ret <8 x float> %2 1463} 1464declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone 1465 1466; TODO stack_fold_rsqrtss 1467 1468define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0) { 1469 ;CHECK-LABEL: stack_fold_rsqrtss_int 1470 ;CHECK: vrsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1471 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1472 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) 1473 ret <4 x float> %2 1474} 1475declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone 1476 1477define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) { 1478 ;CHECK-LABEL: stack_fold_shufpd 1479 ;CHECK: vshufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1480 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1481 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 1482 ret <2 x double> %2 1483} 1484 1485define <4 x double> @stack_fold_shufpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1486 ;CHECK-LABEL: stack_fold_shufpd_ymm 1487 ;CHECK: vshufpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1488 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1489 %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 3, i32 6> 1490 ret <4 x double> %2 1491} 1492 1493define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) { 1494 ;CHECK-LABEL: stack_fold_shufps 1495 ;CHECK: vshufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1496 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1497 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7> 1498 ret <4 x float> %2 1499} 1500 1501define <8 x float> @stack_fold_shufps_ymm(<8 x float> %a0, <8 x float> %a1) { 1502 ;CHECK-LABEL: stack_fold_shufps_ymm 1503 ;CHECK: vshufps $148, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1504 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1505 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 13, i32 14> 1506 ret <8 x float> %2 1507} 1508 1509define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) { 1510 ;CHECK-LABEL: stack_fold_sqrtpd 1511 ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1512 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1513 %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) 1514 ret <2 x double> %2 1515} 1516declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 1517 1518define <4 x double> @stack_fold_sqrtpd_ymm(<4 x double> %a0) { 1519 ;CHECK-LABEL: stack_fold_sqrtpd_ymm 1520 ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1521 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1522 %2 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) 1523 ret <4 x double> %2 1524} 1525declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone 1526 1527define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) { 1528 ;CHECK-LABEL: stack_fold_sqrtps 1529 ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1530 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1531 %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) 1532 ret <4 x float> %2 1533} 1534declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone 1535 1536define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) { 1537 ;CHECK-LABEL: stack_fold_sqrtps_ymm 1538 ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1539 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1540 %2 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) 1541 ret <8 x float> %2 1542} 1543declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone 1544 1545define double @stack_fold_sqrtsd(double %a0) { 1546 ;CHECK-LABEL: stack_fold_sqrtsd 1547 ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1548 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1549 %2 = call double @llvm.sqrt.f64(double %a0) 1550 ret double %2 1551} 1552declare double @llvm.sqrt.f64(double) nounwind readnone 1553 1554define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0) { 1555 ;CHECK-LABEL: stack_fold_sqrtsd_int 1556 ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1557 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1558 %2 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) 1559 ret <2 x double> %2 1560} 1561declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 1562 1563define float @stack_fold_sqrtss(float %a0) { 1564 ;CHECK-LABEL: stack_fold_sqrtss 1565 ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1566 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1567 %2 = call float @llvm.sqrt.f32(float %a0) 1568 ret float %2 1569} 1570declare float @llvm.sqrt.f32(float) nounwind readnone 1571 1572define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0) { 1573 ;CHECK-LABEL: stack_fold_sqrtss_int 1574 ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1575 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1576 %2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) 1577 ret <4 x float> %2 1578} 1579declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone 1580 1581define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) { 1582 ;CHECK-LABEL: stack_fold_subpd 1583 ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1584 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1585 %2 = fsub <2 x double> %a0, %a1 1586 ret <2 x double> %2 1587} 1588 1589define <4 x double> @stack_fold_subpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1590 ;CHECK-LABEL: stack_fold_subpd_ymm 1591 ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1592 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1593 %2 = fsub <4 x double> %a0, %a1 1594 ret <4 x double> %2 1595} 1596 1597define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) { 1598 ;CHECK-LABEL: stack_fold_subps 1599 ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1600 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1601 %2 = fsub <4 x float> %a0, %a1 1602 ret <4 x float> %2 1603} 1604 1605define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) { 1606 ;CHECK-LABEL: stack_fold_subps_ymm 1607 ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1608 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1609 %2 = fsub <8 x float> %a0, %a1 1610 ret <8 x float> %2 1611} 1612 1613define double @stack_fold_subsd(double %a0, double %a1) { 1614 ;CHECK-LABEL: stack_fold_subsd 1615 ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1616 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1617 %2 = fsub double %a0, %a1 1618 ret double %2 1619} 1620 1621define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) { 1622 ;CHECK-LABEL: stack_fold_subsd_int 1623 ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1624 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1625 %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) 1626 ret <2 x double> %2 1627} 1628declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone 1629 1630define float @stack_fold_subss(float %a0, float %a1) { 1631 ;CHECK-LABEL: stack_fold_subss 1632 ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1633 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1634 %2 = fsub float %a0, %a1 1635 ret float %2 1636} 1637 1638define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) { 1639 ;CHECK-LABEL: stack_fold_subss_int 1640 ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1641 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1642 %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) 1643 ret <4 x float> %2 1644} 1645declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone 1646 1647define i32 @stack_fold_testpd(<2 x double> %a0, <2 x double> %a1) { 1648 ;CHECK-LABEL: stack_fold_testpd 1649 ;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1650 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1651 %2 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) 1652 ret i32 %2 1653} 1654declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone 1655 1656define i32 @stack_fold_testpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1657 ;CHECK-LABEL: stack_fold_testpd_ymm 1658 ;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1659 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1660 %2 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) 1661 ret i32 %2 1662} 1663declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone 1664 1665define i32 @stack_fold_testps(<4 x float> %a0, <4 x float> %a1) { 1666 ;CHECK-LABEL: stack_fold_testps 1667 ;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1668 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1669 %2 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) 1670 ret i32 %2 1671} 1672declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone 1673 1674define i32 @stack_fold_testps_ymm(<8 x float> %a0, <8 x float> %a1) { 1675 ;CHECK-LABEL: stack_fold_testps_ymm 1676 ;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1677 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1678 %2 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) 1679 ret i32 %2 1680} 1681declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone 1682 1683define i32 @stack_fold_ucomisd(double %a0, double %a1) { 1684 ;CHECK-LABEL: stack_fold_ucomisd 1685 ;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 1686 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1687 %2 = fcmp ueq double %a0, %a1 1688 %3 = select i1 %2, i32 1, i32 -1 1689 ret i32 %3 1690} 1691 1692define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) { 1693 ;CHECK-LABEL: stack_fold_ucomisd_int 1694 ;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1695 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1696 %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) 1697 ret i32 %2 1698} 1699declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone 1700 1701define i32 @stack_fold_ucomiss(float %a0, float %a1) { 1702 ;CHECK-LABEL: stack_fold_ucomiss 1703 ;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 1704 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1705 %2 = fcmp ueq float %a0, %a1 1706 %3 = select i1 %2, i32 1, i32 -1 1707 ret i32 %3 1708} 1709 1710define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) { 1711 ;CHECK-LABEL: stack_fold_ucomiss_int 1712 ;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1713 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1714 %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 1715 ret i32 %2 1716} 1717declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 1718 1719define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) { 1720 ;CHECK-LABEL: stack_fold_unpckhpd 1721 ;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1722 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1723 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> 1724 ; fadd forces execution domain 1725 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 1726 ret <2 x double> %3 1727} 1728 1729define <4 x double> @stack_fold_unpckhpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1730 ;CHECK-LABEL: stack_fold_unpckhpd_ymm 1731 ;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1732 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1733 %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 1734 ; fadd forces execution domain 1735 %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> 1736 ret <4 x double> %3 1737} 1738 1739define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) { 1740 ;CHECK-LABEL: stack_fold_unpckhps 1741 ;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1742 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1743 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 1744 ; fadd forces execution domain 1745 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 1746 ret <4 x float> %3 1747} 1748 1749define <8 x float> @stack_fold_unpckhps_ymm(<8 x float> %a0, <8 x float> %a1) { 1750 ;CHECK-LABEL: stack_fold_unpckhps_ymm 1751 ;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1753 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 1754 ; fadd forces execution domain 1755 %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 1756 ret <8 x float> %3 1757} 1758 1759define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) { 1760 ;CHECK-LABEL: stack_fold_unpcklpd 1761 ;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1762 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1763 %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> 1764 ; fadd forces execution domain 1765 %3 = fadd <2 x double> %2, <double 0x0, double 0x0> 1766 ret <2 x double> %3 1767} 1768 1769define <4 x double> @stack_fold_unpcklpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1770 ;CHECK-LABEL: stack_fold_unpcklpd_ymm 1771 ;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1772 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1773 %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 1774 ; fadd forces execution domain 1775 %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0> 1776 ret <4 x double> %3 1777} 1778 1779define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) { 1780 ;CHECK-LABEL: stack_fold_unpcklps 1781 ;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1782 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1783 %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1784 ; fadd forces execution domain 1785 %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0> 1786 ret <4 x float> %3 1787} 1788 1789define <8 x float> @stack_fold_unpcklps_ymm(<8 x float> %a0, <8 x float> %a1) { 1790 ;CHECK-LABEL: stack_fold_unpcklps_ymm 1791 ;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1792 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1793 %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 1794 ; fadd forces execution domain 1795 %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 1796 ret <8 x float> %3 1797} 1798 1799define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) { 1800 ;CHECK-LABEL: stack_fold_xorpd 1801 ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1802 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1803 %2 = bitcast <2 x double> %a0 to <2 x i64> 1804 %3 = bitcast <2 x double> %a1 to <2 x i64> 1805 %4 = xor <2 x i64> %2, %3 1806 %5 = bitcast <2 x i64> %4 to <2 x double> 1807 ; fadd forces execution domain 1808 %6 = fadd <2 x double> %5, <double 0x0, double 0x0> 1809 ret <2 x double> %6 1810} 1811 1812define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) { 1813 ;CHECK-LABEL: stack_fold_xorpd_ymm 1814 ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1815 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1816 %2 = bitcast <4 x double> %a0 to <4 x i64> 1817 %3 = bitcast <4 x double> %a1 to <4 x i64> 1818 %4 = xor <4 x i64> %2, %3 1819 %5 = bitcast <4 x i64> %4 to <4 x double> 1820 ; fadd forces execution domain 1821 %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0> 1822 ret <4 x double> %6 1823} 1824 1825define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) { 1826 ;CHECK-LABEL: stack_fold_xorps 1827 ;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1828 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1829 %2 = bitcast <4 x float> %a0 to <2 x i64> 1830 %3 = bitcast <4 x float> %a1 to <2 x i64> 1831 %4 = xor <2 x i64> %2, %3 1832 %5 = bitcast <2 x i64> %4 to <4 x float> 1833 ; fadd forces execution domain 1834 %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0> 1835 ret <4 x float> %6 1836} 1837 1838define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) { 1839 ;CHECK-LABEL: stack_fold_xorps_ymm 1840 ;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 1841 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1842 %2 = bitcast <8 x float> %a0 to <4 x i64> 1843 %3 = bitcast <8 x float> %a1 to <4 x i64> 1844 %4 = xor <4 x i64> %2, %3 1845 %5 = bitcast <4 x i64> %4 to <8 x float> 1846 ; fadd forces execution domain 1847 %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0> 1848 ret <8 x float> %6 1849} 1850