; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s
; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
; Check that MMX and SSSE3 intrinsic calls are lowered to the corresponding x86 instructions.

declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpgtb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pcmpeqb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckldq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpcklbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhdq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: punpckhbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packuswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packssdw
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone

define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: packsswb
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone

define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone

define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone

define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone

define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone

define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone

define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to i64
  ret i64 %2
}

declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone

define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <2 x i32>
  %3 = bitcast <2 x i32> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone

define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
  %2 = bitcast x86_mmx %1 to <4 x i16>
  %3 = bitcast <4 x i16> %2 to <1 x i64>
  %4 = extractelement <1 x i64> %3, i32 0
  ret i64 %4
}

declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrad
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psraw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psrlw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pslld
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psllw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1.i = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone

define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pxor
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone

define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: por
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone

define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pandn
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone

define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pand
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmullw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone

define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddwd
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone

declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psubb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddusb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddsb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone

define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddq
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: paddb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psadbw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pminub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaxub
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pavgb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind

define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
; CHECK: movntq
entry:
  %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var.i = bitcast i64 %0 to x86_mmx
  tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
  ret void
}

declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone

define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pmovmskb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
  ret i32 %1
}

declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind

define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
; CHECK: maskmovq
entry:
  %0 = bitcast <1 x i64> %n to <8 x i8>
  %1 = bitcast <1 x i64> %d to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
  %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
  tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
  ret void
}

declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhuw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
  %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone

define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pshufw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: test21_2
; CHECK: pshufw
; CHECK: movd
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <2 x i32>
  %5 = extractelement <2 x i32> %4, i32 0
  ret i32 %5
}

declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone

define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmuludq
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone

define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: cvtpi2pd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
  ret <2 x double> %2
}

declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone

define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvttpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone

define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
; CHECK: cvtpd2pi
entry:
  %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
  %1 = bitcast x86_mmx %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = extractelement <1 x i64> %2, i32 0
  ret i64 %3
}

declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone

define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: palignr
entry:
  %0 = extractelement <1 x i64> %a, i32 0
  %mmx_var = bitcast i64 %0 to x86_mmx
  %1 = extractelement <1 x i64> %b, i32 0
  %mmx_var1 = bitcast i64 %1 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
  %3 = bitcast x86_mmx %2 to i64
  ret i64 %3
}

declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone

define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsd
entry:
  %0 = bitcast <1 x i64> %a to <2 x i32>
  %1 = bitcast <2 x i32> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone

define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsw
entry:
  %0 = bitcast <1 x i64> %a to <4 x i16>
  %1 = bitcast <4 x i16> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone

define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
; CHECK: pabsb
entry:
  %0 = bitcast <1 x i64> %a to <8 x i8>
  %1 = bitcast <8 x i8> %0 to x86_mmx
  %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
  %3 = bitcast x86_mmx %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}

declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: psignb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone

define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pshufb
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmulhrsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: pmaddubsw
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %2 = bitcast <8 x i8> %1 to x86_mmx
  %3 = bitcast <8 x i8> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <8 x i8>
  %6 = bitcast <8 x i8> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone

define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phsubw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone

define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddsw
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to x86_mmx
  %3 = bitcast <4 x i16> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone

define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; CHECK: phaddd
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %2 = bitcast <2 x i32> %1 to x86_mmx
  %3 = bitcast <2 x i32> %0 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
  %5 = bitcast x86_mmx %4 to <2 x i32>
  %6 = bitcast <2 x i32> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}

define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
; CHECK: cvtpi2ps
  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
  ret <4 x float> %c
}

declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone