; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s

; Codegen test for the llvm.x86.sse2.* intrinsics with AVX disabled.
; Every test function calls one intrinsic and verifies the instruction(s)
; that llc emits for it.  Each function is anchored on its own assembly
; label so that the instruction patterns below it cannot be satisfied by
; output belonging to a neighbouring function.  The trailing colon in the
; anchors keeps names that are prefixes of other names (for example psll_d
; versus psll_dq) from matching the wrong symbol.

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_add_sd:
  ; CHECK: addsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_cmp_pd:
  ; CHECK: cmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_cmp_sd:
  ; CHECK: cmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comieq_sd:
  ; CHECK: comisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comige_sd:
  ; CHECK: comisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comigt_sd:
  ; CHECK: comisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comile_sd:
  ; CHECK: comisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comilt_sd:
  ; CHECK: comisd
  ; CHECK: sbbl %eax, %eax
  ; CHECK: andl $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_comineq_sd:
  ; CHECK: comisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
  ; CHECK: cvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
  ; CHECK: cvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
  ; CHECK: cvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
  ; CHECK: cvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtps2dq:
  ; CHECK: cvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtps2pd:
  ; CHECK: cvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtsd2si:
  ; CHECK: cvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
  ; CHECK: cvtsd2ss
  ; The three-operand register form must not be emitted.
  ; CHECK-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
  ; CHECK: movl
  ; CHECK: cvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
  ; CHECK-LABEL: test_x86_sse2_cvtss2sd:
  ; CHECK: cvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
  ; CHECK: cvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvttps2dq:
  ; CHECK: cvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_cvttsd2si:
  ; CHECK: cvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_div_sd:
  ; CHECK: divsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_max_pd:
  ; CHECK: maxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_max_sd:
  ; CHECK: maxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_min_pd:
  ; CHECK: minpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_min_sd:
  ; CHECK: minsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_movmsk_pd:
  ; CHECK: movmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_mul_sd:
  ; CHECK: mulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_packssdw_128:
  ; CHECK: packssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_packsswb_128:
  ; CHECK: packsswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_packuswb_128:
  ; CHECK: packuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_padds_b:
  ; CHECK: paddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_padds_w:
  ; CHECK: paddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_paddus_b:
  ; CHECK: paddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_paddus_w:
  ; CHECK: paddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pavg_b:
  ; CHECK: pavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pavg_w:
  ; CHECK: pavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmadd_wd:
  ; CHECK: pmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmaxs_w:
  ; CHECK: pmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmaxu_b:
  ; CHECK: pmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmins_w:
  ; CHECK: pminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pminu_b:
  ; CHECK: pminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
  ; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
  ; CHECK: pmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmulh_w:
  ; CHECK: pmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmulhu_w:
  ; CHECK: pmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_pmulu_dq:
  ; CHECK: pmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psad_bw:
  ; CHECK: psadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psll_d:
  ; CHECK: pslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psll_dq:
  ; CHECK: pslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psll_dq_bs:
  ; CHECK: pslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psll_q:
  ; CHECK: psllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psll_w:
  ; CHECK: psllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_pslli_d:
  ; CHECK: pslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
  ; CHECK-LABEL: test_x86_sse2_pslli_q:
  ; CHECK: psllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
  ; CHECK-LABEL: test_x86_sse2_pslli_w:
  ; CHECK: psllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psra_d:
  ; CHECK: psrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psra_w:
  ; CHECK: psraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrai_d:
  ; CHECK: psrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrai_w:
  ; CHECK: psraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psrl_d:
  ; CHECK: psrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrl_dq:
  ; CHECK: psrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrl_dq_bs:
  ; CHECK: psrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psrl_q:
  ; CHECK: psrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psrl_w:
  ; CHECK: psrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrli_d:
  ; CHECK: psrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrli_q:
  ; CHECK: psrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
  ; CHECK-LABEL: test_x86_sse2_psrli_w:
  ; CHECK: psrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psubs_b:
  ; CHECK: psubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psubs_w:
  ; CHECK: psubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psubus_b:
  ; CHECK: psubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK-LABEL: test_x86_sse2_psubus_w:
  ; CHECK: psubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_sqrt_pd:
  ; CHECK: sqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
  ; CHECK-LABEL: test_x86_sse2_sqrt_sd:
  ; CHECK: sqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_sse2_storel_dq:
  ; CHECK: movl
  ; CHECK: movq
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; CHECK-LABEL: test_x86_sse2_storeu_dq:
  ; CHECK: movl
  ; CHECK: movdqu
  ; add operation forces the execution domain.
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_storeu_pd:
  ; CHECK: movl
  ; CHECK: movupd
  ; fadd operation forces the execution domain.
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_sub_sd:
  ; CHECK: subsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
  ; CHECK: ucomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomige_sd:
  ; CHECK: ucomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
  ; CHECK: ucomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
  ; CHECK: ucomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
  ; CHECK: ucomisd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
  ; CHECK: ucomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone


define void @test_x86_sse2_pause() {
  ; CHECK-LABEL: test_x86_sse2_pause:
  ; CHECK: pause
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind


define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; CHECK-LABEL: test_x86_sse2_pshuf_d:
; CHECK: pshufd $27
entry:
  %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone


define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufl_w:
; CHECK: pshuflw $27
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone


define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; CHECK-LABEL: test_x86_sse2_pshufh_w:
; CHECK: pshufhw $27
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone