; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s

; Each test below calls a single x86 intrinsic with AVX enabled and checks that
; the v-prefixed mnemonic appears in the generated assembly. Every function
; first matches its own assembly label so that the instruction patterns cannot
; be satisfied by the output of a neighbouring function (e.g. "vaesdec" is a
; substring of "vaesdeclast").
; NOTE(review): -march=x86 appears to select the 32-bit backend despite the
; x86_64 triple (later checks in this file expect pushl/popl) - confirm before
; simplifying the RUN line.

; AES-NI intrinsics.

define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_aesni_aesdec:
  ; CHECK: vaesdec
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_aesni_aesdeclast:
  ; CHECK: vaesdeclast
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_aesni_aesenc:
  ; CHECK: vaesenc
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_aesni_aesenclast:
  ; CHECK: vaesenclast
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
  ; CHECK: test_x86_aesni_aesimc:
  ; CHECK: vaesimc
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone

define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
  ; CHECK: test_x86_aesni_aeskeygenassist:
  ; CHECK: vaeskeygenassist
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone

; SSE2 scalar/packed double arithmetic and comparisons.

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_add_sd:
  ; CHECK: vaddsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_cmp_pd:
  ; CHECK: vcmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_cmp_sd:
  ; CHECK: vcmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comieq_sd:
  ; CHECK: vcomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comige_sd:
  ; CHECK: vcomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comigt_sd:
  ; CHECK: vcomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comile_sd:
  ; CHECK: vcomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comilt_sd:
  ; CHECK: vcomisd
  ; CHECK: sbbl %eax, %eax
  ; CHECK: andl $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_comineq_sd:
  ; CHECK: vcomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone

; SSE2 conversions.

define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_cvtdq2pd:
  ; CHECK: vcvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_cvtdq2ps:
  ; CHECK: vcvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvtpd2dq:
  ; CHECK: vcvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

; SSE2 conversion, division and unaligned-load intrinsics. Each function calls
; one intrinsic and checks for the v-prefixed mnemonic; the first CHECK in every
; function matches that function's assembly label so the loose patterns that
; follow (e.g. "movl", or "vmovups" which is expected twice) are tied to the
; function they belong to.

define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvtpd2ps:
  ; CHECK: vcvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
  ; CHECK: test_x86_sse2_cvtps2dq:
  ; CHECK: vcvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
  ; CHECK: test_x86_sse2_cvtps2pd:
  ; CHECK: vcvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone

define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvtsd2si:
  ; CHECK: vcvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_cvtsd2ss:
  ; CHECK: vcvtsd2ss
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvtsi2sd:
  ; CHECK: movl
  ; CHECK: vcvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone

define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse2_cvtss2sd:
  ; CHECK: vcvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvttpd2dq:
  ; CHECK: vcvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
  ; CHECK: test_x86_sse2_cvttps2dq:
  ; CHECK: vcvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone

define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_cvttsd2si:
  ; CHECK: vcvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_div_sd:
  ; CHECK: vdivsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone

define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
  ; CHECK: test_x86_sse2_loadu_dq:
  ; CHECK: movl
  ; CHECK: vmovups
  %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly

define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
  ; CHECK: test_x86_sse2_loadu_pd:
  ; CHECK: movl
  ; CHECK: vmovups
  %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly

; SSE2 masked store, min/max and sign-mask intrinsics. Each function calls one
; intrinsic and checks for the v-prefixed mnemonic; the first CHECK in every
; function matches that function's assembly label so generic patterns such as
; "pushl"/"movl"/"popl" cannot be satisfied by another function's output.

define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
  ; CHECK: test_x86_sse2_maskmov_dqu:
  ; CHECK: pushl
  ; CHECK: movl
  ; CHECK: vmaskmovdqu
  ; CHECK: popl
  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
  ret void
}
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind

define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_max_pd:
  ; CHECK: vmaxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_max_sd:
  ; CHECK: vmaxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_min_pd:
  ; CHECK: vminpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_min_sd:
  ; CHECK: vminsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_movmsk_pd:
  ; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

; SSE2 stores and integer vector operations, followed by SSE3 and SSE4.1
; intrinsics. Each function calls one intrinsic and checks for the v-prefixed
; mnemonic; the first CHECK in every function matches that function's assembly
; label. This matters here because several patterns are substrings of later
; ones ("vpslld" / "vpslldq", "vpsrld" / "vpsrldq") and several mnemonics are
; expected from two different functions (register and immediate shift forms).

define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_sse2_movnt_dq:
  ; CHECK: movl
  ; CHECK: vmovntdq
  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
  ret void
}
declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind

define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_movnt_pd:
  ; CHECK: movl
  ; CHECK: vmovntpd
  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
  ret void
}
declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind

define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_mul_sd:
  ; CHECK: vmulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone

define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_packssdw_128:
  ; CHECK: vpackssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_packsswb_128:
  ; CHECK: vpacksswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_packuswb_128:
  ; CHECK: vpackuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_padds_b:
  ; CHECK: vpaddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_padds_w:
  ; CHECK: vpaddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_paddus_b:
  ; CHECK: vpaddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_paddus_w:
  ; CHECK: vpaddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_pavg_b:
  ; CHECK: vpavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pavg_w:
  ; CHECK: vpavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_pcmpeq_b:
  ; CHECK: vpcmpeqb
  %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_pcmpeq_d:
  ; CHECK: vpcmpeqd
  %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pcmpeq_w:
  ; CHECK: vpcmpeqw
  %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_pcmpgt_b:
  ; CHECK: vpcmpgtb
  %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_pcmpgt_d:
  ; CHECK: vpcmpgtd
  %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pcmpgt_w:
  ; CHECK: vpcmpgtw
  %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmadd_wd:
  ; CHECK: vpmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmaxs_w:
  ; CHECK: vpmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_pmaxu_b:
  ; CHECK: vpmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmins_w:
  ; CHECK: vpminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_pminu_b:
  ; CHECK: vpminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
  ; CHECK: test_x86_sse2_pmovmskb_128:
  ; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmulh_w:
  ; CHECK: vpmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_pmulhu_w:
  ; CHECK: vpmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_pmulu_dq:
  ; CHECK: vpmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_psad_bw:
  ; CHECK: vpsadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_psll_d:
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psll_dq:
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psll_dq_bs:
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_sse2_psll_q:
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psll_w:
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_pslli_d:
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_pslli_q:
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
  ; CHECK: test_x86_sse2_pslli_w:
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_psra_d:
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psra_w:
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_psrai_d:
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
  ; CHECK: test_x86_sse2_psrai_w:
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_psrl_d:
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psrl_dq:
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psrl_dq_bs:
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: test_x86_sse2_psrl_q:
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psrl_w:
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
  ; CHECK: test_x86_sse2_psrli_d:
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
  ; CHECK: test_x86_sse2_psrli_q:
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
  ; CHECK: test_x86_sse2_psrli_w:
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone

define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_psubs_b:
  ; CHECK: vpsubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psubs_w:
  ; CHECK: vpsubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_psubus_b:
  ; CHECK: vpsubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: test_x86_sse2_psubus_w:
  ; CHECK: vpsubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_sqrt_pd:
  ; CHECK: vsqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
  ; CHECK: test_x86_sse2_sqrt_sd:
  ; CHECK: vsqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone

define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_storel_dq:
  ; CHECK: movl
  ; CHECK: vmovq
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind

define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_storeu_dq:
  ; CHECK: movl
  ; CHECK: vmovdqu
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind

define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_storeu_pd:
  ; CHECK: movl
  ; CHECK: vmovupd
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind

define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_sub_sd:
  ; CHECK: vsubsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomieq_sd:
  ; CHECK: vucomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomige_sd:
  ; CHECK: vucomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomigt_sd:
  ; CHECK: vucomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomile_sd:
  ; CHECK: vucomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomilt_sd:
  ; CHECK: vucomisd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_ucomineq_sd:
  ; CHECK: vucomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

; SSE3 intrinsics.

define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse3_addsub_pd:
  ; CHECK: vaddsubpd
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse3_addsub_ps:
  ; CHECK: vaddsubps
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse3_hadd_pd:
  ; CHECK: vhaddpd
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse3_hadd_ps:
  ; CHECK: vhaddps
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse3_hsub_pd:
  ; CHECK: vhsubpd
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse3_hsub_ps:
  ; CHECK: vhsubps
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
  ; CHECK: test_x86_sse3_ldu_dq:
  ; CHECK: movl
  ; CHECK: vlddqu
  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly

; SSE4.1 blend intrinsics.

define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse41_blendpd:
  ; CHECK: vblendpd
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone

define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: test_x86_sse41_blendps:
  ; CHECK: vblendps
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone

define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: test_x86_sse41_blendvpd:
  ; CHECK: vblendvpd
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: test_x86_sse41_blendvps:
  ; CHECK: vblendvps
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind
readnone 932 933 934define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { 935 ; CHECK: vdppd 936 %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] 937 ret <2 x double> %res 938} 939declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone 940 941 942define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { 943 ; CHECK: vdpps 944 %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] 945 ret <4 x float> %res 946} 947declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone 948 949 950define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { 951 ; CHECK: vinsertps 952 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1] 953 ret <4 x float> %res 954} 955declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone 956 957 958define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) { 959 ; CHECK: movl 960 ; CHECK: vmovntdqa 961 %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1] 962 ret <2 x i64> %res 963} 964declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly 965 966 967define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { 968 ; CHECK: vmpsadbw 969 %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1] 970 ret <16 x i8> %res 971} 972declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone 973 974 975define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { 976 ; CHECK: vpackusdw 977 %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] 978 ret <8 x i16> %res 979} 980declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x 
i32>) nounwind readnone 981 982 983define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { 984 ; CHECK: vpblendvb 985 %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] 986 ret <16 x i8> %res 987} 988declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 989 990 991define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { 992 ; CHECK: vpblendw 993 %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1] 994 ret <8 x i16> %res 995} 996declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone 997 998 999define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) { 1000 ; CHECK: vpcmpeqq 1001 %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] 1002 ret <2 x i64> %res 1003} 1004declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone 1005 1006 1007define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { 1008 ; CHECK: vphminposuw 1009 %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] 1010 ret <8 x i16> %res 1011} 1012declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone 1013 1014 1015define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { 1016 ; CHECK: vpmaxsb 1017 %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1018 ret <16 x i8> %res 1019} 1020declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone 1021 1022 1023define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { 1024 ; CHECK: vpmaxsd 1025 %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1026 ret <4 x i32> %res 1027} 1028declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) 
nounwind readnone 1029 1030 1031define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { 1032 ; CHECK: vpmaxud 1033 %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1034 ret <4 x i32> %res 1035} 1036declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone 1037 1038 1039define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { 1040 ; CHECK: vpmaxuw 1041 %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1042 ret <8 x i16> %res 1043} 1044declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone 1045 1046 1047define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { 1048 ; CHECK: vpminsb 1049 %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1050 ret <16 x i8> %res 1051} 1052declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone 1053 1054 1055define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { 1056 ; CHECK: vpminsd 1057 %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1058 ret <4 x i32> %res 1059} 1060declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone 1061 1062 1063define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { 1064 ; CHECK: vpminud 1065 %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1066 ret <4 x i32> %res 1067} 1068declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone 1069 1070 1071define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { 1072 ; CHECK: vpminuw 1073 %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1074 ret <8 x i16> %res 1075} 1076declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone 1077 1078 
; ---------------------------------------------------------------------------
; SSE4.1 pmovsx* / pmovzx* intrinsics.
; Each test makes one intrinsic call and expects the v-prefixed mnemonic of
; the same name to appear in the generated assembly.
; ---------------------------------------------------------------------------

define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
  ; CHECK: vpmovsxbd
  %out = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
  ; CHECK: vpmovsxbq
  %out = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
  ; CHECK: vpmovsxbw
  %out = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
  ; CHECK: vpmovsxdq
  %out = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
  ; CHECK: vpmovsxwd
  %out = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
  ; CHECK: vpmovsxwq
  %out = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
  ; CHECK: vpmovzxbd
  %out = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
  ; CHECK: vpmovzxbq
  %out = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
  ; CHECK: vpmovzxbw
  %out = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
  ; CHECK: vpmovzxdq
  %out = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
  ; CHECK: vpmovzxwd
  %out = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
  ; CHECK: vpmovzxwq
  %out = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmuldq
  %out = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone


; ---------------------------------------------------------------------------
; SSE4.1 ptest intrinsics (i32 result).
; After vptest the result is expected to come from sbbl (ptestc) or from a
; set<cc> followed by movzbl (ptestnzc, ptestz).
; ---------------------------------------------------------------------------

define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vptest
  ; CHECK: sbbl
  %out = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vptest
  ; CHECK: seta
  ; CHECK: movzbl
  %out = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vptest
  ; CHECK: sete
  ; CHECK: movzbl
  %out = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone


; ---------------------------------------------------------------------------
; SSE4.1 round intrinsics. Every call passes the constant i32 7 as the last
; operand.
; ---------------------------------------------------------------------------

define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
  ; CHECK: vroundpd
  %out = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
  ; CHECK: vroundps
  %out = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vroundsd
  %out = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vroundss
  %out = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone


; ---------------------------------------------------------------------------
; SSE4.2 explicit-length string compares (pcmpestr*).
; Every call passes i32 7 for both length operands and i8 7 as the control
; byte. The i32-returning tests expect movl, movl, vpcmpestri, movl in that
; order; the mask-returning test expects movl, movl, vpcmpestrm.
; NOTE(review): the a/c/o/s/z variants carry exactly the same expectations as
; the plain index variant (vpcmpestri then movl) - confirm this is the
; intended code generation rather than a set<cc> sequence.
; ---------------------------------------------------------------------------

define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestrm
  %out = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; SSE4.2 64-bit greater-than compare.
define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpcmpgtq
  %out = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone


; ---------------------------------------------------------------------------
; SSE4.2 implicit-length string compares (pcmpistr*).
; Every call passes i8 7 as the control byte. The i32-returning tests expect
; vpcmpistri followed by movl; the mask-returning test expects vpcmpistrm.
; NOTE(review): as with the explicit-length group, the a/c/o/s/z variants
; share the expectations of the plain index variant - confirm intended.
; ---------------------------------------------------------------------------

define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: movl
  %out = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %out
}
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistrm
  %out = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone


; ---------------------------------------------------------------------------
; SSE (single-precision) arithmetic and compare intrinsics.
; ---------------------------------------------------------------------------

define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vaddss
  %out = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone


; The compare tests pass predicate i8 7 and expect the "ord" mnemonic form.
define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcmpordps
  %out = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcmpordss
  %out = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone


; comi*.ss tests: vcomiss, then a set<cc> and movzbl producing the i32 result.
define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %out = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %out = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %out = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %out
}
; Declaration of the intrinsic called by test_x86_sse_comigt_ss, whose
; definition immediately precedes this point in the file.
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone


; comile.ss: expects vcomiss, then setbe and movzbl producing the i32 result.
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone


; comilt.ss: expects vcomiss followed by sbbl.
; The pattern is spelled "sbbl" (not the looser substring "sbb") so that it
; agrees with the other tests in this file that expect this instruction
; (ptestc, ucomilt.sd, ucomilt.ss).
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone


; comineq.ss: expects vcomiss, then setne and movzbl producing the i32 result.
define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone


; cvtsi2ss: the integer operand is the constant i32 7; a movl is expected
; before vcvtsi2ss (presumably materialising that constant - not verifiable
; from this file alone).
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
  ; CHECK: movl
  ; CHECK: vcvtsi2ss
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone


; cvtss2si: expects vcvtss2si.
define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
  ; CHECK: vcvtss2si
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone


; cvttss2si: expects vcvttss2si.
define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
  ; CHECK: vcvttss2si
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone


; ---------------------------------------------------------------------------
; SSE (single-precision) intrinsics, continued.
; Unless noted, each test makes one intrinsic call and expects the v-prefixed
; mnemonic in the generated assembly.
; ---------------------------------------------------------------------------

define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vdivss
  %out = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone


; Tests taking an i8* operand expect a movl ahead of the AVX instruction
; (presumably the pointer argument being loaded - not verifiable from this
; file alone).
define void @test_x86_sse_ldmxcsr(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vldmxcsr
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind


define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vmovups
  %out = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly


define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmaxps
  %out = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmaxss
  %out = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vminps
  %out = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vminss
  %out = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
  ; CHECK: vmovmskps
  %out = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %out
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone


define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: movl
  ; CHECK: vmovntps
  call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind


define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vmulss
  %out = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
  ; CHECK: vrcpps
  %out = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
  ; CHECK: vrcpss
  %out = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
  ; CHECK: vrsqrtps
  %out = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

; ---------------------------------------------------------------------------
; SSE / SSSE3 intrinsics and the first 256-bit AVX intrinsics.
;
; Every test below has the same shape: call one intrinsic, return its result
; (or nothing for stores), and let the FileCheck directive(s) inside the
; function name the mnemonic(s) expected in llc's output.  FileCheck matches
; its directives in file order, so the functions must stay in this order.
; The one-line note above each test reads "intrinsic -> expected mnemonics".
; ---------------------------------------------------------------------------

; sse.rsqrt.ss -> vrsqrtss
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
  ; CHECK: vrsqrtss
  %val = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  ret <4 x float> %val
}

; sse.sqrt.ps -> vsqrtps
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
  ; CHECK: vsqrtps
  %val = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %val
}

; sse.sqrt.ss -> vsqrtss
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
  ; CHECK: vsqrtss
  %val = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
  ret <4 x float> %val
}

; sse.stmxcsr -> movl, then vstmxcsr
; NOTE(review): the movl is presumably the load of %a0 from the stack on the
; 32-bit target selected by the RUN line -- confirm.
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
define void @test_x86_sse_stmxcsr(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vstmxcsr
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}

; sse.storeu.ps -> movl, then vmovups
; NOTE(review): same assumption about the movl as in the stmxcsr test -- confirm.
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: movl
  ; CHECK: vmovups
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}

; sse.sub.ss -> vsubss
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vsubss
  %val = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %val
}

; sse.ucomieq.ss -> vucomiss, sete, movzbl
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; sse.ucomige.ss -> vucomiss, setae, movzbl
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; sse.ucomigt.ss -> vucomiss, seta, movzbl
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; sse.ucomile.ss -> vucomiss, setbe, movzbl
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setbe
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; sse.ucomilt.ss -> vucomiss, sbbl
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: sbbl
  %val = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; sse.ucomineq.ss -> vucomiss, setne, movzbl
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vucomiss
  ; CHECK: setne
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; ssse3.pabs.b.128 -> vpabsb
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
  ; CHECK: vpabsb
  %val = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
  ret <16 x i8> %val
}

; ssse3.pabs.d.128 -> vpabsd
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
  ; CHECK: vpabsd
  %val = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
  ret <4 x i32> %val
}

; ssse3.pabs.w.128 -> vpabsw
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
  ; CHECK: vpabsw
  %val = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
  ret <8 x i16> %val
}

; ssse3.phadd.d.128 -> vphaddd
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphaddd
  %val = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %val
}

; ssse3.phadd.sw.128 -> vphaddsw
; NOTE(review): operands are typed <4 x i32> here, whereas the phsub.sw.128
; test below uses <8 x i16> -- confirm against the intrinsic definition of the
; compiler revision this test targets before changing either.
declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphaddsw
  %val = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %val
}

; ssse3.phadd.w.128 -> vphaddw
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphaddw
  %val = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %val
}

; ssse3.phsub.d.128 -> vphsubd
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vphsubd
  %val = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %val
}

; ssse3.phsub.sw.128 -> vphsubsw
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphsubsw
  %val = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %val
}

; ssse3.phsub.w.128 -> vphsubw
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vphsubw
  %val = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %val
}

; ssse3.pmadd.ub.sw.128 -> vpmaddubsw
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaddubsw
  %val = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %val
}

; ssse3.pmul.hr.sw.128 -> vpmulhrsw
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhrsw
  %val = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %val
}

; ssse3.pshuf.b.128 -> vpshufb
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpshufb
  %val = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %val
}

; ssse3.psign.b.128 -> vpsignb
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsignb
  %val = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %val
}

; ssse3.psign.d.128 -> vpsignd
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsignd
  %val = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %val
}

; ssse3.psign.w.128 -> vpsignw
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsignw
  %val = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %val
}

; avx.addsub.pd.256 -> vaddsubpd
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vaddsubpd
  %val = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %val
}

; avx.addsub.ps.256 -> vaddsubps
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vaddsubps
  %val = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %val
}

; avx.blend.pd.256 (immediate 7) -> vblendpd
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vblendpd
  %val = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7)
  ret <4 x double> %val
}

; avx.blend.ps.256 (immediate 7) -> vblendps
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vblendps
  %val = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7)
  ret <8 x float> %val
}

; avx.blendv.pd.256 -> vblendvpd
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vblendvpd
  %val = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %val
}

; avx.blendv.ps.256 -> vblendvps
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vblendvps
  %val = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %val
}

; avx.cmp.pd.256 with predicate immediate 7 -> vcmpordpd
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vcmpordpd
  %val = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7)
  ret <4 x double> %val
}

; avx.cmp.ps.256 with predicate immediate 7 -> vcmpordps
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vcmpordps
  %val = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %val
}

; ---------------------------------------------------------------------------
; 256-bit AVX conversions, dot product and horizontal add/sub.
;
; Same shape as the rest of the file: one intrinsic call per function, with
; the FileCheck directive inside the function naming the expected mnemonic.
; Directives are matched in file order, so the functions must keep this order.
; The note above each test reads "intrinsic -> expected mnemonic".
; ---------------------------------------------------------------------------

; avx.cvt.pd2.ps.256 -> vcvtpd2psy
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
  ; CHECK: vcvtpd2psy
  %val = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0)
  ret <4 x float> %val
}

; avx.cvt.pd2dq.256 -> vcvtpd2dqy
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
  ; CHECK: vcvtpd2dqy
  %val = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %val
}

; avx.cvt.ps2.pd.256 -> vcvtps2pd
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
  ; CHECK: vcvtps2pd
  %val = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
  ret <4 x double> %val
}

; avx.cvt.ps2dq.256 -> vcvtps2dq
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
  ; CHECK: vcvtps2dq
  %val = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
  ret <8 x i32> %val
}

; avx.cvtdq2.pd.256 -> vcvtdq2pd
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
  ; CHECK: vcvtdq2pd
  %val = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0)
  ret <4 x double> %val
}

; avx.cvtdq2.ps.256 -> vcvtdq2ps
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
  ; CHECK: vcvtdq2ps
  %val = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0)
  ret <8 x float> %val
}

; avx.cvtt.pd2dq.256 -> vcvttpd2dqy
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
  ; CHECK: vcvttpd2dqy
  %val = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0)
  ret <4 x i32> %val
}

; avx.cvtt.ps2dq.256 -> vcvttps2dq
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
  ; CHECK: vcvttps2dq
  %val = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0)
  ret <8 x i32> %val
}

; avx.dp.ps.256 (immediate 7) -> vdpps
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vdpps
  %val = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7)
  ret <8 x float> %val
}

; avx.hadd.pd.256 -> vhaddpd
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vhaddpd
  %val = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %val
}

; avx.hadd.ps.256 -> vhaddps
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vhaddps
  %val = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %val
}

; avx.hsub.pd.256 -> vhsubpd
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vhsubpd
  %val = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %val
}

; ---------------------------------------------------------------------------
; 256-bit AVX loads, masked loads/stores, min/max, movmsk, non-temporal
; stores and ptest.
;
; Same shape as the rest of the file: one intrinsic call per function, with
; the FileCheck directive(s) inside the function naming the expected
; mnemonic(s).  Directives are matched in file order, so the functions must
; keep this order.  The note above each test reads
; "intrinsic -> expected mnemonics".
; ---------------------------------------------------------------------------

; avx.hsub.ps.256 -> vhsubps
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vhsubps
  %val = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %val
}

; avx.ldu.dq.256 -> vlddqu
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
  ; CHECK: vlddqu
  %val = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0)
  ret <32 x i8> %val
}

; avx.loadu.dq.256 -> vmovdqu
declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
  ; CHECK: vmovdqu
  %val = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0)
  ret <32 x i8> %val
}

; avx.loadu.pd.256 -> vmovupd
declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
  ; CHECK: vmovupd
  %val = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0)
  ret <4 x double> %val
}

; avx.loadu.ps.256 -> vmovups
declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly
define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
  ; CHECK: vmovups
  %val = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0)
  ret <8 x float> %val
}

; avx.maskload.pd -> vmaskmovpd
declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: vmaskmovpd
  %val = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1)
  ret <2 x double> %val
}

; avx.maskload.pd.256 -> vmaskmovpd
declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmaskmovpd
  %val = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1)
  ret <4 x double> %val
}

; avx.maskload.ps -> vmaskmovps
declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
  ; CHECK: vmaskmovps
  %val = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1)
  ret <4 x float> %val
}

; avx.maskload.ps.256 -> vmaskmovps
declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmaskmovps
  %val = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1)
  ret <8 x float> %val
}

; avx.maskstore.pd -> vmaskmovpd
declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vmaskmovpd
  call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
  ret void
}

; avx.maskstore.pd.256 -> vmaskmovpd
declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
  ; CHECK: vmaskmovpd
  call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
  ret void
}

; avx.maskstore.ps -> vmaskmovps
declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vmaskmovps
  call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
  ret void
}

; avx.maskstore.ps.256 -> vmaskmovps
declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
  ; CHECK: vmaskmovps
  call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
  ret void
}

; avx.max.pd.256 -> vmaxpd
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vmaxpd
  %val = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %val
}

; avx.max.ps.256 -> vmaxps
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vmaxps
  %val = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %val
}

; avx.min.pd.256 -> vminpd
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vminpd
  %val = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %val
}

; avx.min.ps.256 -> vminps
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vminps
  %val = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %val
}

; avx.movmsk.pd.256 -> vmovmskpd
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
  ; CHECK: vmovmskpd
  %val = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
  ret i32 %val
}

; avx.movmsk.ps.256 -> vmovmskps
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
  ; CHECK: vmovmskps
  %val = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
  ret i32 %val
}

; avx.movnt.dq.256 -> vmovntdq
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
  ; CHECK: vmovntdq
  call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
  ret void
}

; avx.movnt.pd.256 -> vmovntpd
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmovntpd
  call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
  ret void
}

; avx.movnt.ps.256 -> vmovntps
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmovntps
  call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}

; avx.ptestc.256 -> vptest, sbbl
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: sbbl
  %val = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %val
}

; avx.ptestnzc.256 -> vptest, seta, movzbl
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: seta
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %val
}

; avx.ptestz.256 -> vptest, sete, movzbl
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vptest
  ; CHECK: sete
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1)
  ret i32 %val
}

; ---------------------------------------------------------------------------
; AVX rcp/round/sqrt, unaligned stores, broadcasts, 128-bit lane
; extract/insert, permutes, vtest and vzeroall/vzeroupper.
;
; Same shape as the rest of the file: one intrinsic call per function, with
; the FileCheck directive(s) inside the function naming the expected
; mnemonic(s).  Directives are matched in file order, so the functions must
; keep this order.  The note above each test reads
; "intrinsic -> expected mnemonics".
; ---------------------------------------------------------------------------

; avx.rcp.ps.256 -> vrcpps
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
  ; CHECK: vrcpps
  %val = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
  ret <8 x float> %val
}

; avx.round.pd.256 (immediate 7) -> vroundpd
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
  ; CHECK: vroundpd
  %val = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
  ret <4 x double> %val
}

; avx.round.ps.256 (immediate 7) -> vroundps
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
  ; CHECK: vroundps
  %val = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
  ret <8 x float> %val
}

; avx.rsqrt.ps.256 -> vrsqrtps
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
  ; CHECK: vrsqrtps
  %val = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
  ret <8 x float> %val
}

; avx.sqrt.pd.256 -> vsqrtpd
declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
  ; CHECK: vsqrtpd
  %val = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
  ret <4 x double> %val
}

; avx.sqrt.ps.256 -> vsqrtps
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
  ; CHECK: vsqrtps
  %val = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
  ret <8 x float> %val
}

; avx.storeu.dq.256 -> vmovdqu
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; CHECK: vmovdqu
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
  ret void
}

; avx.storeu.pd.256 -> vmovupd
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
  ; CHECK: vmovupd
  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
  ret void
}

; avx.storeu.ps.256 -> vmovups
declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
  ; CHECK: vmovups
  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
  ret void
}

; avx.vbroadcast.sd.256 -> vbroadcastsd
declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
  ; CHECK: vbroadcastsd
  %val = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0)
  ret <4 x double> %val
}

; avx.vbroadcastf128.pd.256 -> vbroadcastf128
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
  ; CHECK: vbroadcastf128
  %val = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0)
  ret <4 x double> %val
}

; avx.vbroadcastf128.ps.256 -> vbroadcastf128
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
  ; CHECK: vbroadcastf128
  %val = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0)
  ret <8 x float> %val
}

; avx.vbroadcastss -> vbroadcastss
declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
  ; CHECK: vbroadcastss
  %val = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0)
  ret <4 x float> %val
}

; avx.vbroadcastss.256 -> vbroadcastss
declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
  ; CHECK: vbroadcastss
  %val = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0)
  ret <8 x float> %val
}

; avx.vextractf128.pd.256 (immediate 7) -> vextractf128
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
  ; CHECK: vextractf128
  %val = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7)
  ret <2 x double> %val
}

; avx.vextractf128.ps.256 (immediate 7) -> vextractf128
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
  ; CHECK: vextractf128
  %val = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7)
  ret <4 x float> %val
}

; avx.vextractf128.si.256 (immediate 7) -> vextractf128
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
  ; CHECK: vextractf128
  %val = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7)
  ret <4 x i32> %val
}

; avx.vinsertf128.pd.256 (immediate 7) -> vinsertf128
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
  ; CHECK: vinsertf128
  %val = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7)
  ret <4 x double> %val
}

; avx.vinsertf128.ps.256 (immediate 7) -> vinsertf128
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
  ; CHECK: vinsertf128
  %val = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7)
  ret <8 x float> %val
}

; avx.vinsertf128.si.256 (immediate 7) -> vinsertf128
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vinsertf128
  %val = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7)
  ret <8 x i32> %val
}

; avx.vperm2f128.pd.256 (immediate 7) -> vperm2f128
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vperm2f128
  %val = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7)
  ret <4 x double> %val
}

; avx.vperm2f128.ps.256 (immediate 7) -> vperm2f128
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vperm2f128
  %val = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
  ret <8 x float> %val
}

; avx.vperm2f128.si.256 (immediate 7) -> vperm2f128
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vperm2f128
  %val = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7)
  ret <8 x i32> %val
}

; avx.vpermil.pd (immediate 7) -> vpermilpd
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
  ; CHECK: vpermilpd
  %val = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7)
  ret <2 x double> %val
}

; avx.vpermil.pd.256 (immediate 7) -> vpermilpd
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
  ; CHECK: vpermilpd
  %val = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7)
  ret <4 x double> %val
}

; avx.vpermil.ps (immediate 7) -> vpermilps
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
  ; CHECK: vpermilps
  %val = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7)
  ret <4 x float> %val
}

; avx.vpermil.ps.256 (immediate 7) -> vpermilps
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
  ; CHECK: vpermilps
  %val = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7)
  ret <8 x float> %val
}

; avx.vpermilvar.pd -> vpermilpd
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
  ; CHECK: vpermilpd
  %val = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
  ret <2 x double> %val
}

; avx.vpermilvar.pd.256 -> vpermilpd
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
  ; CHECK: vpermilpd
  %val = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
  ret <4 x double> %val
}

; avx.vpermilvar.ps -> vpermilps
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
  ; CHECK: vpermilps
  %val = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
  ret <4 x float> %val
}

; avx.vpermilvar.ps.256 -> vpermilps
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
  ; CHECK: vpermilps
  %val = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
  ret <8 x float> %val
}

; avx.vtestc.pd -> vtestpd, sbbl
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sbbl
  %val = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %val
}

; avx.vtestc.pd.256 -> vtestpd, sbbl
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sbbl
  %val = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %val
}

; avx.vtestc.ps -> vtestps, sbbl
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sbbl
  %val = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; avx.vtestc.ps.256 -> vtestps, sbbl
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sbbl
  %val = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %val
}

; avx.vtestnzc.pd -> vtestpd, seta, movzbl
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: seta
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %val
}

; avx.vtestnzc.pd.256 -> vtestpd, seta, movzbl
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: seta
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %val
}

; avx.vtestnzc.ps -> vtestps, seta, movzbl
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: seta
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; avx.vtestnzc.ps.256 -> vtestps, seta, movzbl
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: seta
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %val
}

; avx.vtestz.pd -> vtestpd, sete, movzbl
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sete
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %val
}

; avx.vtestz.pd.256 -> vtestpd, sete, movzbl
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
  ; CHECK: vtestpd
  ; CHECK: sete
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1)
  ret i32 %val
}

; avx.vtestz.ps -> vtestps, sete, movzbl
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sete
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1)
  ret i32 %val
}

; avx.vtestz.ps.256 -> vtestps, sete, movzbl
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
  ; CHECK: vtestps
  ; CHECK: sete
  ; CHECK: movzbl
  %val = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1)
  ret i32 %val
}

; avx.vzeroall -> vzeroall
declare void @llvm.x86.avx.vzeroall() nounwind
define void @test_x86_avx_vzeroall() {
  ; CHECK: vzeroall
  call void @llvm.x86.avx.vzeroall()
  ret void
}

; avx.vzeroupper -> vzeroupper
declare void @llvm.x86.avx.vzeroupper() nounwind
define void @test_x86_avx_vzeroupper() {
  ; CHECK: vzeroupper
  call void @llvm.x86.avx.vzeroupper()
  ret void
}