; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s
;
; Each function below wraps a call to one x86 intrinsic (AES-NI or SSE2) and
; the assertions pin the v-prefixed instruction that llc is expected to emit
; for it when AVX is enabled.
;
; The expected assembly is 32-bit (retl, %esp-relative argument loads), so the
; test is driven with a 32-bit triple directly instead of a 64-bit triple plus
; an architecture override.
;
; The assertions are tool-generated: regenerate them with the script named on
; the first line instead of editing them by hand. Do not insert or remove
; functions above test_x86_sse2_storeu_dq without regenerating, because its
; constant-pool label (LCPI77_0) encodes the position of that function in the
; file.

; ----------------------------------------------------------------------------
; AES-NI intrinsics
; ----------------------------------------------------------------------------

define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdec:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesdec %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdeclast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesdeclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesenc %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenclast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesenclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aesimc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaesimc %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone


; The i8 operand (7) is expected to appear as the instruction's immediate.
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaeskeygenassist $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone


; ----------------------------------------------------------------------------
; SSE2 intrinsics
; ----------------------------------------------------------------------------

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; Comparison predicate 7 is expected to print as the "ord" mnemonic form.
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; The comi* tests share one compare instruction and differ only in how the
; flags are turned into the i32 result (setcc + zero-extend, or sbb + and for
; the "lt" variant).
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcomisd %xmm1, %xmm0
; CHECK-NEXT:    setne %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsd2si %xmm0, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; The constant integer operand (7) is expected to be materialized in %eax
; before the conversion.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vcvttsd2si %xmm0, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovmskpd %xmm0, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovmskb %xmm0, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; Shift tests come in two flavours: the psll/psra/psrl forms take the shift
; count in a vector register, the pslli/psrai/psrli forms take an i32 constant
; (7) that is expected to appear as an immediate.
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpslld $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllq $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrad $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsraw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrld $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrlq $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrlw $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_w:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


; The store tests take the destination pointer as a stack argument, so the
; expected code first loads it from the stack into %eax.
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovlps %xmm0, (%eax)
; CHECK-NEXT:    retl
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
; CHECK-LABEL: test_x86_sse2_storeu_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vpaddb LCPI77_0, %xmm0, %xmm0
; CHECK-NEXT:    vmovdqu %xmm0, (%eax)
; CHECK-NEXT:    retl
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; fadd operation forces the execution domain.
; CHECK-LABEL: test_x86_sse2_storeu_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovupd %xmm0, (%eax)
; CHECK-NEXT:    retl
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_sub_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


; The ucomi* tests mirror the comi* tests above, with vucomisd as the compare.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomige_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    setbe %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vucomisd %xmm1, %xmm0
; CHECK-NEXT:    setne %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double>
@test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { 990; CHECK-LABEL: test_x86_sse3_addsub_pd: 991; CHECK: ## BB#0: 992; CHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 993; CHECK-NEXT: retl 994 %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 995 ret <2 x double> %res 996} 997declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone 998 999 1000define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { 1001; CHECK-LABEL: test_x86_sse3_addsub_ps: 1002; CHECK: ## BB#0: 1003; CHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 1004; CHECK-NEXT: retl 1005 %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1006 ret <4 x float> %res 1007} 1008declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone 1009 1010 1011define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { 1012; CHECK-LABEL: test_x86_sse3_hadd_pd: 1013; CHECK: ## BB#0: 1014; CHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 1015; CHECK-NEXT: retl 1016 %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 1017 ret <2 x double> %res 1018} 1019declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone 1020 1021 1022define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { 1023; CHECK-LABEL: test_x86_sse3_hadd_ps: 1024; CHECK: ## BB#0: 1025; CHECK-NEXT: vhaddps %xmm1, %xmm0, %xmm0 1026; CHECK-NEXT: retl 1027 %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1028 ret <4 x float> %res 1029} 1030declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone 1031 1032 1033define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { 1034; CHECK-LABEL: test_x86_sse3_hsub_pd: 1035; CHECK: ## BB#0: 
1036; CHECK-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 1037; CHECK-NEXT: retl 1038 %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 1039 ret <2 x double> %res 1040} 1041declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone 1042 1043 1044define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { 1045; CHECK-LABEL: test_x86_sse3_hsub_ps: 1046; CHECK: ## BB#0: 1047; CHECK-NEXT: vhsubps %xmm1, %xmm0, %xmm0 1048; CHECK-NEXT: retl 1049 %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1050 ret <4 x float> %res 1051} 1052declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone 1053 1054 1055define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { 1056; CHECK-LABEL: test_x86_sse3_ldu_dq: 1057; CHECK: ## BB#0: 1058; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1059; CHECK-NEXT: vlddqu (%eax), %xmm0 1060; CHECK-NEXT: retl 1061 %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] 1062 ret <16 x i8> %res 1063} 1064declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly 1065 1066 1067define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 1068; CHECK-LABEL: test_x86_sse41_blendvpd: 1069; CHECK: ## BB#0: 1070; CHECK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1071; CHECK-NEXT: retl 1072 %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] 1073 ret <2 x double> %res 1074} 1075declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone 1076 1077 1078define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 1079; CHECK-LABEL: test_x86_sse41_blendvps: 1080; CHECK: ## BB#0: 1081; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1082; CHECK-NEXT: retl 1083 %res = call <4 
x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] 1084 ret <4 x float> %res 1085} 1086declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone 1087 1088 1089define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { 1090; CHECK-LABEL: test_x86_sse41_dppd: 1091; CHECK: ## BB#0: 1092; CHECK-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 1093; CHECK-NEXT: retl 1094 %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] 1095 ret <2 x double> %res 1096} 1097declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone 1098 1099 1100define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { 1101; CHECK-LABEL: test_x86_sse41_dpps: 1102; CHECK: ## BB#0: 1103; CHECK-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 1104; CHECK-NEXT: retl 1105 %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1106 ret <4 x float> %res 1107} 1108declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone 1109 1110 1111define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { 1112; CHECK-LABEL: test_x86_sse41_insertps: 1113; CHECK: ## BB#0: 1114; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3] 1115; CHECK-NEXT: retl 1116 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] 1117 ret <4 x float> %res 1118} 1119declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 1120 1121 1122 1123define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { 1124; CHECK-LABEL: test_x86_sse41_mpsadbw: 1125; CHECK: ## BB#0: 1126; CHECK-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 1127; CHECK-NEXT: retl 1128 %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, 
i8 7) ; <<8 x i16>> [#uses=1] 1129 ret <8 x i16> %res 1130} 1131declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone 1132 1133 1134define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { 1135; CHECK-LABEL: test_x86_sse41_packusdw: 1136; CHECK: ## BB#0: 1137; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 1138; CHECK-NEXT: retl 1139 %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] 1140 ret <8 x i16> %res 1141} 1142declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone 1143 1144 1145define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { 1146; CHECK-LABEL: test_x86_sse41_pblendvb: 1147; CHECK: ## BB#0: 1148; CHECK-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 1149; CHECK-NEXT: retl 1150 %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] 1151 ret <16 x i8> %res 1152} 1153declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1154 1155 1156define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { 1157; CHECK-LABEL: test_x86_sse41_phminposuw: 1158; CHECK: ## BB#0: 1159; CHECK-NEXT: vphminposuw %xmm0, %xmm0 1160; CHECK-NEXT: retl 1161 %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] 1162 ret <8 x i16> %res 1163} 1164declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone 1165 1166 1167define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { 1168; CHECK-LABEL: test_x86_sse41_pmaxsb: 1169; CHECK: ## BB#0: 1170; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1171; CHECK-NEXT: retl 1172 %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1173 ret <16 x i8> %res 1174} 1175declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone 1176 1177 1178define <4 x i32> @test_x86_sse41_pmaxsd(<4 
x i32> %a0, <4 x i32> %a1) { 1179; CHECK-LABEL: test_x86_sse41_pmaxsd: 1180; CHECK: ## BB#0: 1181; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 1182; CHECK-NEXT: retl 1183 %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1184 ret <4 x i32> %res 1185} 1186declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone 1187 1188 1189define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { 1190; CHECK-LABEL: test_x86_sse41_pmaxud: 1191; CHECK: ## BB#0: 1192; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1193; CHECK-NEXT: retl 1194 %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1195 ret <4 x i32> %res 1196} 1197declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone 1198 1199 1200define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { 1201; CHECK-LABEL: test_x86_sse41_pmaxuw: 1202; CHECK: ## BB#0: 1203; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1204; CHECK-NEXT: retl 1205 %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1206 ret <8 x i16> %res 1207} 1208declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone 1209 1210 1211define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { 1212; CHECK-LABEL: test_x86_sse41_pminsb: 1213; CHECK: ## BB#0: 1214; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1215; CHECK-NEXT: retl 1216 %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 1217 ret <16 x i8> %res 1218} 1219declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone 1220 1221 1222define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { 1223; CHECK-LABEL: test_x86_sse41_pminsd: 1224; CHECK: ## BB#0: 1225; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1226; CHECK-NEXT: retl 1227 %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 
x i32>> [#uses=1] 1228 ret <4 x i32> %res 1229} 1230declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone 1231 1232 1233define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { 1234; CHECK-LABEL: test_x86_sse41_pminud: 1235; CHECK: ## BB#0: 1236; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0 1237; CHECK-NEXT: retl 1238 %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1239 ret <4 x i32> %res 1240} 1241declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone 1242 1243 1244define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { 1245; CHECK-LABEL: test_x86_sse41_pminuw: 1246; CHECK: ## BB#0: 1247; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1248; CHECK-NEXT: retl 1249 %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1250 ret <8 x i16> %res 1251} 1252declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone 1253 1254 1255define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { 1256; CHECK-LABEL: test_x86_sse41_pmovzxbd: 1257; CHECK: ## BB#0: 1258; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1259; CHECK-NEXT: retl 1260 %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] 1261 ret <4 x i32> %res 1262} 1263declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone 1264 1265 1266define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { 1267; CHECK-LABEL: test_x86_sse41_pmovzxbq: 1268; CHECK: ## BB#0: 1269; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1270; CHECK-NEXT: retl 1271 %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] 1272 ret <2 x i64> %res 1273} 1274declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone 1275 
; Restored from a collapsed listing: one statement per line, viewer line
; numbers removed. The check lines are autogenerated (see the NOTE at the top
; of the file) and must be regenerated by the script, not edited by hand.

; llvm.x86.sse41.pmovzxbw: the checks expect a single vpmovzxbw.
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


; llvm.x86.sse41.pmovzxdq: the checks expect a single vpmovzxdq.
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxdq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


; llvm.x86.sse41.pmovzxwd: the checks expect a single vpmovzxwd.
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


; llvm.x86.sse41.pmovzxwq: the checks expect a single vpmovzxwq.
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone


; llvm.x86.sse41.pmuldq: the checks expect a single vpmuldq.
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmuldq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.sse41.ptestc: the checks expect vptest followed by sbbl/andl to
; produce the i32 result.
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone


; llvm.x86.sse41.ptestnzc: the checks expect vptest followed by seta/movzbl.
define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestnzc:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone


; llvm.x86.sse41.ptestz: the checks expect vptest followed by sete/movzbl.
define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestz:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vptest %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone


; llvm.x86.sse41.round.pd with immediate 7: the checks expect vroundpd $7.
define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse41_round_pd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundpd $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


; llvm.x86.sse41.round.ps with immediate 7: the checks expect vroundps $7.
define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse41_round_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundps $7, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


; llvm.x86.sse41.round.sd with immediate 7: the checks expect vroundsd $7.
define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_round_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


; llvm.x86.sse41.round.ss with immediate 7: the checks expect vroundss $7.
define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_round_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone


; llvm.x86.sse42.pcmpestri128 with both i32 arguments 7: the checks expect the
; two constants to be placed in EAX and EDX before vpcmpestri, and the result
; to be copied from ECX. The intrinsic's declaration follows this function.
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestri128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %res
}
; Restored from a collapsed listing: one statement per line, viewer line
; numbers removed. The check lines are autogenerated (see the NOTE at the top
; of the file) and must be regenerated by the script, not edited by hand.

; Declaration for the intrinsic used by test_x86_sse42_pcmpestri128 and
; test_x86_sse42_pcmpestri128_load.
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; Same intrinsic with both vector operands loaded from pointers: the checks
; expect one operand to be loaded with vmovdqa and the other to be used by
; vpcmpestri directly as a memory operand.
define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vmovdqa (%eax), %xmm0
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, (%ecx), %xmm0
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retl
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = load <16 x i8>, <16 x i8>* %a2
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7)
  ret i32 %res
}


; llvm.x86.sse42.pcmpestria128: the checks expect vpcmpestri followed by
; seta/movzbl.
define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestria128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    seta %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; llvm.x86.sse42.pcmpestric128: the checks expect vpcmpestri followed by
; sbbl/andl.
define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestric128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sbbl %eax, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Restored from a collapsed listing: one statement per line, viewer line
; numbers removed. The check lines are autogenerated (see the NOTE at the top
; of the file) and must be regenerated by the script, not edited by hand.

; llvm.x86.sse42.pcmpestrio128: the checks expect vpcmpestri followed by
; seto/movzbl.
define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    seto %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; llvm.x86.sse42.pcmpestris128: the checks expect vpcmpestri followed by
; sets/movzbl.
define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestris128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sets %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; llvm.x86.sse42.pcmpestriz128: the checks expect vpcmpestri followed by
; sete/movzbl.
define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; llvm.x86.sse42.pcmpestrm128: the checks expect the two i32 constants in
; EAX/EDX followed by vpcmpestrm.
define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


; Same intrinsic with the second vector loaded from a pointer: the checks
; expect vpcmpestrm to use it directly as a memory operand.
define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    vpcmpestrm $7, (%ecx), %xmm0
; CHECK-NEXT:    retl
  %1 = load <16 x i8>, <16 x i8>* %a2
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7)
  ret <16 x i8> %res
}


; llvm.x86.sse42.pcmpistri128: the checks expect vpcmpistri with the result
; copied from ECX.
define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistri128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retl
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone


; Same intrinsic with both vector operands loaded from pointers: the checks
; expect one operand to be loaded with vmovdqa and the other to be used by
; vpcmpistri directly as a memory operand.
define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    vmovdqa (%ecx), %xmm0
; CHECK-NEXT:    vpcmpistri $7, (%eax), %xmm0
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retl
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = load <16 x i8>, <16 x i8>* %a1
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7)
  ret i32 %res
}

1577define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { 1578; CHECK-LABEL: test_x86_sse42_pcmpistria128: 1579; CHECK: ## BB#0: 1580; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1581; CHECK-NEXT: seta %al 1582; CHECK-NEXT: movzbl %al, %eax 1583; CHECK-NEXT: retl 1584 %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1585 ret i32 %res 1586} 1587declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1588 1589 1590define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { 1591; CHECK-LABEL: test_x86_sse42_pcmpistric128: 1592; CHECK: ## BB#0: 1593; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1594; CHECK-NEXT: sbbl %eax, %eax 1595; CHECK-NEXT: andl $1, %eax 1596; CHECK-NEXT: retl 1597 %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1598 ret i32 %res 1599} 1600declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1601 1602 1603define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { 1604; CHECK-LABEL: test_x86_sse42_pcmpistrio128: 1605; CHECK: ## BB#0: 1606; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1607; CHECK-NEXT: seto %al 1608; CHECK-NEXT: movzbl %al, %eax 1609; CHECK-NEXT: retl 1610 %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1611 ret i32 %res 1612} 1613declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1614 1615 1616define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { 1617; CHECK-LABEL: test_x86_sse42_pcmpistris128: 1618; CHECK: ## BB#0: 1619; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1620; CHECK-NEXT: sets %al 1621; CHECK-NEXT: movzbl %al, %eax 1622; CHECK-NEXT: retl 1623 %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1624 ret i32 %res 1625} 1626declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x 
i8>, i8) nounwind readnone 1627 1628 1629define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { 1630; CHECK-LABEL: test_x86_sse42_pcmpistriz128: 1631; CHECK: ## BB#0: 1632; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 1633; CHECK-NEXT: sete %al 1634; CHECK-NEXT: movzbl %al, %eax 1635; CHECK-NEXT: retl 1636 %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] 1637 ret i32 %res 1638} 1639declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1640 1641 1642define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { 1643; CHECK-LABEL: test_x86_sse42_pcmpistrm128: 1644; CHECK: ## BB#0: 1645; CHECK-NEXT: vpcmpistrm $7, %xmm1, %xmm0 1646; CHECK-NEXT: retl 1647 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] 1648 ret <16 x i8> %res 1649} 1650declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone 1651 1652 1653define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) { 1654; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load: 1655; CHECK: ## BB#0: 1656; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 1657; CHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0 1658; CHECK-NEXT: retl 1659 %1 = load <16 x i8>, <16 x i8>* %a1 1660 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1] 1661 ret <16 x i8> %res 1662} 1663 1664 1665define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { 1666; CHECK-LABEL: test_x86_sse_add_ss: 1667; CHECK: ## BB#0: 1668; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 1669; CHECK-NEXT: retl 1670 %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] 1671 ret <4 x float> %res 1672} 1673declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone 1674 1675 1676define <4 x float> @test_x86_sse_cmp_ps(<4 x float> 
%a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_cmp_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone


; llvm.x86.sse.cmp.ss with immediate 7: expects the vcmpordss mnemonic.
define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_cmp_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone


; llvm.x86.sse.comieq.ss: expects vcomiss followed by sete/movzbl.
define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comieq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.comige.ss: expects vcomiss followed by setae/movzbl.
define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comige_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.comigt.ss: expects vcomiss followed by seta/movzbl.
define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comigt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.comile.ss: expects vcomiss followed by setbe/movzbl.
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comile_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.comilt.ss: expects vcomiss followed by the sbbl/andl $1 sequence instead of a setcc.
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comilt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.comineq.ss: expects vcomiss followed by setne/movzbl.
define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comineq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.cvtsi2ss with the constant 7: expects the constant to be materialized in eax and fed to vcvtsi2ssl.
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtsi2ss:
; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone


; llvm.x86.sse.cvtss2si: expects vcvtss2si into eax.
define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtss2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone


; llvm.x86.sse.cvttss2si: expects vcvttss2si into eax.
define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvttss2si:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttss2si %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone


; llvm.x86.sse.div.ss: expects vdivss.
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_div_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.ldmxcsr on a pointer argument: expects the pointer to be read from the stack, then vldmxcsr (%eax).
define void @test_x86_sse_ldmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_ldmxcsr:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vldmxcsr (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse.ldmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind



; llvm.x86.sse.max.ps: expects vmaxps.
define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float>
@llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.max.ss: expects vmaxss.
define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.min.ps: expects vminps.
define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.min.ss: expects vminss.
define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.movmsk.ps: expects vmovmskps into eax.
define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_movmsk_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovmskps %xmm0, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone



; llvm.x86.sse.mul.ss: expects vmulss.
define <4 x float> @test_x86_sse_mul_ss(<4 x float>
%a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_mul_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.rcp.ps: expects vrcpps.
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vrcpps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone


; llvm.x86.sse.rcp.ss: expects the three-operand vrcpss with xmm0 used for both sources.
define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone


; llvm.x86.sse.rsqrt.ps: expects vrsqrtps.
define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrtps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone


; llvm.x86.sse.rsqrt.ss: expects the three-operand vrsqrtss with xmm0 used for both sources.
define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind
readnone


; llvm.x86.sse.sqrt.ps: expects vsqrtps.
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone


; llvm.x86.sse.sqrt.ss: expects the three-operand vsqrtss with xmm0 used for both sources.
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone


; llvm.x86.sse.stmxcsr on a pointer argument: expects the pointer to be read from the stack, then vstmxcsr (%eax).
define void @test_x86_sse_stmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_stmxcsr:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vstmxcsr (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse.stmxcsr(i8* %a0)
  ret void
}
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind


; llvm.x86.sse.storeu.ps: expects an unaligned vmovups store through the pointer read from the stack.
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_storeu_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovups %xmm0, (%eax)
; CHECK-NEXT: retl
  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
  ret void
}
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind


; llvm.x86.sse.sub.ss: expects vsubss.
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_sub_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x
float>) nounwind readnone


; llvm.x86.sse.ucomieq.ss: expects vucomiss followed by sete/movzbl.
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomieq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.ucomige.ss: expects vucomiss followed by setae/movzbl.
define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomige_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.ucomigt.ss: expects vucomiss followed by seta/movzbl.
define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomigt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.ucomile.ss: expects vucomiss followed by setbe/movzbl.
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomile_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.ucomilt.ss: expects vucomiss followed by the sbbl/andl $1 sequence instead of a setcc.
define i32
@test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomilt_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.sse.ucomineq.ss: expects vucomiss followed by setne/movzbl.
define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomineq_ss:
; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone


; llvm.x86.ssse3.pabs.b.128: expects vpabsb.
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpabsb %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone


; llvm.x86.ssse3.pabs.d.128: expects vpabsd.
define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpabsd %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone


; llvm.x86.ssse3.pabs.w.128: expects vpabsw.
define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpabsw %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16>
@llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone


; llvm.x86.ssse3.phadd.d.128: expects vphaddd.
define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.ssse3.phadd.sw.128: expects vphaddsw.
define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.ssse3.phadd.w.128: expects vphaddw.
define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.ssse3.phsub.d.128: expects vphsubd.
define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>)
nounwind readnone


; llvm.x86.ssse3.phsub.sw.128: expects vphsubsw.
define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.ssse3.phsub.w.128: expects vphsubw.
define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.ssse3.pmadd.ub.sw.128 (<16 x i8> inputs, <8 x i16> result): expects vpmaddubsw.
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.ssse3.pmul.hr.sw.128: expects vpmulhrsw.
define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.ssse3.pshuf.b.128: expects vpshufb.
define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.ssse3.psign.b.128: expects vpsignb.
define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_b_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsignb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone


; llvm.x86.ssse3.psign.d.128: expects vpsignd.
define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_d_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsignd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone


; llvm.x86.ssse3.psign.w.128: expects vpsignw.
define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_w_128:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsignw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone


; llvm.x86.avx.addsub.pd.256: expects vaddsubpd on ymm registers.
define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double>
@llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


; llvm.x86.avx.addsub.ps.256: expects vaddsubps on ymm registers.
define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone


; llvm.x86.avx.blendv.pd.256 with three vector operands: expects the four-operand vblendvpd.
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone


; llvm.x86.avx.blendv.ps.256 with three vector operands: expects the four-operand vblendvps.
define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone


; llvm.x86.avx.cmp.pd.256 with immediate 7: expects the vcmpordpd mnemonic on ymm registers.
define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


; llvm.x86.avx.cmp.ps.256 with immediate 7: expects the vcmpordps mnemonic on ymm registers.
; The intrinsic is declared once, after the pseudo-op test below.
define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}

; Chains llvm.x86.avx.cmp.ps.256 through every immediate from 0 to 31, passing each result
; as the second operand of the next call, and expects one named vcmp<pred>ps mnemonic per
; immediate, in the same order as the calls.
define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpunordps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnleps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngeps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngtps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpfalseps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgeps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgtps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmptrueps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmplt_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmple_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpunord_sps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_usps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnlt_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnle_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpord_sps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpeq_usps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpnge_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpngt_uqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpfalse_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpneq_osps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


; llvm.x86.avx.cvt.pd2.ps.256: expects vcvtpd2psy (ymm source, xmm result) and a vzeroupper before the return.
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone


; llvm.x86.avx.cvt.pd2dq.256: expects vcvtpd2dqy (ymm source, xmm result) and a vzeroupper before the return.
define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2dqy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone


; llvm.x86.avx.cvt.ps2.pd.256: expects vcvtps2pd from xmm to ymm.
define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone


; llvm.x86.avx.cvt.ps2dq.256: expects vcvtps2dq on ymm registers.
define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone


; llvm.x86.avx.cvtdq2.pd.256: expects vcvtdq2pd from xmm to ymm.
define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone


; llvm.x86.avx.cvtdq2.ps.256: expects vcvtdq2ps on ymm registers.
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone


; llvm.x86.avx.cvtt.pd2dq.256: expects vcvttpd2dqy (ymm source, xmm result) and a vzeroupper before the return.
define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone


; llvm.x86.avx.cvtt.ps2dq.256: expects vcvttps2dq on ymm registers.
define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone

; llvm.x86.avx.dp.ps.256 with immediate 7: expects vdpps $7 on ymm registers.
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


; llvm.x86.avx.hadd.pd.256: expects vhaddpd on ymm registers.
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone


; llvm.x86.avx.hadd.ps.256: expects vhaddps on ymm registers.
define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone


; llvm.x86.avx.hsub.pd.256: expects vhsubpd on ymm registers.
define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone


; llvm.x86.avx.hsub.ps.256: expects vhsubps on ymm registers.
define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
2518; CHECK-LABEL: test_x86_avx_hsub_ps_256: 2519; CHECK: ## BB#0: 2520; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0 2521; CHECK-NEXT: retl 2522 %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2523 ret <8 x float> %res 2524} 2525declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 2526 2527 2528define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) { 2529; CHECK-LABEL: test_x86_avx_ldu_dq_256: 2530; CHECK: ## BB#0: 2531; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2532; CHECK-NEXT: vlddqu (%eax), %ymm0 2533; CHECK-NEXT: retl 2534 %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1] 2535 ret <32 x i8> %res 2536} 2537declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly 2538 2539 2540define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) { 2541; CHECK-LABEL: test_x86_avx_maskload_pd: 2542; CHECK: ## BB#0: 2543; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2544; CHECK-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0 2545; CHECK-NEXT: retl 2546 %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1] 2547 ret <2 x double> %res 2548} 2549declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly 2550 2551 2552define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) { 2553; CHECK-LABEL: test_x86_avx_maskload_pd_256: 2554; CHECK: ## BB#0: 2555; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2556; CHECK-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0 2557; CHECK-NEXT: retl 2558 %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1] 2559 ret <4 x double> %res 2560} 2561declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly 2562 2563 2564define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) { 2565; CHECK-LABEL: test_x86_avx_maskload_ps: 2566; CHECK: ## BB#0: 2567; CHECK-NEXT: movl 
{{[0-9]+}}(%esp), %eax 2568; CHECK-NEXT: vmaskmovps (%eax), %xmm0, %xmm0 2569; CHECK-NEXT: retl 2570 %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1] 2571 ret <4 x float> %res 2572} 2573declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly 2574 2575 2576define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) { 2577; CHECK-LABEL: test_x86_avx_maskload_ps_256: 2578; CHECK: ## BB#0: 2579; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2580; CHECK-NEXT: vmaskmovps (%eax), %ymm0, %ymm0 2581; CHECK-NEXT: retl 2582 %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1] 2583 ret <8 x float> %res 2584} 2585declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly 2586 2587 2588define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) { 2589; CHECK-LABEL: test_x86_avx_maskstore_pd: 2590; CHECK: ## BB#0: 2591; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2592; CHECK-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax) 2593; CHECK-NEXT: retl 2594 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) 2595 ret void 2596} 2597declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind 2598 2599 2600define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) { 2601; CHECK-LABEL: test_x86_avx_maskstore_pd_256: 2602; CHECK: ## BB#0: 2603; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2604; CHECK-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax) 2605; CHECK-NEXT: vzeroupper 2606; CHECK-NEXT: retl 2607 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) 2608 ret void 2609} 2610declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind 2611 2612 2613define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) { 2614; CHECK-LABEL: test_x86_avx_maskstore_ps: 2615; CHECK: ## BB#0: 2616; 
CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2617; CHECK-NEXT: vmaskmovps %xmm1, %xmm0, (%eax) 2618; CHECK-NEXT: retl 2619 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) 2620 ret void 2621} 2622declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind 2623 2624 2625define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) { 2626; CHECK-LABEL: test_x86_avx_maskstore_ps_256: 2627; CHECK: ## BB#0: 2628; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2629; CHECK-NEXT: vmaskmovps %ymm1, %ymm0, (%eax) 2630; CHECK-NEXT: vzeroupper 2631; CHECK-NEXT: retl 2632 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) 2633 ret void 2634} 2635declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind 2636 2637 2638define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) { 2639; CHECK-LABEL: test_x86_avx_max_pd_256: 2640; CHECK: ## BB#0: 2641; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 2642; CHECK-NEXT: retl 2643 %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2644 ret <4 x double> %res 2645} 2646declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone 2647 2648 2649define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) { 2650; CHECK-LABEL: test_x86_avx_max_ps_256: 2651; CHECK: ## BB#0: 2652; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 2653; CHECK-NEXT: retl 2654 %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2655 ret <8 x float> %res 2656} 2657declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 2658 2659 2660define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) { 2661; CHECK-LABEL: test_x86_avx_min_pd_256: 2662; CHECK: ## BB#0: 2663; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0 2664; CHECK-NEXT: retl 2665 
%res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] 2666 ret <4 x double> %res 2667} 2668declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 2669 2670 2671define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) { 2672; CHECK-LABEL: test_x86_avx_min_ps_256: 2673; CHECK: ## BB#0: 2674; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 2675; CHECK-NEXT: retl 2676 %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] 2677 ret <8 x float> %res 2678} 2679declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 2680 2681 2682define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) { 2683; CHECK-LABEL: test_x86_avx_movmsk_pd_256: 2684; CHECK: ## BB#0: 2685; CHECK-NEXT: vmovmskpd %ymm0, %eax 2686; CHECK-NEXT: vzeroupper 2687; CHECK-NEXT: retl 2688 %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1] 2689 ret i32 %res 2690} 2691declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone 2692 2693 2694define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) { 2695; CHECK-LABEL: test_x86_avx_movmsk_ps_256: 2696; CHECK: ## BB#0: 2697; CHECK-NEXT: vmovmskps %ymm0, %eax 2698; CHECK-NEXT: vzeroupper 2699; CHECK-NEXT: retl 2700 %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1] 2701 ret i32 %res 2702} 2703declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone 2704 2705 2706 2707 2708 2709 2710 2711define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) { 2712; CHECK-LABEL: test_x86_avx_ptestc_256: 2713; CHECK: ## BB#0: 2714; CHECK-NEXT: vptest %ymm1, %ymm0 2715; CHECK-NEXT: sbbl %eax, %eax 2716; CHECK-NEXT: andl $1, %eax 2717; CHECK-NEXT: vzeroupper 2718; CHECK-NEXT: retl 2719 %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2720 ret i32 %res 2721} 
2722declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone 2723 2724 2725define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { 2726; CHECK-LABEL: test_x86_avx_ptestnzc_256: 2727; CHECK: ## BB#0: 2728; CHECK-NEXT: vptest %ymm1, %ymm0 2729; CHECK-NEXT: seta %al 2730; CHECK-NEXT: movzbl %al, %eax 2731; CHECK-NEXT: vzeroupper 2732; CHECK-NEXT: retl 2733 %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2734 ret i32 %res 2735} 2736declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone 2737 2738 2739define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { 2740; CHECK-LABEL: test_x86_avx_ptestz_256: 2741; CHECK: ## BB#0: 2742; CHECK-NEXT: vptest %ymm1, %ymm0 2743; CHECK-NEXT: sete %al 2744; CHECK-NEXT: movzbl %al, %eax 2745; CHECK-NEXT: vzeroupper 2746; CHECK-NEXT: retl 2747 %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1] 2748 ret i32 %res 2749} 2750declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone 2751 2752 2753define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) { 2754; CHECK-LABEL: test_x86_avx_rcp_ps_256: 2755; CHECK: ## BB#0: 2756; CHECK-NEXT: vrcpps %ymm0, %ymm0 2757; CHECK-NEXT: retl 2758 %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2759 ret <8 x float> %res 2760} 2761declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone 2762 2763 2764define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) { 2765; CHECK-LABEL: test_x86_avx_round_pd_256: 2766; CHECK: ## BB#0: 2767; CHECK-NEXT: vroundpd $7, %ymm0, %ymm0 2768; CHECK-NEXT: retl 2769 %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] 2770 ret <4 x double> %res 2771} 2772declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone 2773 2774 2775define <8 x float> 
@test_x86_avx_round_ps_256(<8 x float> %a0) { 2776; CHECK-LABEL: test_x86_avx_round_ps_256: 2777; CHECK: ## BB#0: 2778; CHECK-NEXT: vroundps $7, %ymm0, %ymm0 2779; CHECK-NEXT: retl 2780 %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] 2781 ret <8 x float> %res 2782} 2783declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone 2784 2785 2786define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) { 2787; CHECK-LABEL: test_x86_avx_rsqrt_ps_256: 2788; CHECK: ## BB#0: 2789; CHECK-NEXT: vrsqrtps %ymm0, %ymm0 2790; CHECK-NEXT: retl 2791 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2792 ret <8 x float> %res 2793} 2794declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone 2795 2796 2797define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) { 2798; CHECK-LABEL: test_x86_avx_sqrt_pd_256: 2799; CHECK: ## BB#0: 2800; CHECK-NEXT: vsqrtpd %ymm0, %ymm0 2801; CHECK-NEXT: retl 2802 %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1] 2803 ret <4 x double> %res 2804} 2805declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone 2806 2807 2808define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) { 2809; CHECK-LABEL: test_x86_avx_sqrt_ps_256: 2810; CHECK: ## BB#0: 2811; CHECK-NEXT: vsqrtps %ymm0, %ymm0 2812; CHECK-NEXT: retl 2813 %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] 2814 ret <8 x float> %res 2815} 2816declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone 2817 2818 2819define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { 2820 ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions 2821 ; add operation forces the execution domain. 
2822; CHECK-LABEL: test_x86_avx_storeu_dq_256: 2823; CHECK: ## BB#0: 2824; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2825; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 2826; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 2827; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1 2828; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 2829; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2830; CHECK-NEXT: vmovups %ymm0, (%eax) 2831; CHECK-NEXT: vzeroupper 2832; CHECK-NEXT: retl 2833 %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 2834 call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2) 2835 ret void 2836} 2837declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind 2838 2839 2840define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) { 2841 ; add operation forces the execution domain. 2842; CHECK-LABEL: test_x86_avx_storeu_pd_256: 2843; CHECK: ## BB#0: 2844; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2845; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1 2846; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 2847; CHECK-NEXT: vmovupd %ymm0, (%eax) 2848; CHECK-NEXT: vzeroupper 2849; CHECK-NEXT: retl 2850 %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0> 2851 call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2) 2852 ret void 2853} 2854declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind 2855 2856 2857define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) { 2858; CHECK-LABEL: test_x86_avx_storeu_ps_256: 2859; CHECK: ## BB#0: 2860; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax 2861; CHECK-NEXT: vmovups %ymm0, (%eax) 2862; CHECK-NEXT: vzeroupper 2863; CHECK-NEXT: retl 2864 call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1) 2865 ret void 2866} 2867declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind 2868 2869 
; AVX broadcast / permute / test intrinsics, followed by non-AVX instructions
; and the non-temporal store and pclmulqdq tests. Each function calls one
; intrinsic, and the FileCheck assertions inside it pin the expected llc output.

; 128-bit broadcast from memory. The pointer argument is read from the stack.
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly


define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly


; 128-bit lane permutes. The immediate is matched through the decoded shuffle
; comment that follows the instruction.
define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone


define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT: retl
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone


; In-lane permutes with an immediate control.
define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone


define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone


define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone


define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone


; In-lane permutes with a variable (vector) control.
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone


define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone


define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
; Same intrinsic, but the control vector is loaded from memory; the load is
; expected to be folded into the vpermilps memory operand.
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpermilps (%eax), %xmm0, %xmm0
; CHECK-NEXT: retl
  %a2 = load <4 x i32>, <4 x i32>* %a1
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone


define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone


; vtestpd / vtestps variants: each one turns a different flag condition into an
; i32 (sbbl/andl for vtestc, seta for vtestnzc, sete for vtestz).
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %ymm1, %ymm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %ymm1, %ymm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %ymm1, %ymm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone


; Register-clearing intrinsics. Both tests expect a vzeroupper immediately
; before the return in addition to the instruction for the intrinsic itself,
; which is why the second test lists vzeroupper twice.
define void @test_x86_avx_vzeroall() {
; CHECK-LABEL: test_x86_avx_vzeroall:
; CHECK: ## BB#0:
; CHECK-NEXT: vzeroall
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind


define void @test_x86_avx_vzeroupper() {
; CHECK-LABEL: test_x86_avx_vzeroupper:
; CHECK: ## BB#0:
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind

; Make sure that instructions which have no AVX equivalents, but are associated
; with SSEx feature flags, still work.

define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
; CHECK-LABEL: monitor:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: leal (%eax), %eax
; CHECK-NEXT: monitor
; CHECK-NEXT: retl
  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind

define void @mwait(i32 %E, i32 %H) nounwind {
; CHECK-LABEL: mwait:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: mwait
; CHECK-NEXT: retl
  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32) nounwind

define void @sfence() nounwind {
; CHECK-LABEL: sfence:
; CHECK: ## BB#0:
; CHECK-NEXT: sfence
; CHECK-NEXT: retl
  tail call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind

define void @lfence() nounwind {
; CHECK-LABEL: lfence:
; CHECK: ## BB#0:
; CHECK-NEXT: lfence
; CHECK-NEXT: retl
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

define void @mfence() nounwind {
; CHECK-LABEL: mfence:
; CHECK: ## BB#0:
; CHECK-NEXT: mfence
; CHECK-NEXT: retl
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind

define void @clflush(i8* %p) nounwind {
; CHECK-LABEL: clflush:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: clflush (%eax)
; CHECK-NEXT: retl
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind

define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; CHECK-LABEL: crc32_32_8:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: crc32b {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind

define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: crc32_32_16:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: crc32w {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind

define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: crc32_32_32:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: crc32l {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: retl
  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

; Non-temporal stores.
;
; In movnt_dq the constant-pool operand of vpaddq is matched with a regex rather
; than a literal LCPI<N>_<M> label: <N> is derived from the function's position
; in this file, so a literal label would break whenever a test is added or
; removed above this one.
; NOTE(review): the integer add presumably plays the same role as the fadd in
; movnt_pd below (forcing the execution domain) - confirm.
define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; CHECK-LABEL: movnt_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; CHECK-NEXT: vmovntdq %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind

define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
; CHECK-LABEL: movnt_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovntps %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind

define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
  ; The add operation forces the execution domain.
; CHECK-LABEL: movnt_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vmovntpd %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind


; Check for pclmulqdq
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_pclmulqdq:
; CHECK: ## BB#0:
; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone