; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL

; Each test below calls exactly one llvm.x86.sse2.* intrinsic and returns its
; result. The first check group covers the -mattr=-avx,+sse2 invocation, the
; second covers the -mcpu=knl invocation (VEX-prefixed mnemonics).

; add.sd: expect addsd, or vaddsd for knl.
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_add_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    addsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_add_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


; cmp.pd with immediate 7: expect the "ord" predicate spelling (cmpordpd).
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cmp_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    cmpordpd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cmp_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


; cmp.sd with immediate 7: expect the "ord" predicate spelling (cmpordsd).
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cmp_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    cmpordsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cmp_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; comieq.sd: expect comisd followed by a setnp/sete pair combined with andb.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comieq_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    comisd %xmm1, %xmm0
; SSE-NEXT:    setnp %al
; SSE-NEXT:    sete %cl
; SSE-NEXT:    andb %al, %cl
; SSE-NEXT:    movzbl %cl, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comieq_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcomisd %xmm1, %xmm0
; KNL-NEXT:    setnp %al
; KNL-NEXT:    sete %cl
; KNL-NEXT:    andb %al, %cl
; KNL-NEXT:    movzbl %cl, %eax
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


; comige.sd: expect comisd %xmm1, %xmm0 with setae into a pre-zeroed eax.
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comige_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    comisd %xmm1, %xmm0
; SSE-NEXT:    setae %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comige_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vcomisd %xmm1, %xmm0
; KNL-NEXT:    setae %al
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


; comigt.sd: expect comisd %xmm1, %xmm0 with seta into a pre-zeroed eax.
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comigt_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    comisd %xmm1, %xmm0
; SSE-NEXT:    seta %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comigt_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vcomisd %xmm1, %xmm0
; KNL-NEXT:    seta %al
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


; comile.sd: the expected compare has its operands swapped (comisd %xmm0, %xmm1)
; and is followed by setae.
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comile_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    comisd %xmm0, %xmm1
; SSE-NEXT:    setae %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comile_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vcomisd %xmm0, %xmm1
; KNL-NEXT:    setae %al
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


; comilt.sd: the expected compare has its operands swapped (comisd %xmm0, %xmm1)
; and is followed by seta.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comilt_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    xorl %eax, %eax
; SSE-NEXT:    comisd %xmm0, %xmm1
; SSE-NEXT:    seta %al
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comilt_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    vcomisd %xmm0, %xmm1
; KNL-NEXT:    seta %al
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


; comineq.sd: expect comisd followed by a setp/setne pair combined with orb.
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_comineq_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    comisd %xmm1, %xmm0
; SSE-NEXT:    setp %al
; SSE-NEXT:    setne %cl
; SSE-NEXT:    orb %al, %cl
; SSE-NEXT:    movzbl %cl, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_comineq_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcomisd %xmm1, %xmm0
; KNL-NEXT:    setp %al
; KNL-NEXT:    setne %cl
; KNL-NEXT:    orb %al, %cl
; KNL-NEXT:    movzbl %cl, %eax
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; cvtdq2ps: expect cvtdq2ps, or vcvtdq2ps for knl.
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtdq2ps:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtdq2ps %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


; cvtpd2dq: expect cvtpd2dq, or vcvtpd2dq for knl.
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtpd2dq:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtpd2dq %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtpd2dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtpd2dq %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


; cvtpd2ps: expect cvtpd2ps, or vcvtpd2ps for knl.
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtpd2ps:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtpd2ps:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtpd2ps %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


; cvtps2dq: expect cvtps2dq, or vcvtps2dq for knl.
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse2_cvtps2dq:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtps2dq %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtps2dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtps2dq %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


; cvtsd2si: expect cvtsd2si writing eax, or vcvtsd2si for knl.
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvtsd2si:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtsd2si %xmm0, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtsd2si:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtsd2si %xmm0, %eax
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


; cvtsd2ss: expect cvtsd2ss, or vcvtsd2ss for knl.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_cvtsd2ss:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtsd2ss:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1)
  ret <4 x float> %out
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; cvtsi2sd: the i32 operand is expected to be read from an %esp-relative slot,
; whose offset is matched by a regex.
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; SSE-LABEL: test_x86_sse2_cvtsi2sd:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtsi2sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


; cvtss2sd: expect cvtss2sd, or vcvtss2sd for knl.
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd:
; SSE:       ## BB#0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvtss2sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


; cvttpd2dq: expect cvttpd2dq, or vcvttpd2dq for knl.
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttpd2dq:
; SSE:       ## BB#0:
; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvttpd2dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


; cvttsd2si: expect cvttsd2si writing eax, or vcvttsd2si for knl.
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_cvttsd2si:
; SSE:       ## BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_cvttsd2si:
; KNL:       ## BB#0:
; KNL-NEXT:    vcvttsd2si %xmm0, %eax
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; div.sd: expect divsd, or vdivsd for knl.
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_div_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_div_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



; max.pd: expect maxpd, or vmaxpd for knl.
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_max_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    maxpd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_max_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


; max.sd: expect maxsd, or vmaxsd for knl.
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_max_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    maxsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_max_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


; min.pd: expect minpd, or vminpd for knl.
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_min_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_min_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


; min.sd: expect minsd, or vminsd for knl.
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_min_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    minsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_min_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


; movmsk.pd: expect movmskpd writing eax, or vmovmskpd for knl.
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_movmsk_pd:
; SSE:       ## BB#0:
; SSE-NEXT:    movmskpd %xmm0, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_movmsk_pd:
; KNL:       ## BB#0:
; KNL-NEXT:    vmovmskpd %xmm0, %eax
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




; mul.sd: expect mulsd, or vmulsd for knl.
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_mul_sd:
; SSE:       ## BB#0:
; SSE-NEXT:    mulsd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_mul_sd:
; KNL:       ## BB#0:
; KNL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %out
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


; packssdw.128: expect packssdw, or vpackssdw for knl.
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_packssdw_128:
; SSE:       ## BB#0:
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_packssdw_128:
; KNL:       ## BB#0:
; KNL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


; packsswb.128: expect packsswb, or vpacksswb for knl.
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_packsswb_128:
; SSE:       ## BB#0:
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_packsswb_128:
; KNL:       ## BB#0:
; KNL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; packuswb.128: expect packuswb, or vpackuswb for knl.
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_packuswb_128:
; SSE:       ## BB#0:
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_packuswb_128:
; KNL:       ## BB#0:
; KNL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


; padds.b: expect paddsb, or vpaddsb for knl.
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_padds_b:
; SSE:       ## BB#0:
; SSE-NEXT:    paddsb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_padds_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


; padds.w: expect paddsw, or vpaddsw for knl.
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_padds_w:
; SSE:       ## BB#0:
; SSE-NEXT:    paddsw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_padds_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


; paddus.b: expect paddusb, or vpaddusb for knl.
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_b:
; SSE:       ## BB#0:
; SSE-NEXT:    paddusb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_paddus_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


; paddus.w: expect paddusw, or vpaddusw for knl.
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_paddus_w:
; SSE:       ## BB#0:
; SSE-NEXT:    paddusw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_paddus_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


; pavg.b: expect pavgb, or vpavgb for knl.
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pavg_b:
; SSE:       ## BB#0:
; SSE-NEXT:    pavgb %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pavg_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


; pavg.w: expect pavgw, or vpavgw for knl.
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pavg_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pavgw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pavg_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmadd.wd: expect pmaddwd, or vpmaddwd for knl.
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmadd_wd:
; SSE:       ## BB#0:
; SSE-NEXT:    pmaddwd %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmadd_wd:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


; pmaxs.w: expect pmaxsw, or vpmaxsw for knl.
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmaxs_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmaxs_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmaxu.b: expect pmaxub, or vpmaxub for knl.
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pmaxu_b:
; SSE:       ## BB#0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmaxu_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


; pmins.w: expect pminsw, or vpminsw for knl.
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmins_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pminsw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmins_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone


; pminu.b: expect pminub, or vpminub for knl.
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_pminu_b:
; SSE:       ## BB#0:
; SSE-NEXT:    pminub %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pminu_b:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %out
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


; pmovmskb.128: expect pmovmskb writing eax, or vpmovmskb for knl.
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse2_pmovmskb_128:
; SSE:       ## BB#0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmovmskb_128:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovmskb %xmm0, %eax
; KNL-NEXT:    retl
  %out = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
  ret i32 %out
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


; pmulh.w: expect pmulhw, or vpmulhw for knl.
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulh_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pmulhw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmulh_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmulhu.w: expect pmulhuw, or vpmulhuw for knl.
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_pmulhu_w:
; SSE:       ## BB#0:
; SSE-NEXT:    pmulhuw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmulhu_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


; pmulu.dq: expect pmuludq, or vpmuludq for knl.
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_pmulu_dq:
; SSE:       ## BB#0:
; SSE-NEXT:    pmuludq %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pmulu_dq:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


; psad.bw: expect psadbw, or vpsadbw for knl.
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psad_bw:
; SSE:       ## BB#0:
; SSE-NEXT:    psadbw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psad_bw:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


; psll.d: expect pslld with a register shift count, or vpslld for knl.
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psll_d:
; SSE:       ## BB#0:
; SSE-NEXT:    pslld %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psll_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


; psll.q: expect psllq with a register shift count, or vpsllq for knl.
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psll_q:
; SSE:       ## BB#0:
; SSE-NEXT:    psllq %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psll_q:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


; psll.w: expect psllw with a register shift count, or vpsllw for knl.
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psll_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psllw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psll_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


; pslli.d with count 7: expect the immediate form pslld $7.
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_d:
; SSE:       ## BB#0:
; SSE-NEXT:    pslld $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pslli_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpslld $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


; pslli.q with count 7: expect the immediate form psllq $7.
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_q:
; SSE:       ## BB#0:
; SSE-NEXT:    psllq $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pslli_q:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllq $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


; pslli.w with count 7: expect the immediate form psllw $7.
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_pslli_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psllw $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_pslli_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsllw $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


; psra.d: expect psrad with a register shift count, or vpsrad for knl.
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psra_d:
; SSE:       ## BB#0:
; SSE-NEXT:    psrad %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psra_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


; psra.w: expect psraw with a register shift count, or vpsraw for knl.
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psra_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psraw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psra_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


; psrai.d with count 7: expect the immediate form psrad $7.
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_d:
; SSE:       ## BB#0:
; SSE-NEXT:    psrad $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrai_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrad $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


; psrai.w with count 7: expect the immediate form psraw $7.
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrai_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psraw $7, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrai_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsraw $7, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


; psrl.d: expect psrld with a register shift count, or vpsrld for knl.
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_d:
; SSE:       ## BB#0:
; SSE-NEXT:    psrld %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrl_d:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %out
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


; psrl.q: expect psrlq with a register shift count, or vpsrlq for knl.
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_q:
; SSE:       ## BB#0:
; SSE-NEXT:    psrlq %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrl_q:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %out
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


; psrl.w: expect psrlw with a register shift count, or vpsrlw for knl.
; The matching intrinsic declaration follows this function in the file.
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psrl_w:
; SSE:       ## BB#0:
; SSE-NEXT:    psrlw %xmm1, %xmm0
; SSE-NEXT:    retl
;
; KNL-LABEL: test_x86_sse2_psrl_w:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retl
  %out = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %out
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


; psrli.d with a constant count of 7: one immediate-form dword shift is expected
; (psrld on the -avx,+sse2 run, vpsrld on the knl run).
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_d:
; SSE: ## BB#0:
; SSE-NEXT: psrld $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrli_d:
; KNL: ## BB#0:
; KNL-NEXT: vpsrld $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %shifted = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7)
  ret <4 x i32> %shifted
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


; psrli.q with a constant count of 7: one immediate-form qword shift is expected.
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_q:
; SSE: ## BB#0:
; SSE-NEXT: psrlq $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrli_q:
; KNL: ## BB#0:
; KNL-NEXT: vpsrlq $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %shifted = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7)
  ret <2 x i64> %shifted
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


; psrli.w with a constant count of 7: one immediate-form word shift is expected.
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse2_psrli_w:
; SSE: ## BB#0:
; SSE-NEXT: psrlw $7, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psrli_w:
; KNL: ## BB#0:
; KNL-NEXT: vpsrlw $7, %xmm0, %xmm0
; KNL-NEXT: retl
  %shifted = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7)
  ret <8 x i16> %shifted
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


; psubs.b on two <16 x i8> operands: a single psubsb / vpsubsb is expected.
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_b:
; SSE: ## BB#0:
; SSE-NEXT: psubsb %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psubs_b:
; KNL: ## BB#0:
; KNL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %diff = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %diff
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


; psubs.w on two <8 x i16> operands: a single psubsw / vpsubsw is expected.
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubs_w:
; SSE: ## BB#0:
; SSE-NEXT: psubsw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psubs_w:
; KNL: ## BB#0:
; KNL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %diff = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %diff
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


; psubus.b on two <16 x i8> operands: a single psubusb / vpsubusb is expected.
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_b:
; SSE: ## BB#0:
; SSE-NEXT: psubusb %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psubus_b:
; KNL: ## BB#0:
; KNL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %diff = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %diff
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


; psubus.w on two <8 x i16> operands: a single psubusw / vpsubusw is expected.
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse2_psubus_w:
; SSE: ## BB#0:
; SSE-NEXT: psubusw %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_psubus_w:
; KNL: ## BB#0:
; KNL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %diff = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %diff
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


; sqrt.pd on a <2 x double>: a single in-place sqrtpd / vsqrtpd is expected.
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_pd:
; SSE: ## BB#0:
; SSE-NEXT: sqrtpd %xmm0, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_sqrt_pd:
; KNL: ## BB#0:
; KNL-NEXT: vsqrtpd %xmm0, %xmm0
; KNL-NEXT: retl
  %root = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
  ret <2 x double> %root
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


; sqrt.sd on a <2 x double>: a single sqrtsd is expected; the knl run uses the
; three-operand vsqrtsd with xmm0 as both sources.
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_sd:
; SSE: ## BB#0:
; SSE-NEXT: sqrtsd %xmm0, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_sqrt_sd:
; KNL: ## BB#0:
; KNL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; KNL-NEXT: retl
  %root = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
  ret <2 x double> %root
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


; sub.sd on two <2 x double> operands: a single subsd / vsubsd is expected.
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_sub_sd:
; SSE: ## BB#0:
; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_sub_sd:
; KNL: ## BB#0:
; KNL-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; KNL-NEXT: retl
  %diff = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %diff
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomieq.sd: the expected i32 result is built as (setnp AND sete) after the
; compare, then zero-extended into eax.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
; SSE: ## BB#0:
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: setnp %al
; SSE-NEXT: sete %cl
; SSE-NEXT: andb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomieq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vucomisd %xmm1, %xmm0
; KNL-NEXT: setnp %al
; KNL-NEXT: sete %cl
; KNL-NEXT: andb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
  %flag = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomige.sd: eax is zeroed first, then setae on the compare of (a0, a1).
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomige_sd:
; SSE: ## BB#0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: setae %al
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomige_sd:
; KNL: ## BB#0:
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vucomisd %xmm1, %xmm0
; KNL-NEXT: setae %al
; KNL-NEXT: retl
  %flag = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomigt.sd: eax is zeroed first, then seta on the compare of (a0, a1).
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomigt_sd:
; SSE: ## BB#0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: seta %al
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomigt_sd:
; KNL: ## BB#0:
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vucomisd %xmm1, %xmm0
; KNL-NEXT: seta %al
; KNL-NEXT: retl
  %flag = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomile.sd: the expected code swaps the compare operands (xmm0, xmm1) and
; reuses setae, i.e. the same condition as the ucomige test with sources reversed.
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomile_sd:
; SSE: ## BB#0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomisd %xmm0, %xmm1
; SSE-NEXT: setae %al
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomile_sd:
; KNL: ## BB#0:
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vucomisd %xmm0, %xmm1
; KNL-NEXT: setae %al
; KNL-NEXT: retl
  %flag = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomilt.sd: the expected code swaps the compare operands (xmm0, xmm1) and
; reuses seta, i.e. the same condition as the ucomigt test with sources reversed.
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomilt_sd:
; SSE: ## BB#0:
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomisd %xmm0, %xmm1
; SSE-NEXT: seta %al
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomilt_sd:
; KNL: ## BB#0:
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: vucomisd %xmm0, %xmm1
; KNL-NEXT: seta %al
; KNL-NEXT: retl
  %flag = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


; ucomineq.sd: the expected i32 result is built as (setp OR setne) after the
; compare, then zero-extended into eax.
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_ucomineq_sd:
; SSE: ## BB#0:
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: setp %al
; SSE-NEXT: setne %cl
; SSE-NEXT: orb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_ucomineq_sd:
; KNL: ## BB#0:
; KNL-NEXT: vucomisd %xmm1, %xmm0
; KNL-NEXT: setp %al
; KNL-NEXT: setne %cl
; KNL-NEXT: orb %al, %cl
; KNL-NEXT: movzbl %cl, %eax
; KNL-NEXT: retl
  %flag = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %flag
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

; pause intrinsic: takes no operands and returns nothing; both runs expect the
; same unprefixed pause instruction followed by the return.
define void @test_x86_sse2_pause() {
; SSE-LABEL: test_x86_sse2_pause:
; SSE: ## BB#0:
; SSE-NEXT: pause
; SSE-NEXT: retl
;
; KNL-LABEL: test_x86_sse2_pause:
; KNL: ## BB#0:
; KNL-NEXT: pause
; KNL-NEXT: retl
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind