; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c

define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi8:
; X32: # BB#0:
; X32-NEXT: paddb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_add_epi8:
; X64: # BB#0:
; X64-NEXT: paddb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = add <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi16:
; X32: # BB#0:
; X32-NEXT: paddw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_add_epi16:
; X64: # BB#0:
; X64-NEXT: paddw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = add <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi32:
; X32: # BB#0:
; X32-NEXT: paddd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_add_epi32:
; X64: # BB#0:
; X64-NEXT: paddd %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = add <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_add_epi64:
; X32: # BB#0:
; X32-NEXT: paddq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_add_epi64:
; X64: # BB#0:
; X64-NEXT: paddq %xmm1, %xmm0
; X64-NEXT: retq
  %res = add <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_add_pd:
; X32: # BB#0:
; X32-NEXT: addpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_add_pd:
; X64: # BB#0:
; X64-NEXT: addpd %xmm1, %xmm0
; X64-NEXT: retq
  %res = fadd <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_add_sd:
; X32: # BB#0:
; X32-NEXT: addsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_add_sd:
; X64: # BB#0:
; X64-NEXT: addsd %xmm1, %xmm0
; X64-NEXT: retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fadd = fadd double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fadd, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epi8:
; X32: # BB#0:
; X32-NEXT: paddsb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_adds_epi8:
; X64: # BB#0:
; X64-NEXT: paddsb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epi16:
; X32: # BB#0:
; X32-NEXT: paddsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_adds_epi16:
; X64: # BB#0:
; X64-NEXT: paddsw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epu8:
; X32: # BB#0:
; X32-NEXT: paddusb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_adds_epu8:
; X64: # BB#0:
; X64-NEXT: paddusb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_adds_epu16:
; X32: # BB#0:
; X32-NEXT: paddusw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_adds_epu16:
; X64: # BB#0:
; X64-NEXT: paddusw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_and_pd:
; X32: # BB#0:
; X32-NEXT: andps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_and_pd:
; X64: # BB#0:
; X64-NEXT: andps %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_and_si128:
; X32: # BB#0:
; X32-NEXT: andps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_and_si128:
; X64: # BB#0:
; X64-NEXT: andps %xmm1, %xmm0
; X64-NEXT: retq
  %res = and <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_andnot_pd:
; X32: # BB#0:
; X32-NEXT: andnps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_andnot_pd:
; X64: # BB#0:
; X64-NEXT: andnps %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_andnot_si128:
; X32: # BB#0:
; X32-NEXT: pcmpeqd %xmm2, %xmm2
; X32-NEXT: pxor %xmm2, %xmm0
; X32-NEXT: pand %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_andnot_si128:
; X64: # BB#0:
; X64-NEXT: pcmpeqd %xmm2, %xmm2
; X64-NEXT: pxor %xmm2, %xmm0
; X64-NEXT: pand %xmm1, %xmm0
; X64-NEXT: retq
  %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
  %res = and <2 x i64> %not, %a1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_avg_epu8:
; X32: # BB#0:
; X32-NEXT: pavgb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_avg_epu8:
; X64: # BB#0:
; X64-NEXT: pavgb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwind readnone

define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_avg_epu16:
; X32: # BB#0:
; X32-NEXT: pavgw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_avg_epu16:
; X64: # BB#0:
; X64-NEXT: pavgw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_bslli_si128:
; X32: # BB#0:
; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_bslli_si128:
; X64: # BB#0:
; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_bsrli_si128:
; X32: # BB#0:
; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: test_mm_bsrli_si128:
; X64: # BB#0:
; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_castpd_ps:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_castpd_ps:
; X64: # BB#0:
; X64-NEXT: retq
  %res = bitcast <2 x double> %a0 to <4 x float>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_castpd_si128:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_castpd_si128:
; X64: # BB#0:
; X64-NEXT: retq
  %res = bitcast <2 x double> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_castps_pd:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_castps_pd:
; X64: # BB#0:
; X64-NEXT: retq
  %res = bitcast <4 x float> %a0 to <2 x double>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_castps_si128:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_castps_si128:
; X64: # BB#0:
; X64-NEXT: retq
  %res = bitcast <4 x float> %a0 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_castsi128_pd:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_castsi128_pd:
; X64: # BB#0:
; X64-NEXT: retq
  %res = bitcast <2 x i64> %a0 to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_castsi128_ps:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_castsi128_ps:
; X64: # BB#0:
; X64-NEXT: retq
  %res = bitcast <2 x i64> %a0 to <4 x float>
  ret <4 x float> %res
}

define void @test_mm_clflush(i8* %a0) nounwind {
; X32-LABEL: test_mm_clflush:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: clflush (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_clflush:
; X64: # BB#0:
; X64-NEXT: clflush (%rdi)
; X64-NEXT: retq
  call void @llvm.x86.sse2.clflush(i8* %a0)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone

define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi8:
; X32: # BB#0:
; X32-NEXT: pcmpeqb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpeq_epi8:
; X64: # BB#0:
; X64-NEXT: pcmpeqb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp eq <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi16:
; X32: # BB#0:
; X32-NEXT: pcmpeqw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpeq_epi16:
; X64: # BB#0:
; X64-NEXT: pcmpeqw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp eq <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_epi32:
; X32: # BB#0:
; X32-NEXT: pcmpeqd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpeq_epi32:
; X64: # BB#0:
; X64-NEXT: pcmpeqd %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp eq <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_pd:
; X32: # BB#0:
; X32-NEXT: cmpeqpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpeq_pd:
; X64: # BB#0:
; X64-NEXT: cmpeqpd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp oeq <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpeq_sd:
; X32: # BB#0:
; X32-NEXT: cmpeqsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpeq_sd:
; X64: # BB#0:
; X64-NEXT: cmpeqsd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpge_pd:
; X32: # BB#0:
; X32-NEXT: cmplepd %xmm0, %xmm1
; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpge_pd:
; X64: # BB#0:
; X64-NEXT: cmplepd %xmm0, %xmm1
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp ole <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpge_sd:
; X32: # BB#0:
; X32-NEXT: cmplesd %xmm0, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpge_sd:
; X64: # BB#0:
; X64-NEXT: cmplesd %xmm0, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi8:
; X32: # BB#0:
; X32-NEXT: pcmpgtb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpgt_epi8:
; X64: # BB#0:
; X64-NEXT: pcmpgtb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi16:
; X32: # BB#0:
; X32-NEXT: pcmpgtw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpgt_epi16:
; X64: # BB#0:
; X64-NEXT: pcmpgtw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_epi32:
; X32: # BB#0:
; X32-NEXT: pcmpgtd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpgt_epi32:
; X64: # BB#0:
; X64-NEXT: pcmpgtd %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_pd:
; X32: # BB#0:
; X32-NEXT: cmpltpd %xmm0, %xmm1
; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpgt_pd:
; X64: # BB#0:
; X64-NEXT: cmpltpd %xmm0, %xmm1
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp olt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpgt_sd:
; X32: # BB#0:
; X32-NEXT: cmpltsd %xmm0, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpgt_sd:
; X64: # BB#0:
; X64-NEXT: cmpltsd %xmm0, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmple_pd:
; X32: # BB#0:
; X32-NEXT: cmplepd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmple_pd:
; X64: # BB#0:
; X64-NEXT: cmplepd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp ole <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmple_sd:
; X32: # BB#0:
; X32-NEXT: cmplesd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmple_sd:
; X64: # BB#0:
; X64-NEXT: cmplesd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi8:
; X32: # BB#0:
; X32-NEXT: pcmpgtb %xmm0, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmplt_epi8:
; X64: # BB#0:
; X64-NEXT: pcmpgtb %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg1, %arg0
  %res = sext <16 x i1> %cmp to <16 x i8>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi16:
; X32: # BB#0:
; X32-NEXT: pcmpgtw %xmm0, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmplt_epi16:
; X64: # BB#0:
; X64-NEXT: pcmpgtw %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg1, %arg0
  %res = sext <8 x i1> %cmp to <8 x i16>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_epi32:
; X32: # BB#0:
; X32-NEXT: pcmpgtd %xmm0, %xmm1
; X32-NEXT: movdqa %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmplt_epi32:
; X64: # BB#0:
; X64-NEXT: pcmpgtd %xmm0, %xmm1
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg1, %arg0
  %res = sext <4 x i1> %cmp to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_pd:
; X32: # BB#0:
; X32-NEXT: cmpltpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmplt_pd:
; X64: # BB#0:
; X64-NEXT: cmpltpd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp olt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmplt_sd:
; X32: # BB#0:
; X32-NEXT: cmpltsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmplt_sd:
; X64: # BB#0:
; X64-NEXT: cmpltsd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpneq_pd:
; X32: # BB#0:
; X32-NEXT: cmpneqpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpneq_pd:
; X64: # BB#0:
; X64-NEXT: cmpneqpd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp une <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpneq_sd:
; X32: # BB#0:
; X32-NEXT: cmpneqsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpneq_sd:
; X64: # BB#0:
; X64-NEXT: cmpneqsd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnge_pd:
; X32: # BB#0:
; X32-NEXT: cmpnlepd %xmm0, %xmm1
; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpnge_pd:
; X64: # BB#0:
; X64-NEXT: cmpnlepd %xmm0, %xmm1
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp ugt <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnge_sd:
; X32: # BB#0:
; X32-NEXT: cmpnlesd %xmm0, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpnge_sd:
; X64: # BB#0:
; X64-NEXT: cmpnlesd %xmm0, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpngt_pd:
; X32: # BB#0:
; X32-NEXT: cmpnltpd %xmm0, %xmm1
; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpngt_pd:
; X64: # BB#0:
; X64-NEXT: cmpnltpd %xmm0, %xmm1
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp uge <2 x double> %a1, %a0
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpngt_sd:
; X32: # BB#0:
; X32-NEXT: cmpnltsd %xmm0, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpngt_sd:
; X64: # BB#0:
; X64-NEXT: cmpnltsd %xmm0, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq
  %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
  %ext0 = extractelement <2 x double> %cmp, i32 0
  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
  ret <2 x double> %ins1
}

define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnle_pd:
; X32: # BB#0:
; X32-NEXT: cmpnlepd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpnle_pd:
; X64: # BB#0:
; X64-NEXT: cmpnlepd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp ugt <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnle_sd:
; X32: # BB#0:
; X32-NEXT: cmpnlesd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpnle_sd:
; X64: # BB#0:
; X64-NEXT: cmpnlesd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnlt_pd:
; X32: # BB#0:
; X32-NEXT: cmpnltpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpnlt_pd:
; X64: # BB#0:
; X64-NEXT: cmpnltpd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp uge <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpnlt_sd:
; X32: # BB#0:
; X32-NEXT: cmpnltsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpnlt_sd:
; X64: # BB#0:
; X64-NEXT: cmpnltsd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpord_pd:
; X32: # BB#0:
; X32-NEXT: cmpordpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpord_pd:
; X64: # BB#0:
; X64-NEXT: cmpordpd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp ord <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpord_sd:
; X32: # BB#0:
; X32-NEXT: cmpordsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpord_sd:
; X64: # BB#0:
; X64-NEXT: cmpordsd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpunord_pd:
; X32: # BB#0:
; X32-NEXT: cmpunordpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpunord_pd:
; X64: # BB#0:
; X64-NEXT: cmpunordpd %xmm1, %xmm0
; X64-NEXT: retq
  %fcmp = fcmp uno <2 x double> %a0, %a1
  %sext = sext <2 x i1> %fcmp to <2 x i64>
  %res = bitcast <2 x i64> %sext to <2 x double>
  ret <2 x double> %res
}

define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_cmpunord_sd:
; X32: # BB#0:
; X32-NEXT: cmpunordsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpunord_sd:
; X64: # BB#0:
; X64-NEXT: cmpunordsd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3)
  ret <2 x double> %res
}

define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comieq_sd:
; X32: # BB#0:
; X32-NEXT: comisd %xmm1, %xmm0
; X32-NEXT: setnp %al
; X32-NEXT: sete %cl
; X32-NEXT: andb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_comieq_sd:
; X64: # BB#0:
; X64-NEXT: comisd %xmm1, %xmm0
; X64-NEXT: setnp %al
; X64-NEXT: sete %cl
; X64-NEXT: andb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comige_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: comisd %xmm1, %xmm0
; X32-NEXT: setae %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_comige_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: comisd %xmm1, %xmm0
; X64-NEXT: setae %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comigt_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: comisd %xmm1, %xmm0
; X32-NEXT: seta %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_comigt_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: comisd %xmm1, %xmm0
; X64-NEXT: seta %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comile_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: comisd %xmm0, %xmm1
; X32-NEXT: setae %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_comile_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: comisd %xmm0, %xmm1
; X64-NEXT: setae %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comilt_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: comisd %xmm0, %xmm1
; X32-NEXT: seta %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_comilt_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: comisd %xmm0, %xmm1
; X64-NEXT: seta %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_comineq_sd:
; X32: # BB#0:
; X32-NEXT: comisd %xmm1, %xmm0
; X32-NEXT: setp %al
; X32-NEXT: setne %cl
; X32-NEXT: orb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_comineq_sd:
; X64: # BB#0:
; X64-NEXT: comisd %xmm1, %xmm0
; X64-NEXT: setp %al
; X64-NEXT: setne %cl
; X64-NEXT: orb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_cvtepi32_pd:
; X32: # BB#0:
; X32-NEXT: cvtdq2pd %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtepi32_pd:
; X64: # BB#0:
; X64-NEXT: cvtdq2pd %xmm0, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1>
  %res = sitofp <2 x i32> %ext to <2 x double>
  ret <2 x double> %res
}

define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_cvtepi32_ps:
; X32: # BB#0:
; X32-NEXT: cvtdq2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtepi32_ps:
; X64: # BB#0:
; X64-NEXT: cvtdq2ps %xmm0, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %arg0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_cvtpd_epi32:
; X32: # BB#0:
; X32-NEXT: cvtpd2dq %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtpd_epi32:
; X64: # BB#0:
; X64-NEXT: cvtpd2dq %xmm0, %xmm0
; X64-NEXT: retq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone

define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_cvtpd_ps:
; X32: # BB#0:
; X32-NEXT: cvtpd2ps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtpd_ps:
; X64: # BB#0:
; X64-NEXT: cvtpd2ps %xmm0, %xmm0
; X64-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_cvtps_epi32:
; X32: # BB#0:
; X32-NEXT: cvtps2dq %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtps_epi32:
; X64: # BB#0:
; X64-NEXT: cvtps2dq %xmm0, %xmm0
; X64-NEXT: retq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone

define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_cvtps_pd:
; X32: # BB#0:
; X32-NEXT: cvtps2pd %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtps_pd:
; X64: # BB#0:
; X64-NEXT: cvtps2pd %xmm0, %xmm0
; X64-NEXT: retq
  %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1>
  %res = fpext <2 x float> %ext to <2 x double>
  ret <2 x double> %res
}

define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_cvtsd_f64:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $8, %esp
; X32-NEXT: movlps %xmm0, (%esp)
; X32-NEXT: fldl (%esp)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsd_f64:
; X64: # BB#0:
; X64-NEXT: retq
  %res = extractelement <2 x double> %a0, i32 0
  ret double %res
}

define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_cvtsd_si32:
; X32: # BB#0:
; X32-NEXT: cvtsd2si %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsd_si32:
; X64: # BB#0:
; X64-NEXT: cvtsd2si %xmm0, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone

define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_cvtsi128_si32:
; X32: # BB#0:
; X32-NEXT: movd %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsi128_si32:
; X64: # BB#0:
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = extractelement <4 x i32> %arg0, i32 0
  ret i32 %res
}

define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind {
; X32-LABEL: test_mm_cvtsi32_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: cvtsi2sdl %eax, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsi32_sd:
; X64: # BB#0:
; X64-NEXT: cvtsi2sdl %edi, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq
  %cvt = sitofp i32 %a1 to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind {
; X32-LABEL: test_mm_cvtsi32_si128:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtsi32_si128:
; X64: # BB#0:
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: retq
  %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
  %res2 = insertelement <4 x i32> %res1, i32 0, i32 2
  %res3 = insertelement <4 x i32> %res2, i32 0, i32 3
  %res = bitcast <4 x i32> %res3 to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_cvtss_sd:
; X32: # BB#0:
; X32-NEXT: cvtss2sd %xmm1, %xmm1
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtss_sd:
; X64: # BB#0:
; X64-NEXT: cvtss2sd %xmm1, %xmm1
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq
  %ext = extractelement <4 x float> %a1, i32 0
  %cvt = fpext float %ext to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_cvttpd_epi32:
; X32: # BB#0:
; X32-NEXT: cvttpd2dq %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvttpd_epi32:
; X64: # BB#0:
; X64-NEXT: cvttpd2dq %xmm0, %xmm0
; X64-NEXT: retq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_cvttps_epi32:
; X32: # BB#0:
; X32-NEXT: cvttps2dq %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvttps_epi32:
; X64: # BB#0:
; X64-NEXT: cvttps2dq %xmm0, %xmm0
; X64-NEXT: retq
  %res = fptosi <4 x float> %a0 to <4 x i32>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_cvttsd_si32:
; X32: # BB#0:
; X32-NEXT: cvttsd2si %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvttsd_si32:
; X64: # BB#0:
; X64-NEXT: cvttsd2si %xmm0, %eax
; X64-NEXT: retq
  %ext = extractelement <2 x double> %a0, i32 0
  %res = fptosi double %ext to i32
  ret i32 %res
}

define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_div_pd:
; X32: # BB#0:
; X32-NEXT: divpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_div_pd:
; X64: # BB#0:
; X64-NEXT: divpd %xmm1, %xmm0
; X64-NEXT: retq
  %res = fdiv <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_div_sd:
; X32: # BB#0:
; X32-NEXT: divsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_div_sd:
; X64: # BB#0:
; X64-NEXT: divsd %xmm1, %xmm0
; X64-NEXT: retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fdiv = fdiv double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fdiv, i32 0
  ret <2 x double> %res
}

define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_extract_epi16:
; X32: # BB#0:
; X32-NEXT: pextrw $1, %xmm0, %eax
; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_extract_epi16:
; X64: # BB#0:
; X64-NEXT: pextrw $1, %xmm0, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext = extractelement <8 x i16> %arg0, i32 1
  %res = zext i16 %ext to i32
  ret i32 %res
}

define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind {
; X32-LABEL: test_mm_insert_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: pinsrw $1, %eax, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_insert_epi16:
; X64: # BB#0:
; X64-NEXT: pinsrw $1, %edi, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = insertelement <8 x i16> %arg0, i16 %a1, i32 1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define void @test_mm_lfence() nounwind {
; X32-LABEL: test_mm_lfence:
; X32: # BB#0:
; X32-NEXT: lfence
; X32-NEXT: retl
;
; X64-LABEL: test_mm_lfence:
; X64: # BB#0:
; X64-NEXT: lfence
; X64-NEXT: retq
  call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind readnone

define <2 x double> @test_mm_load_pd(double* %a0) nounwind {
; X32-LABEL: test_mm_load_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movaps (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_load_pd:
; X64: # BB#0:
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  %res = load <2 x double>, <2 x double>* %arg0, align 16
  ret <2 x double> %res
}

define <2 x double> @test_mm_load_sd(double* %a0) nounwind {
; X32-LABEL: test_mm_load_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: retl
;
; X64-LABEL: test_mm_load_sd:
; X64: # BB#0:
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: retq
  %ld = load double, double* %a0, align 1
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double 0.0, i32 1
  ret <2 x double> %res1
}

define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind {
; X32-LABEL: test_mm_load_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movaps (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_load_si128:
; X64: # BB#0:
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: retq
  %res = load <2 x i64>, <2 x i64>* %a0, align 16
  ret <2 x i64> %res
}

define <2 x double> @test_mm_load1_pd(double* %a0) nounwind {
; X32-LABEL: test_mm_load1_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_load1_pd:
; X64: # BB#0:
; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT: retq
  %ld = load double, double* %a0, align 8
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double %ld, i32 1
  ret <2 x double> %res1
}

define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind {
; X32-LABEL: test_mm_loadh_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadh_pd:
; X64: # BB#0:
; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-NEXT: retq
  %ld = load double, double* %a1, align 8
  %res = insertelement <2 x double> %a0, double %ld, i32 1
  ret <2 x double> %res
}

define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind {
; X32-LABEL: test_mm_loadl_epi64:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadl_epi64:
; X64: # BB#0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT: retq
  %bc = bitcast <2 x i64>* %a1 to i64*
  %ld = load i64, i64* %bc, align 1
  %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 0, i32 1
  ret <2 x i64> %res1
}

define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind {
; X32-LABEL: test_mm_loadl_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadl_pd:
; X64: # BB#0:
; X64-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
; X64-NEXT: retq
  %ld = load double, double* %a1, align 8
  %res = insertelement <2 x double> %a0, double %ld, i32 0
  ret <2 x double> %res
}

define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind {
; X32-LABEL: test_mm_loadr_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movapd (%eax), %xmm0
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadr_pd:
; X64: # BB#0:
; X64-NEXT: movapd (%rdi), %xmm0
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  %ld = load <2 x double>, <2 x double>* %arg0, align 16
  %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  ret <2 x double> %res
}

define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind {
; X32-LABEL: test_mm_loadu_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadu_pd:
; X64: # BB#0:
; X64-NEXT: movups (%rdi), %xmm0
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  %res = load <2 x double>, <2 x double>* %arg0, align 1
  ret <2 x double> %res
}

define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind {
; X32-LABEL: test_mm_loadu_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadu_si128:
; X64: # BB#0:
; X64-NEXT: movups (%rdi), %xmm0
; X64-NEXT: retq
  %res = load <2 x i64>, <2 x i64>* %a0, align 1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_madd_epi16:
; X32: # BB#0:
; X32-NEXT: pmaddwd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_madd_epi16:
; X64: # BB#0:
; X64-NEXT: pmaddwd %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind {
; X32-LABEL: test_mm_maskmoveu_si128:
; X32: # BB#0:
; X32-NEXT: pushl %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: maskmovdqu %xmm1, %xmm0
; X32-NEXT: popl %edi
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskmoveu_si128:
; X64: # BB#0:
; X64-NEXT: maskmovdqu %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2)
  ret void
}
declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind

define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_max_epi16:
; X32: # BB#0:
; X32-NEXT: pmaxsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_max_epi16:
; X64: # BB#0:
; X64-NEXT: pmaxsw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp sgt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_max_epu8:
; X32: # BB#0:
; X32-NEXT: pmaxub %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_max_epu8:
; X64: # BB#0:
; X64-NEXT: pmaxub %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp ugt <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_max_pd:
; X32: # BB#0:
; X32-NEXT: maxpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_max_pd:
; X64: # BB#0:
; X64-NEXT: maxpd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_max_sd:
; X32: # BB#0:
; X32-NEXT: maxsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_max_sd:
; X64: # BB#0:
; X64-NEXT: maxsd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone

define void @test_mm_mfence() nounwind {
; X32-LABEL: test_mm_mfence:
; X32: # BB#0:
; X32-NEXT: mfence
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mfence:
; X64: # BB#0:
; X64-NEXT: mfence
; X64-NEXT: retq
  call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind readnone

define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_min_epi16:
; X32: # BB#0:
; X32-NEXT: pminsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_min_epi16:
; X64: # BB#0:
; X64-NEXT: pminsw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp slt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_min_epu8:
; X32: # BB#0:
; X32-NEXT: pminub %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_min_epu8:
; X64: # BB#0:
; X64-NEXT: pminub %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp ult <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_min_pd:
; X32: # BB#0:
; X32-NEXT: minpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_min_pd:
; X64: # BB#0:
; X64-NEXT: minpd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_min_sd:
; X32: # BB#0:
; X32-NEXT: minsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_min_sd:
; X64: # BB#0:
; X64-NEXT: minsd %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_move_epi64:
; X32: # BB#0:
; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X32-NEXT: retl
;
; X64-LABEL: test_mm_move_epi64:
; X64: # BB#0:
; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT: retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_move_sd:
; X32: # BB#0:
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_move_sd:
; X64: # BB#0:
; X64-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-NEXT: retq
  %ext0 = extractelement <2 x double> %a1, i32 0
  %res0 = insertelement <2 x double> undef, double %ext0, i32 0
  %ext1 = extractelement <2 x double> %a0, i32 1
  %res1 = insertelement <2 x double> %res0, double %ext1, i32 1
  ret <2 x double> %res1
}

define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm_movemask_epi8:
; X32: # BB#0:
; X32-NEXT: pmovmskb %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_movemask_epi8:
; X64: # BB#0:
; X64-NEXT: pmovmskb %xmm0, %eax
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone

define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm_movemask_pd:
; X32: # BB#0:
; X32-NEXT: movmskpd %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_movemask_pd:
; X64: # BB#0:
; X64-NEXT: movmskpd %xmm0, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone

define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mul_epu32:
; X32: # BB#0:
; X32-NEXT: pmuludq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mul_epu32:
; X64: # BB#0:
; X64-NEXT: pmuludq %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %arg0, <4 x i32> %arg1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_mul_pd:
; X32: # BB#0:
; X32-NEXT: mulpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mul_pd:
; X64: # BB#0:
; X64-NEXT: mulpd %xmm1, %xmm0
; X64-NEXT: retq
  %res = fmul <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_mul_sd:
; X32: # BB#0:
; X32-NEXT: mulsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mul_sd:
; X64: # BB#0:
; X64-NEXT: mulsd %xmm1, %xmm0
; X64-NEXT: retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fmul = fmul double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fmul, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mulhi_epi16:
; X32: # BB#0:
; X32-NEXT: pmulhw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mulhi_epi16:
; X64: # BB#0:
; X64-NEXT: pmulhw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mulhi_epu16:
; X32: # BB#0:
; X32-NEXT: pmulhuw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mulhi_epu16:
; X64: # BB#0:
; X64-NEXT: pmulhuw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mullo_epi16:
; X32: # BB#0:
; X32-NEXT: pmullw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mullo_epi16:
; X64: # BB#0:
; X64-NEXT: pmullw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = mul <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_or_pd:
; X32: # BB#0:
; X32-NEXT: orps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_or_pd:
; X64: # BB#0:
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = or <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_or_si128:
; X32: # BB#0:
; X32-NEXT: orps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_or_si128:
; X64: # BB#0:
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
  %res = or <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_packs_epi16:
; X32: # BB#0:
; X32-NEXT: packsswb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_packs_epi16:
; X64: # BB#0:
; X64-NEXT: packsswb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_packs_epi32:
; X32: # BB#0:
; X32-NEXT: packssdw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_packs_epi32:
; X64: # BB#0:
; X64-NEXT: packssdw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_packus_epi16:
; X32: # BB#0:
; X32-NEXT: packuswb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_packus_epi16:
; X64: # BB#0:
; X64-NEXT: packuswb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

define void @test_mm_pause() nounwind {
; X32-LABEL: test_mm_pause:
; X32: # BB#0:
; X32-NEXT: pause
; X32-NEXT: retl
;
; X64-LABEL: test_mm_pause:
; X64: # BB#0:
; X64-NEXT: pause
; X64-NEXT: retq
  call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind readnone

define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sad_epu8:
; X32: # BB#0:
; X32-NEXT: psadbw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sad_epu8:
; X64: # BB#0:
; X64-NEXT: psadbw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
; X32-LABEL: test_mm_set_epi8:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: punpcklbw {{.*#+}} xmm3 =
xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2058; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2059; X32-NEXT: movd %eax, %xmm0 2060; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2061; X32-NEXT: movd %eax, %xmm1 2062; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2063; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2064; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2065; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2066; X32-NEXT: movd %eax, %xmm0 2067; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2068; X32-NEXT: movd %eax, %xmm2 2069; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2070; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2071; X32-NEXT: movd %eax, %xmm0 2072; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2073; X32-NEXT: movd %eax, %xmm3 2074; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2075; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2076; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2077; X32-NEXT: movd %eax, %xmm0 2078; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2079; X32-NEXT: movd %eax, %xmm2 2080; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2081; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2082; X32-NEXT: movd %eax, %xmm4 2083; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2084; X32-NEXT: movd %eax, %xmm0 2085; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 2086; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2087; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2088; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2089; X32-NEXT: retl 2090; 2091; X64-LABEL: test_mm_set_epi8: 2092; X64: # BB#0: 2093; X64-NEXT: movzbl %dil, %eax 2094; X64-NEXT: movd %eax, %xmm0 2095; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2096; X64-NEXT: movd %eax, %xmm1 2097; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2098; X64-NEXT: movzbl %r8b, %eax 2099; X64-NEXT: movd %eax, %xmm0 2100; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2101; X64-NEXT: movd %eax, %xmm2 2102; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2103; X64-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2104; X64-NEXT: movzbl %dl, %eax 2105; X64-NEXT: movd %eax, %xmm0 2106; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2107; X64-NEXT: movd %eax, %xmm3 2108; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2109; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2110; X64-NEXT: movd %eax, %xmm0 2111; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2112; X64-NEXT: movd %eax, %xmm1 2113; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2114; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2115; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2116; X64-NEXT: movzbl %sil, %eax 2117; X64-NEXT: movd %eax, %xmm0 2118; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2119; X64-NEXT: movd %eax, %xmm2 2120; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2121; X64-NEXT: movzbl %r9b, %eax 2122; X64-NEXT: movd %eax, %xmm0 2123; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2124; X64-NEXT: movd %eax, %xmm3 2125; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2126; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2127; X64-NEXT: movzbl %cl, %eax 2128; X64-NEXT: movd %eax, %xmm0 2129; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2130; X64-NEXT: movd %eax, %xmm2 2131; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2132; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2133; X64-NEXT: movd %eax, %xmm4 2134; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2135; X64-NEXT: movd %eax, %xmm0 2136; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 2137; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2138; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2139; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2140; X64-NEXT: retq 2141 %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 2142 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 2143 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2 2144 %res3 = insertelement <16 x i8> %res2, i8 %a12, i32 3 2145 %res4 = insertelement <16 x i8> %res3, i8 %a11, i32 4 2146 %res5 = insertelement <16 x i8> %res4, i8 %a10, i32 5 2147 %res6 = insertelement <16 x i8> %res5, i8 %a9 , i32 6 2148 %res7 = insertelement <16 x i8> %res6, i8 %a8 , 
i32 7 2149 %res8 = insertelement <16 x i8> %res7, i8 %a7 , i32 8 2150 %res9 = insertelement <16 x i8> %res8, i8 %a6 , i32 9 2151 %res10 = insertelement <16 x i8> %res9, i8 %a5 , i32 10 2152 %res11 = insertelement <16 x i8> %res10, i8 %a4 , i32 11 2153 %res12 = insertelement <16 x i8> %res11, i8 %a3 , i32 12 2154 %res13 = insertelement <16 x i8> %res12, i8 %a2 , i32 13 2155 %res14 = insertelement <16 x i8> %res13, i8 %a1 , i32 14 2156 %res15 = insertelement <16 x i8> %res14, i8 %a0 , i32 15 2157 %res = bitcast <16 x i8> %res15 to <2 x i64> 2158 ret <2 x i64> %res 2159} 2160 2161define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { 2162; X32-LABEL: test_mm_set_epi16: 2163; X32: # BB#0: 2164; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2165; X32-NEXT: movd %eax, %xmm1 2166; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2167; X32-NEXT: movd %eax, %xmm2 2168; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2169; X32-NEXT: movd %eax, %xmm3 2170; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2171; X32-NEXT: movd %eax, %xmm4 2172; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2173; X32-NEXT: movd %eax, %xmm5 2174; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2175; X32-NEXT: movd %eax, %xmm6 2176; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2177; X32-NEXT: movd %eax, %xmm7 2178; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2179; X32-NEXT: movd %eax, %xmm0 2180; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2181; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 2182; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 2183; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 2184; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] 2185; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3] 2186; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 2187; X32-NEXT: retl 2188; 2189; X64-LABEL: test_mm_set_epi16: 2190; X64: # BB#0: 2191; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w 2192; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2193; X64-NEXT: movd %edi, %xmm0 2194; X64-NEXT: movd %r8d, %xmm1 2195; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2196; X64-NEXT: movd %edx, %xmm0 2197; X64-NEXT: movd %eax, %xmm2 2198; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2199; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2200; X64-NEXT: movd %esi, %xmm0 2201; X64-NEXT: movd %r9d, %xmm1 2202; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2203; X64-NEXT: movd %ecx, %xmm3 2204; X64-NEXT: movd %r10d, %xmm0 2205; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 2206; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2207; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2208; X64-NEXT: retq 2209 %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 2210 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 2211 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2 2212 %res3 = insertelement <8 x i16> %res2, i16 %a4, i32 3 2213 %res4 = 
insertelement <8 x i16> %res3, i16 %a3, i32 4 2214 %res5 = insertelement <8 x i16> %res4, i16 %a2, i32 5 2215 %res6 = insertelement <8 x i16> %res5, i16 %a1, i32 6 2216 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 2217 %res = bitcast <8 x i16> %res7 to <2 x i64> 2218 ret <2 x i64> %res 2219} 2220 2221define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { 2222; X32-LABEL: test_mm_set_epi32: 2223; X32: # BB#0: 2224; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2225; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2226; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2227; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 2228; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2229; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2230; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2231; X32-NEXT: retl 2232; 2233; X64-LABEL: test_mm_set_epi32: 2234; X64: # BB#0: 2235; X64-NEXT: movd %edi, %xmm0 2236; X64-NEXT: movd %edx, %xmm1 2237; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2238; X64-NEXT: movd %esi, %xmm2 2239; X64-NEXT: movd %ecx, %xmm0 2240; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2241; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2242; X64-NEXT: retq 2243 %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0 2244 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1 2245 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2 2246 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3 2247 %res = bitcast <4 x i32> %res3 to <2 x i64> 2248 ret <2 x i64> %res 2249} 2250 2251; TODO test_mm_set_epi64 2252 2253define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind { 2254; X32-LABEL: test_mm_set_epi64x: 2255; X32: # BB#0: 2256; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2257; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2258; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2259; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2260; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 2261; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2262; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2263; X32-NEXT: retl 2264; 2265; X64-LABEL: test_mm_set_epi64x: 2266; X64: # BB#0: 2267; X64-NEXT: movd %rdi, %xmm1 2268; X64-NEXT: movd %rsi, %xmm0 2269; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2270; X64-NEXT: retq 2271 %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0 2272 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 2273 ret <2 x i64> %res1 2274} 2275 2276define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind { 2277; X32-LABEL: test_mm_set_pd: 2278; X32: # BB#0: 2279; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2280; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2281; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2282; X32-NEXT: retl 2283; 2284; X64-LABEL: test_mm_set_pd: 2285; X64: # BB#0: 2286; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 2287; X64-NEXT: movapd %xmm1, %xmm0 2288; X64-NEXT: retq 2289 %res0 = insertelement <2 x double> undef, double %a1, i32 0 2290 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 2291 ret <2 x double> %res1 2292} 2293 2294define <2 x double> @test_mm_set_sd(double %a0) nounwind { 2295; X32-LABEL: test_mm_set_sd: 2296; X32: # BB#0: 2297; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2298; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2299; 
X32-NEXT: retl 2300; 2301; X64-LABEL: test_mm_set_sd: 2302; X64: # BB#0: 2303; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2304; X64-NEXT: retq 2305 %res0 = insertelement <2 x double> undef, double %a0, i32 0 2306 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 2307 ret <2 x double> %res1 2308} 2309 2310define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind { 2311; X32-LABEL: test_mm_set1_epi8: 2312; X32: # BB#0: 2313; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2314; X32-NEXT: movd %eax, %xmm0 2315; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2316; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2317; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2318; X32-NEXT: retl 2319; 2320; X64-LABEL: test_mm_set1_epi8: 2321; X64: # BB#0: 2322; X64-NEXT: movzbl %dil, %eax 2323; X64-NEXT: movd %eax, %xmm0 2324; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2325; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2326; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2327; X64-NEXT: retq 2328 %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0 2329 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1 2330 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2 2331 %res3 = insertelement <16 x i8> %res2, i8 %a0, i32 3 2332 %res4 = insertelement <16 x i8> %res3, i8 %a0, i32 4 2333 %res5 = insertelement <16 x i8> %res4, i8 %a0, i32 5 2334 %res6 = insertelement <16 x i8> %res5, i8 %a0, i32 6 2335 %res7 = insertelement <16 x i8> %res6, i8 %a0, i32 7 2336 %res8 = insertelement <16 x i8> %res7, i8 %a0, i32 8 2337 %res9 = insertelement <16 x i8> %res8, i8 %a0, i32 9 2338 %res10 = insertelement <16 x i8> %res9, i8 %a0, i32 10 2339 %res11 = insertelement <16 x i8> %res10, i8 %a0, i32 11 2340 %res12 = insertelement <16 x i8> %res11, i8 %a0, i32 12 2341 %res13 = insertelement <16 x i8> %res12, i8 %a0, i32 13 2342 %res14 = insertelement <16 x i8> %res13, i8 %a0, i32 14 2343 %res15 = insertelement <16 x i8> %res14, i8 %a0, i32 15 2344 %res = bitcast <16 x i8> %res15 to <2 x i64> 2345 ret <2 x i64> %res 2346} 2347 2348define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind { 2349; X32-LABEL: test_mm_set1_epi16: 2350; X32: # BB#0: 2351; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2352; X32-NEXT: movd %eax, %xmm0 2353; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2354; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2355; X32-NEXT: retl 2356; 2357; X64-LABEL: test_mm_set1_epi16: 2358; X64: # BB#0: 2359; X64-NEXT: movd %edi, %xmm0 2360; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2361; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2362; X64-NEXT: retq 2363 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 2364 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1 2365 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2 2366 %res3 = insertelement <8 x i16> %res2, i16 %a0, i32 3 2367 %res4 = insertelement <8 x i16> %res3, i16 %a0, i32 4 2368 %res5 = insertelement <8 x i16> %res4, i16 %a0, i32 5 2369 %res6 = insertelement <8 x i16> %res5, i16 %a0, i32 6 2370 %res7 = insertelement <8 x i16> %res6, i16 %a0, i32 7 2371 %res = bitcast <8 x i16> %res7 to <2 x i64> 2372 ret <2 x i64> %res 2373} 2374 2375define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind { 2376; X32-LABEL: test_mm_set1_epi32: 2377; X32: # BB#0: 2378; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2379; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2380; X32-NEXT: retl 2381; 2382; X64-LABEL: test_mm_set1_epi32: 2383; X64: # BB#0: 2384; X64-NEXT: movd %edi, 
%xmm0 2385; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2386; X64-NEXT: retq 2387 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 2388 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1 2389 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2 2390 %res3 = insertelement <4 x i32> %res2, i32 %a0, i32 3 2391 %res = bitcast <4 x i32> %res3 to <2 x i64> 2392 ret <2 x i64> %res 2393} 2394 2395; TODO test_mm_set1_epi64 2396 2397define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind { 2398; X32-LABEL: test_mm_set1_epi64x: 2399; X32: # BB#0: 2400; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2401; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2402; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2403; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 2404; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2405; X32-NEXT: retl 2406; 2407; X64-LABEL: test_mm_set1_epi64x: 2408; X64: # BB#0: 2409; X64-NEXT: movd %rdi, %xmm0 2410; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 2411; X64-NEXT: retq 2412 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 2413 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 2414 ret <2 x i64> %res1 2415} 2416 2417define <2 x double> @test_mm_set1_pd(double %a0) nounwind { 2418; X32-LABEL: test_mm_set1_pd: 2419; X32: # BB#0: 2420; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2421; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 2422; X32-NEXT: retl 2423; 2424; X64-LABEL: test_mm_set1_pd: 2425; X64: # BB#0: 2426; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 2427; X64-NEXT: retq 2428 %res0 = insertelement <2 x double> undef, double %a0, i32 0 2429 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 2430 ret <2 x double> %res1 2431} 2432 2433define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { 2434; X32-LABEL: test_mm_setr_epi8: 2435; X32: # BB#0: 2436; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2437; X32-NEXT: movd %eax, %xmm0 2438; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2439; X32-NEXT: movd %eax, %xmm1 2440; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2441; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2442; X32-NEXT: movd %eax, %xmm0 2443; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2444; X32-NEXT: movd %eax, %xmm2 2445; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2446; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2447; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2448; X32-NEXT: movd %eax, %xmm0 2449; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2450; X32-NEXT: movd %eax, %xmm3 2451; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2452; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2453; X32-NEXT: movd %eax, %xmm0 2454; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2455; X32-NEXT: movd %eax, %xmm1 2456; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2457; X32-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2458; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2459; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2460; X32-NEXT: movd %eax, %xmm0 2461; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2462; X32-NEXT: movd %eax, %xmm2 2463; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2464; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2465; X32-NEXT: movd %eax, %xmm0 2466; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2467; X32-NEXT: movd %eax, %xmm3 2468; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2469; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2470; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2471; X32-NEXT: movd %eax, %xmm0 2472; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2473; X32-NEXT: movd %eax, %xmm2 2474; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2475; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2476; X32-NEXT: movd %eax, %xmm4 2477; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2478; X32-NEXT: movd %eax, %xmm0 2479; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 2480; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2481; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2482; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2483; X32-NEXT: retl 2484; 2485; X64-LABEL: test_mm_setr_epi8: 2486; X64: # BB#0: 2487; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2488; X64-NEXT: movd %eax, %xmm0 2489; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2490; X64-NEXT: movd %eax, %xmm1 2491; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2492; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2493; X64-NEXT: movd %eax, %xmm0 2494; X64-NEXT: movzbl %cl, %eax 2495; X64-NEXT: movd %eax, %xmm2 2496; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2497; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2498; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2499; X64-NEXT: movd %eax, %xmm0 2500; X64-NEXT: movzbl %r9b, %eax 2501; X64-NEXT: movd %eax, %xmm3 2502; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2503; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2504; 
X64-NEXT: movd %eax, %xmm0 2505; X64-NEXT: movzbl %sil, %eax 2506; X64-NEXT: movd %eax, %xmm1 2507; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2508; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2509; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2510; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2511; X64-NEXT: movd %eax, %xmm0 2512; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2513; X64-NEXT: movd %eax, %xmm2 2514; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2515; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2516; X64-NEXT: movd %eax, %xmm0 2517; X64-NEXT: movzbl %dl, %eax 2518; X64-NEXT: movd %eax, %xmm3 2519; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2520; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2521; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2522; X64-NEXT: movd %eax, %xmm0 2523; X64-NEXT: movzbl %r8b, %eax 2524; X64-NEXT: movd %eax, %xmm2 2525; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2526; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 2527; X64-NEXT: movd %eax, %xmm4 2528; X64-NEXT: movzbl %dil, %eax 2529; X64-NEXT: movd %eax, %xmm0 2530; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 2531; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2532; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2533; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2534; X64-NEXT: retq 2535 %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0 2536 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1 2537 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2 2538 %res3 = insertelement <16 x i8> %res2, i8 %a3 , i32 3 2539 %res4 = insertelement <16 x i8> %res3, i8 %a4 , i32 4 2540 %res5 = insertelement <16 x i8> %res4, i8 %a5 , i32 5 2541 %res6 = insertelement <16 x i8> %res5, i8 %a6 , i32 6 2542 %res7 = insertelement <16 x i8> %res6, i8 %a7 , i32 7 2543 %res8 = insertelement <16 x i8> %res7, i8 %a8 , i32 8 2544 %res9 = insertelement <16 x i8> %res8, i8 %a9 , i32 9 2545 %res10 = insertelement <16 x i8> %res9, i8 %a10, i32 10 2546 %res11 = insertelement <16 x i8> %res10, i8 %a11, i32 11 2547 %res12 = insertelement <16 x i8> %res11, i8 %a12, i32 12 2548 %res13 = insertelement <16 x i8> %res12, i8 %a13, i32 13 2549 %res14 = insertelement <16 x i8> %res13, i8 %a14, i32 14 2550 %res15 = insertelement <16 x i8> %res14, i8 %a15, i32 15 2551 %res = 
bitcast <16 x i8> %res15 to <2 x i64> 2552 ret <2 x i64> %res 2553} 2554 2555define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { 2556; X32-LABEL: test_mm_setr_epi16: 2557; X32: # BB#0: 2558; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2559; X32-NEXT: movd %eax, %xmm1 2560; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2561; X32-NEXT: movd %eax, %xmm2 2562; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2563; X32-NEXT: movd %eax, %xmm3 2564; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2565; X32-NEXT: movd %eax, %xmm4 2566; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2567; X32-NEXT: movd %eax, %xmm5 2568; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2569; X32-NEXT: movd %eax, %xmm6 2570; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2571; X32-NEXT: movd %eax, %xmm7 2572; X32-NEXT: movw {{[0-9]+}}(%esp), %ax 2573; X32-NEXT: movd %eax, %xmm0 2574; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2575; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 2576; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] 2577; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] 2578; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] 2579; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3] 2580; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 2581; X32-NEXT: retl 2582; 2583; X64-LABEL: test_mm_setr_epi16: 2584; X64: # BB#0: 2585; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax 2586; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w 2587; X64-NEXT: movd %eax, %xmm0 2588; X64-NEXT: movd %ecx, %xmm1 2589; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2590; X64-NEXT: movd %r9d, %xmm0 2591; X64-NEXT: movd %esi, %xmm2 2592; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2593; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2594; X64-NEXT: movd %r10d, %xmm0 2595; X64-NEXT: movd %edx, %xmm1 2596; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2597; X64-NEXT: movd %r8d, %xmm3 2598; X64-NEXT: movd %edi, %xmm0 2599; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 2600; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2601; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 2602; X64-NEXT: retq 2603 %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 2604 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1 2605 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2 2606 %res3 = insertelement <8 x i16> %res2, i16 %a3, i32 3 2607 %res4 = insertelement <8 x i16> %res3, i16 %a4, i32 4 2608 %res5 = insertelement <8 x i16> %res4, i16 %a5, i32 5 2609 %res6 = insertelement <8 x i16> %res5, i16 %a6, i32 6 2610 %res7 = insertelement <8 x i16> %res6, i16 %a7, i32 7 2611 %res = bitcast <8 x i16> %res7 to <2 x i64> 2612 ret <2 x i64> %res 2613} 2614 2615define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { 2616; X32-LABEL: test_mm_setr_epi32: 2617; X32: # BB#0: 2618; X32-NEXT: movd {{.*#+}} xmm0 = 
mem[0],zero,zero,zero 2619; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2620; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2621; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 2622; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2623; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2624; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2625; X32-NEXT: retl 2626; 2627; X64-LABEL: test_mm_setr_epi32: 2628; X64: # BB#0: 2629; X64-NEXT: movd %ecx, %xmm0 2630; X64-NEXT: movd %esi, %xmm1 2631; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2632; X64-NEXT: movd %edx, %xmm2 2633; X64-NEXT: movd %edi, %xmm0 2634; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2635; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2636; X64-NEXT: retq 2637 %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 2638 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1 2639 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2 2640 %res3 = insertelement <4 x i32> %res2, i32 %a3, i32 3 2641 %res = bitcast <4 x i32> %res3 to <2 x i64> 2642 ret <2 x i64> %res 2643} 2644 2645; TODO test_mm_setr_epi64 2646 2647define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind { 2648; X32-LABEL: test_mm_setr_epi64x: 2649; X32: # BB#0: 2650; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2651; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2652; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2653; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2654; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 2655; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2656; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2657; X32-NEXT: retl 2658; 2659; X64-LABEL: test_mm_setr_epi64x: 2660; X64: # BB#0: 2661; X64-NEXT: movd %rsi, %xmm1 2662; X64-NEXT: movd %rdi, %xmm0 2663; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2664; X64-NEXT: retq 2665 %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 2666 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1 2667 ret <2 x i64> %res1 2668} 2669 2670define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind { 2671; X32-LABEL: test_mm_setr_pd: 2672; X32: # BB#0: 2673; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2674; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2675; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2676; X32-NEXT: retl 2677; 2678; X64-LABEL: test_mm_setr_pd: 2679; X64: # BB#0: 2680; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2681; X64-NEXT: retq 2682 %res0 = insertelement <2 x double> undef, double %a0, i32 0 2683 %res1 = insertelement <2 x double> %res0, double %a1, i32 1 2684 ret <2 x double> %res1 2685} 2686 2687define <2 x double> @test_mm_setzero_pd() { 2688; X32-LABEL: test_mm_setzero_pd: 2689; X32: # BB#0: 2690; X32-NEXT: xorps %xmm0, %xmm0 2691; X32-NEXT: retl 2692; 2693; X64-LABEL: test_mm_setzero_pd: 2694; X64: # BB#0: 2695; X64-NEXT: xorps %xmm0, %xmm0 2696; X64-NEXT: retq 2697 ret <2 x double> zeroinitializer 2698} 2699 2700define <2 x i64> @test_mm_setzero_si128() { 2701; X32-LABEL: test_mm_setzero_si128: 2702; X32: # BB#0: 2703; X32-NEXT: xorps %xmm0, %xmm0 2704; X32-NEXT: retl 2705; 2706; X64-LABEL: test_mm_setzero_si128: 2707; X64: # BB#0: 2708; X64-NEXT: xorps %xmm0, %xmm0 2709; X64-NEXT: retq 2710 ret <2 x i64> zeroinitializer 2711} 2712 2713define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) { 2714; X32-LABEL: 
test_mm_shuffle_epi32: 2715; X32: # BB#0: 2716; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2717; X32-NEXT: retl 2718; 2719; X64-LABEL: test_mm_shuffle_epi32: 2720; X64: # BB#0: 2721; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2722; X64-NEXT: retq 2723 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 2724 %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer 2725 %bc = bitcast <4 x i32> %res to <2 x i64> 2726 ret <2 x i64> %bc 2727} 2728 2729define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { 2730; X32-LABEL: test_mm_shuffle_pd: 2731; X32: # BB#0: 2732; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 2733; X32-NEXT: retl 2734; 2735; X64-LABEL: test_mm_shuffle_pd: 2736; X64: # BB#0: 2737; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] 2738; X64-NEXT: retq 2739 %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 2740 ret <2 x double> %res 2741} 2742 2743define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) { 2744; X32-LABEL: test_mm_shufflehi_epi16: 2745; X32: # BB#0: 2746; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 2747; X32-NEXT: retl 2748; 2749; X64-LABEL: test_mm_shufflehi_epi16: 2750; X64: # BB#0: 2751; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] 2752; X64-NEXT: retq 2753 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2754 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> 2755 %bc = bitcast <8 x i16> %res to <2 x i64> 2756 ret <2 x i64> %bc 2757} 2758 2759define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) { 2760; X32-LABEL: test_mm_shufflelo_epi16: 2761; X32: # BB#0: 2762; X32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2763; X32-NEXT: retl 2764; 2765; X64-LABEL: test_mm_shufflelo_epi16: 2766; X64: # BB#0: 2767; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2768; X64-NEXT: retq 2769 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2770 %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> 2771 %bc = bitcast <8 x i16> %res to <2 x i64> 2772 ret <2 x i64> %bc 2773} 2774 2775define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2776; X32-LABEL: test_mm_sll_epi16: 2777; X32: # BB#0: 2778; X32-NEXT: psllw %xmm1, %xmm0 2779; X32-NEXT: retl 2780; 2781; X64-LABEL: test_mm_sll_epi16: 2782; X64: # BB#0: 2783; X64-NEXT: psllw %xmm1, %xmm0 2784; X64-NEXT: retq 2785 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2786 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2787 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1) 2788 %bc = bitcast <8 x i16> %res to <2 x i64> 2789 ret <2 x i64> %bc 2790} 2791declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 2792 2793define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) { 2794; X32-LABEL: test_mm_sll_epi32: 2795; X32: # BB#0: 2796; X32-NEXT: pslld %xmm1, %xmm0 2797; X32-NEXT: retl 2798; 2799; X64-LABEL: test_mm_sll_epi32: 2800; X64: # BB#0: 2801; X64-NEXT: pslld %xmm1, %xmm0 2802; X64-NEXT: retq 2803 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 2804 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 2805 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1) 2806 %bc = bitcast <4 x i32> %res to <2 x i64> 2807 ret <2 x i64> %bc 2808} 2809declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone 2810 2811define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) { 2812; X32-LABEL: 
test_mm_sll_epi64: 2813; X32: # BB#0: 2814; X32-NEXT: psllq %xmm1, %xmm0 2815; X32-NEXT: retl 2816; 2817; X64-LABEL: test_mm_sll_epi64: 2818; X64: # BB#0: 2819; X64-NEXT: psllq %xmm1, %xmm0 2820; X64-NEXT: retq 2821 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) 2822 ret <2 x i64> %res 2823} 2824declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone 2825 2826define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) { 2827; X32-LABEL: test_mm_slli_epi16: 2828; X32: # BB#0: 2829; X32-NEXT: psllw $1, %xmm0 2830; X32-NEXT: retl 2831; 2832; X64-LABEL: test_mm_slli_epi16: 2833; X64: # BB#0: 2834; X64-NEXT: psllw $1, %xmm0 2835; X64-NEXT: retq 2836 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2837 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1) 2838 %bc = bitcast <8 x i16> %res to <2 x i64> 2839 ret <2 x i64> %bc 2840} 2841declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone 2842 2843define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) { 2844; X32-LABEL: test_mm_slli_epi32: 2845; X32: # BB#0: 2846; X32-NEXT: pslld $1, %xmm0 2847; X32-NEXT: retl 2848; 2849; X64-LABEL: test_mm_slli_epi32: 2850; X64: # BB#0: 2851; X64-NEXT: pslld $1, %xmm0 2852; X64-NEXT: retq 2853 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 2854 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1) 2855 %bc = bitcast <4 x i32> %res to <2 x i64> 2856 ret <2 x i64> %bc 2857} 2858declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone 2859 2860define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) { 2861; X32-LABEL: test_mm_slli_epi64: 2862; X32: # BB#0: 2863; X32-NEXT: psllq $1, %xmm0 2864; X32-NEXT: retl 2865; 2866; X64-LABEL: test_mm_slli_epi64: 2867; X64: # BB#0: 2868; X64-NEXT: psllq $1, %xmm0 2869; X64-NEXT: retq 2870 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1) 2871 ret <2 x i64> %res 2872} 2873declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone 2874 2875define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind { 2876; X32-LABEL: test_mm_slli_si128: 2877; X32: # BB#0: 2878; X32-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 2879; X32-NEXT: retl 2880; 2881; X64-LABEL: test_mm_slli_si128: 2882; X64: # BB#0: 2883; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] 2884; X64-NEXT: retq 2885 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 2886 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> 2887 %bc = bitcast <16 x i8> %res to <2 x i64> 2888 ret <2 x i64> %bc 2889} 2890 2891define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind { 2892; X32-LABEL: test_mm_sqrt_pd: 2893; X32: # BB#0: 2894; X32-NEXT: sqrtpd %xmm0, %xmm0 2895; X32-NEXT: retl 2896; 2897; X64-LABEL: test_mm_sqrt_pd: 2898; X64: # BB#0: 2899; X64-NEXT: sqrtpd %xmm0, %xmm0 2900; X64-NEXT: retq 2901 %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) 2902 ret <2 x double> %res 2903} 2904declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 2905 2906define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { 2907; X32-LABEL: test_mm_sqrt_sd: 2908; X32: # BB#0: 2909; X32-NEXT: sqrtsd %xmm0, %xmm1 2910; X32-NEXT: movaps %xmm1, %xmm0 2911; X32-NEXT: retl 2912; 2913; X64-LABEL: test_mm_sqrt_sd: 2914; X64: # BB#0: 2915; X64-NEXT: 
sqrtsd %xmm0, %xmm1 2916; X64-NEXT: movaps %xmm1, %xmm0 2917; X64-NEXT: retq 2918 %call = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) 2919 %ext0 = extractelement <2 x double> %call, i32 0 2920 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0 2921 %ext1 = extractelement <2 x double> %a1, i32 1 2922 %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1 2923 ret <2 x double> %ins1 2924} 2925declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 2926 2927define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2928; X32-LABEL: test_mm_sra_epi16: 2929; X32: # BB#0: 2930; X32-NEXT: psraw %xmm1, %xmm0 2931; X32-NEXT: retl 2932; 2933; X64-LABEL: test_mm_sra_epi16: 2934; X64: # BB#0: 2935; X64-NEXT: psraw %xmm1, %xmm0 2936; X64-NEXT: retq 2937 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2938 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 2939 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1) 2940 %bc = bitcast <8 x i16> %res to <2 x i64> 2941 ret <2 x i64> %bc 2942} 2943declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone 2944 2945define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) { 2946; X32-LABEL: test_mm_sra_epi32: 2947; X32: # BB#0: 2948; X32-NEXT: psrad %xmm1, %xmm0 2949; X32-NEXT: retl 2950; 2951; X64-LABEL: test_mm_sra_epi32: 2952; X64: # BB#0: 2953; X64-NEXT: psrad %xmm1, %xmm0 2954; X64-NEXT: retq 2955 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 2956 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 2957 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1) 2958 %bc = bitcast <4 x i32> %res to <2 x i64> 2959 ret <2 x i64> %bc 2960} 2961declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone 2962 2963define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) { 2964; X32-LABEL: test_mm_srai_epi16: 2965; X32: # BB#0: 2966; X32-NEXT: psraw $1, %xmm0 2967; X32-NEXT: retl 2968; 2969; X64-LABEL: test_mm_srai_epi16: 2970; X64: # BB#0: 2971; X64-NEXT: psraw $1, %xmm0 2972; X64-NEXT: retq 2973 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 2974 %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1) 2975 %bc = bitcast <8 x i16> %res to <2 x i64> 2976 ret <2 x i64> %bc 2977} 2978declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone 2979 2980define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) { 2981; X32-LABEL: test_mm_srai_epi32: 2982; X32: # BB#0: 2983; X32-NEXT: psrad $1, %xmm0 2984; X32-NEXT: retl 2985; 2986; X64-LABEL: test_mm_srai_epi32: 2987; X64: # BB#0: 2988; X64-NEXT: psrad $1, %xmm0 2989; X64-NEXT: retq 2990 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 2991 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1) 2992 %bc = bitcast <4 x i32> %res to <2 x i64> 2993 ret <2 x i64> %bc 2994} 2995declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone 2996 2997define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) { 2998; X32-LABEL: test_mm_srl_epi16: 2999; X32: # BB#0: 3000; X32-NEXT: psrlw %xmm1, %xmm0 3001; X32-NEXT: retl 3002; 3003; X64-LABEL: test_mm_srl_epi16: 3004; X64: # BB#0: 3005; X64-NEXT: psrlw %xmm1, %xmm0 3006; X64-NEXT: retq 3007 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 3008 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 3009 %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1) 3010 %bc = bitcast <8 x i16> %res to <2 x i64> 3011 ret <2 x i64> %bc 3012} 3013declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind 
readnone 3014 3015define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) { 3016; X32-LABEL: test_mm_srl_epi32: 3017; X32: # BB#0: 3018; X32-NEXT: psrld %xmm1, %xmm0 3019; X32-NEXT: retl 3020; 3021; X64-LABEL: test_mm_srl_epi32: 3022; X64: # BB#0: 3023; X64-NEXT: psrld %xmm1, %xmm0 3024; X64-NEXT: retq 3025 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 3026 %arg1 = bitcast <2 x i64> %a1 to <4 x i32> 3027 %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1) 3028 %bc = bitcast <4 x i32> %res to <2 x i64> 3029 ret <2 x i64> %bc 3030} 3031declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone 3032 3033define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) { 3034; X32-LABEL: test_mm_srl_epi64: 3035; X32: # BB#0: 3036; X32-NEXT: psrlq %xmm1, %xmm0 3037; X32-NEXT: retl 3038; 3039; X64-LABEL: test_mm_srl_epi64: 3040; X64: # BB#0: 3041; X64-NEXT: psrlq %xmm1, %xmm0 3042; X64-NEXT: retq 3043 %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) 3044 ret <2 x i64> %res 3045} 3046declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone 3047 3048define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) { 3049; X32-LABEL: test_mm_srli_epi16: 3050; X32: # BB#0: 3051; X32-NEXT: psrlw $1, %xmm0 3052; X32-NEXT: retl 3053; 3054; X64-LABEL: test_mm_srli_epi16: 3055; X64: # BB#0: 3056; X64-NEXT: psrlw $1, %xmm0 3057; X64-NEXT: retq 3058 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 3059 %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1) 3060 %bc = bitcast <8 x i16> %res to <2 x i64> 3061 ret <2 x i64> %bc 3062} 3063declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone 3064 3065define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) { 3066; X32-LABEL: test_mm_srli_epi32: 3067; X32: # BB#0: 3068; X32-NEXT: psrld $1, %xmm0 3069; X32-NEXT: retl 3070; 3071; X64-LABEL: test_mm_srli_epi32: 3072; X64: # BB#0: 3073; X64-NEXT: psrld $1, %xmm0 3074; X64-NEXT: retq 3075 %arg0 = bitcast <2 x i64> %a0 to <4 x i32> 3076 %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1) 3077 %bc = bitcast <4 x i32> %res to <2 x i64> 3078 ret <2 x i64> %bc 3079} 3080declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone 3081 3082define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) { 3083; X32-LABEL: test_mm_srli_epi64: 3084; X32: # BB#0: 3085; X32-NEXT: psrlq $1, %xmm0 3086; X32-NEXT: retl 3087; 3088; X64-LABEL: test_mm_srli_epi64: 3089; X64: # BB#0: 3090; X64-NEXT: psrlq $1, %xmm0 3091; X64-NEXT: retq 3092 %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1) 3093 ret <2 x i64> %res 3094} 3095declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone 3096 3097define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind { 3098; X32-LABEL: test_mm_srli_si128: 3099; X32: # BB#0: 3100; X32-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 3101; X32-NEXT: retl 3102; 3103; X64-LABEL: test_mm_srli_si128: 3104; X64: # BB#0: 3105; X64-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero 3106; X64-NEXT: retq 3107 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 3108 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> 3109 %bc = bitcast <16 x i8> %res to <2 x i64> 3110 ret <2 x i64> %bc 3111} 3112 3113define void @test_mm_store_pd(double 
*%a0, <2 x double> %a1) { 3114; X32-LABEL: test_mm_store_pd: 3115; X32: # BB#0: 3116; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3117; X32-NEXT: movaps %xmm0, (%eax) 3118; X32-NEXT: retl 3119; 3120; X64-LABEL: test_mm_store_pd: 3121; X64: # BB#0: 3122; X64-NEXT: movaps %xmm0, (%rdi) 3123; X64-NEXT: retq 3124 %arg0 = bitcast double* %a0 to <2 x double>* 3125 store <2 x double> %a1, <2 x double>* %arg0, align 16 3126 ret void 3127} 3128 3129define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) { 3130; X32-LABEL: test_mm_store_pd1: 3131; X32: # BB#0: 3132; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3133; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 3134; X32-NEXT: movaps %xmm0, (%eax) 3135; X32-NEXT: retl 3136; 3137; X64-LABEL: test_mm_store_pd1: 3138; X64: # BB#0: 3139; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 3140; X64-NEXT: movaps %xmm0, (%rdi) 3141; X64-NEXT: retq 3142 %arg0 = bitcast double * %a0 to <2 x double>* 3143 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer 3144 store <2 x double> %shuf, <2 x double>* %arg0, align 16 3145 ret void 3146} 3147 3148define void @test_mm_store_sd(double *%a0, <2 x double> %a1) { 3149; X32-LABEL: test_mm_store_sd: 3150; X32: # BB#0: 3151; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3152; X32-NEXT: movsd %xmm0, (%eax) 3153; X32-NEXT: retl 3154; 3155; X64-LABEL: test_mm_store_sd: 3156; X64: # BB#0: 3157; X64-NEXT: movsd %xmm0, (%rdi) 3158; X64-NEXT: retq 3159 %ext = extractelement <2 x double> %a1, i32 0 3160 store double %ext, double* %a0, align 1 3161 ret void 3162} 3163 3164define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) { 3165; X32-LABEL: test_mm_store_si128: 3166; X32: # BB#0: 3167; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3168; X32-NEXT: movaps %xmm0, (%eax) 3169; X32-NEXT: retl 3170; 3171; X64-LABEL: test_mm_store_si128: 3172; X64: # BB#0: 3173; X64-NEXT: movaps %xmm0, (%rdi) 3174; X64-NEXT: retq 3175 store <2 x i64> %a1, <2 x i64>* %a0, align 16 3176 ret void 3177} 3178 3179define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) { 3180; X32-LABEL: test_mm_store1_pd: 3181; X32: # BB#0: 3182; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3183; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 3184; X32-NEXT: movaps %xmm0, (%eax) 3185; X32-NEXT: retl 3186; 3187; X64-LABEL: test_mm_store1_pd: 3188; X64: # BB#0: 3189; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 3190; X64-NEXT: movaps %xmm0, (%rdi) 3191; X64-NEXT: retq 3192 %arg0 = bitcast double * %a0 to <2 x double>* 3193 %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer 3194 store <2 x double> %shuf, <2 x double>* %arg0, align 16 3195 ret void 3196} 3197 3198define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) { 3199; X32-LABEL: test_mm_storeh_sd: 3200; X32: # BB#0: 3201; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3202; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] 3203; X32-NEXT: movsd %xmm0, (%eax) 3204; X32-NEXT: retl 3205; 3206; X64-LABEL: test_mm_storeh_sd: 3207; X64: # BB#0: 3208; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] 3209; X64-NEXT: movsd %xmm0, (%rdi) 3210; X64-NEXT: retq 3211 %ext = extractelement <2 x double> %a1, i32 1 3212 store double %ext, double* %a0, align 8 3213 ret void 3214} 3215 3216define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) { 3217; X32-LABEL: test_mm_storel_epi64: 3218; X32: # BB#0: 3219; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 3220; X32-NEXT: movlps %xmm0, (%eax) 3221; X32-NEXT: retl 3222; 3223; X64-LABEL: test_mm_storel_epi64: 3224; X64: # BB#0: 3225; X64-NEXT: movd %xmm0, %rax 
; X64-NEXT: movq %rax, (%rdi)
; X64-NEXT: retq
  %ext = extractelement <2 x i64> %a1, i32 0
  %bc = bitcast <2 x i64> *%a0 to i64*
  store i64 %ext, i64* %bc, align 8
  ret void
}

define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storel_sd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movsd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storel_sd:
; X64: # BB#0:
; X64-NEXT: movsd %xmm0, (%rdi)
; X64-NEXT: retq
  %ext = extractelement <2 x double> %a1, i32 0
  store double %ext, double* %a0, align 8
  ret void
}

define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storer_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X32-NEXT: movapd %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storer_pd:
; X64: # BB#0:
; X64-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  store <2 x double> %shuf, <2 x double>* %arg0, align 16
  ret void
}

define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_storeu_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storeu_pd:
; X64: # BB#0:
; X64-NEXT: movups %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 1
  ret void
}

define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_storeu_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storeu_si128:
; X64: # BB#0:
; X64-NEXT: movups %xmm0, (%rdi)
; X64-NEXT: retq
  store <2 x i64> %a1, <2 x i64>* %a0, align 1
  ret void
}

define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
; X32-LABEL: test_mm_stream_pd:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movntps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_pd:
; X64: # BB#0:
; X64-NEXT: movntps %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast double* %a0 to <2 x double>*
  store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
  ret void
}

define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
; X32-LABEL: test_mm_stream_si32:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movntil %eax, (%ecx)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_si32:
; X64: # BB#0:
; X64-NEXT: movntil %esi, (%rdi)
; X64-NEXT: retq
  store i32 %a1, i32* %a0, align 1, !nontemporal !0
  ret void
}

define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_stream_si128:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movntps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_stream_si128:
; X64: # BB#0:
; X64-NEXT: movntps %xmm0, (%rdi)
; X64-NEXT: retq
  store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
  ret void
}

define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi8:
; X32: # BB#0:
; X32-NEXT: psubb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi8:
; X64: # BB#0:
; X64-NEXT: psubb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = sub <16 x i8> %arg0, %arg1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi16:
; X32: # BB#0:
; X32-NEXT: psubw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi16:
; X64: # BB#0:
; X64-NEXT: psubw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = sub <8 x i16> %arg0, %arg1
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi32:
; X32: # BB#0:
; X32-NEXT: psubd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi32:
; X64: # BB#0:
; X64-NEXT: psubd %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = sub <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_sub_epi64:
; X32: # BB#0:
; X32-NEXT: psubq %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_epi64:
; X64: # BB#0:
; X64-NEXT: psubq %xmm1, %xmm0
; X64-NEXT: retq
  %res = sub <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_pd:
; X32: # BB#0:
; X32-NEXT: subpd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_pd:
; X64: # BB#0:
; X64-NEXT: subpd %xmm1, %xmm0
; X64-NEXT: retq
  %res = fsub <2 x double> %a0, %a1
  ret <2 x double> %res
}

define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_sub_sd:
; X32: # BB#0:
; X32-NEXT: subsd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sub_sd:
; X64: # BB#0:
; X64-NEXT: subsd %xmm1, %xmm0
; X64-NEXT: retq
  %ext0 = extractelement <2 x double> %a0, i32 0
  %ext1 = extractelement <2 x double> %a1, i32 0
  %fsub = fsub double %ext0, %ext1
  %res = insertelement <2 x double> %a0, double %fsub, i32 0
  ret <2 x double> %res
}

define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi8:
; X32: # BB#0:
; X32-NEXT: psubsb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epi8:
; X64: # BB#0:
; X64-NEXT: psubsb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
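; NOTE (editorial annotation, not autogenerated): the test_mm_subs_* cases here
; exercise the SSE2 saturating-subtract intrinsics; psubs.b/psubs.w clamp each
; element to the signed range and psubus.b/psubus.w to the unsigned range,
; unlike the wrapping sub-based tests above.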
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epi16:
; X32: # BB#0:
; X32-NEXT: psubsw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epi16:
; X64: # BB#0:
; X64-NEXT: psubsw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu8:
; X32: # BB#0:
; X32-NEXT: psubusb %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epu8:
; X64: # BB#0:
; X64-NEXT: psubusb %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_subs_epu16:
; X32: # BB#0:
; X32-NEXT: psubusw %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_subs_epu16:
; X64: # BB#0:
; X64-NEXT: psubusw %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone

define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomieq_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setnp %al
; X32-NEXT: sete %cl
; X32-NEXT: andb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomieq_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setnp %al
; X64-NEXT: sete %cl
; X64-NEXT: andb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomige_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setae %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomige_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setae %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomigt_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: seta %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomigt_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: seta %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomile_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ucomisd %xmm0, %xmm1
; X32-NEXT: setae %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomile_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm1
; X64-NEXT: setae %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomilt_sd:
; X32: # BB#0:
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ucomisd %xmm0, %xmm1
; X32-NEXT: seta %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomilt_sd:
; X64: # BB#0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm1
; X64-NEXT: seta %al
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_ucomineq_sd:
; X32: # BB#0:
; X32-NEXT: ucomisd %xmm1, %xmm0
; X32-NEXT: setp %al
; X32-NEXT: setne %cl
; X32-NEXT: orb %al, %cl
; X32-NEXT: movzbl %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_ucomineq_sd:
; X64: # BB#0:
; X64-NEXT: ucomisd %xmm1, %xmm0
; X64-NEXT: setp %al
; X64-NEXT: setne %cl
; X64-NEXT: orb %al, %cl
; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

define <2 x double> @test_mm_undefined_pd() {
; X32-LABEL: test_mm_undefined_pd:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_pd:
; X64: # BB#0:
; X64-NEXT: retq
  ret <2 x double> undef
}

define <2 x i64> @test_mm_undefined_si128() {
; X32-LABEL: test_mm_undefined_si128:
; X32: # BB#0:
; X32-NEXT: retl
;
; X64-LABEL: test_mm_undefined_si128:
; X64: # BB#0:
; X64-NEXT: retq
  ret <2 x i64> undef
}

define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi8:
; X32: # BB#0:
; X32-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi8:
; X64: # BB#0:
; X64-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi16:
; X32: # BB#0:
; X32-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi16:
; X64: # BB#0:
; X64-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi32:
; X32: # BB#0:
; X32-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi32:
; X64: # BB#0:
; X64-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpackhi_epi64:
; X32: # BB#0:
; X32-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_epi64:
; X64: # BB#0:
; X64-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpackhi_pd:
; X32: # BB#0:
; X32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpackhi_pd:
; X64: # BB#0:
; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; X64-NEXT: retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %res
}

define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi8:
; X32: # BB#0:
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi8:
; X64: # BB#0:
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi16:
; X32: # BB#0:
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi16:
; X64: # BB#0:
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi32:
; X32: # BB#0:
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi32:
; X64: # BB#0:
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_unpacklo_epi64:
; X32: # BB#0:
; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_epi64:
; X64: # BB#0:
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_unpacklo_pd:
; X32: # BB#0:
; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_unpacklo_pd:
; X64: # BB#0:
; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %res
}

define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_xor_pd:
; X32: # BB#0:
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_xor_pd:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm0
; X64-NEXT: retq
  %arg0 = bitcast <2 x double> %a0 to <4 x i32>
  %arg1 = bitcast <2 x double> %a1 to <4 x i32>
  %res = xor <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x double>
  ret <2 x double> %bc
}

define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm_xor_si128:
; X32: # BB#0:
; X32-NEXT: xorps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_xor_si128:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm0
; X64-NEXT: retq
  %res = xor <2 x i64> %a0, %a1
  ret <2 x i64> %res
}

!0 = !{i32 1}
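
; NOTE (editorial annotation, not autogenerated): !0 is the metadata node
; referenced by the !nontemporal annotations on the stores in test_mm_stream_pd,
; test_mm_stream_si32 and test_mm_stream_si128 above; the i32 1 operand marks
; those stores as non-temporal, which is why those tests check for the
; movntps/movntil instructions.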