; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse41-builtins.c

define <2 x i64> @test_mm_blend_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_blend_epi16:
; X32:       # BB#0:
; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_blend_epi16:
; X64:       # BB#0:
; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  %res = bitcast <8 x i16> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_blend_pd:
; X32:       # BB#0:
; X32-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_blend_pd:
; X64:       # BB#0:
; X64-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

define <4 x float> @test_mm_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_blend_ps:
; X32:       # BB#0:
; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_blend_ps:
; X64:       # BB#0:
; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_blendv_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_blendv_epi8:
; X32:       # BB#0:
; X32-NEXT:    movdqa %xmm0, %xmm3
; X32-NEXT:    movaps %xmm2, %xmm0
; X32-NEXT:    pblendvb %xmm1, %xmm3
; X32-NEXT:    movdqa %xmm3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_blendv_epi8:
; X64:       # BB#0:
; X64-NEXT:    movdqa %xmm0, %xmm3
; X64-NEXT:    movaps %xmm2, %xmm0
; X64-NEXT:    pblendvb %xmm1, %xmm3
; X64-NEXT:    movdqa %xmm3, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

define <2 x double> @test_mm_blendv_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; X32-LABEL: test_mm_blendv_pd:
; X32:       # BB#0:
; X32-NEXT:    movapd %xmm0, %xmm3
; X32-NEXT:    movaps %xmm2, %xmm0
; X32-NEXT:    blendvpd %xmm1, %xmm3
; X32-NEXT:    movapd %xmm3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_blendv_pd:
; X64:       # BB#0:
; X64-NEXT:    movapd %xmm0, %xmm3
; X64-NEXT:    movaps %xmm2, %xmm0
; X64-NEXT:    blendvpd %xmm1, %xmm3
; X64-NEXT:    movapd %xmm3, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_mm_blendv_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_blendv_ps:
; X32:       # BB#0:
; X32-NEXT:    movaps %xmm0, %xmm3
; X32-NEXT:    movaps %xmm2, %xmm0
; X32-NEXT:    blendvps %xmm1, %xmm3
; X32-NEXT:    movaps %xmm3, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_blendv_ps:
; X64:       # BB#0:
; X64-NEXT:    movaps %xmm0, %xmm3
; X64-NEXT:    movaps %xmm2, %xmm0
; X64-NEXT:    blendvps %xmm1, %xmm3
; X64-NEXT:    movaps %xmm3, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_mm_ceil_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_ceil_pd:
; X32:       # BB#0:
; X32-NEXT:    roundpd $2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ceil_pd:
; X64:       # BB#0:
; X64-NEXT:    roundpd $2, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x float> @test_mm_ceil_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_ceil_ps:
; X32:       # BB#0:
; X32-NEXT:    roundps $2, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ceil_ps:
; X64:       # BB#0:
; X64-NEXT:    roundps $2, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define <2 x double> @test_mm_ceil_sd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_ceil_sd:
; X32:       # BB#0:
; X32-NEXT:    roundsd $2, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ceil_sd:
; X64:       # BB#0:
; X64-NEXT:    roundsd $2, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

define <4 x float> @test_mm_ceil_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_ceil_ss:
; X32:       # BB#0:
; X32-NEXT:    roundss $2, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_ceil_ss:
; X64:       # BB#0:
; X64-NEXT:    roundss $2, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

define <2 x i64> @test_mm_cmpeq_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpeq_epi64:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmpeq_epi64:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqq %xmm1, %xmm0
; X64-NEXT:    retq
  %cmp = icmp eq <2 x i64> %a0, %a1
  %res = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepi8_epi16:
; X32:       # BB#0:
; X32-NEXT:    pmovsxbw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepi8_epi16:
; X64:       # BB#0:
; X64-NEXT:    pmovsxbw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %sext = sext <8 x i8> %ext0 to <8 x i16>
  %res = bitcast <8 x i16> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepi8_epi32:
; X32:       # BB#0:
; X32-NEXT:    pmovsxbd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepi8_epi32:
; X64:       # BB#0:
; X64-NEXT:    pmovsxbd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = sext <4 x i8> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepi8_epi64:
; X32:       # BB#0:
; X32-NEXT:    pmovsxbq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepi8_epi64:
; X64:       # BB#0:
; X64-NEXT:    pmovsxbq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i8> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepi16_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepi16_epi32:
; X32:       # BB#0:
; X32-NEXT:    pmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepi16_epi32:
; X64:       # BB#0:
; X64-NEXT:    pmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = sext <4 x i16> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi16_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepi16_epi64:
; X32:       # BB#0:
; X32-NEXT:    pmovsxwq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepi16_epi64:
; X64:       # BB#0:
; X64-NEXT:    pmovsxwq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i16> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepi32_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepi32_epi64:
; X32:       # BB#0:
; X32-NEXT:    pmovsxdq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepi32_epi64:
; X64:       # BB#0:
; X64-NEXT:    pmovsxdq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i32> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu8_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu8_epi16:
; X32:       # BB#0:
; X32-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepu8_epi16:
; X64:       # BB#0:
; X64-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %sext = zext <8 x i8> %ext0 to <8 x i16>
  %res = bitcast <8 x i16> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu8_epi32:
; X32:       # BB#0:
; X32-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepu8_epi32:
; X64:       # BB#0:
; X64-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = zext <4 x i8> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu8_epi64:
; X32:       # BB#0:
; X32-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepu8_epi64:
; X64:       # BB#0:
; X64-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i8> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu16_epi32:
; X32:       # BB#0:
; X32-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepu16_epi32:
; X64:       # BB#0:
; X64-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = zext <4 x i16> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu16_epi64:
; X32:       # BB#0:
; X32-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepu16_epi64:
; X64:       # BB#0:
; X64-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i16> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu32_epi64:
; X32:       # BB#0:
; X32-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cvtepu32_epi64:
; X64:       # BB#0:
; X64-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i32> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x double> @test_mm_dp_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_dp_pd:
; X32:       # BB#0:
; X32-NEXT:    dppd $7, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_dp_pd:
; X64:       # BB#0:
; X64-NEXT:    dppd $7, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @test_mm_dp_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_dp_ps:
; X32:       # BB#0:
; X32-NEXT:    dpps $7, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_dp_ps:
; X64:       # BB#0:
; X64-NEXT:    dpps $7, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @test_mm_extract_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_extract_epi8:
; X32:       # BB#0:
; X32-NEXT:    pextrb $1, %xmm0, %eax
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_extract_epi8:
; X64:       # BB#0:
; X64-NEXT:    pextrb $1, %xmm0, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext = extractelement <16 x i8> %arg0, i32 1
  %res = zext i8 %ext to i32
  ret i32 %res
}

define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_extract_epi32:
; X32:       # BB#0:
; X32-NEXT:    pextrd $1, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_extract_epi32:
; X64:       # BB#0:
; X64-NEXT:    pextrd $1, %xmm0, %eax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = extractelement <4 x i32> %arg0, i32 1
  ret i32 %ext
}

define i64 @test_mm_extract_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_extract_epi64:
; X32:       # BB#0:
; X32-NEXT:    pextrd $2, %xmm0, %eax
; X32-NEXT:    pextrd $3, %xmm0, %edx
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_extract_epi64:
; X64:       # BB#0:
; X64-NEXT:    pextrq $1, %xmm0, %rax
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = extractelement <2 x i64> %a0, i32 1
  ret i64 %ext
}

; TODO test_mm_extract_ps

define <2 x double> @test_mm_floor_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_floor_pd:
; X32:       # BB#0:
; X32-NEXT:    roundpd $1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_floor_pd:
; X64:       # BB#0:
; X64-NEXT:    roundpd $1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 1)
  ret <2 x double> %res
}

define <4 x float> @test_mm_floor_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_floor_ps:
; X32:       # BB#0:
; X32-NEXT:    roundps $1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_floor_ps:
; X64:       # BB#0:
; X64-NEXT:    roundps $1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 1)
  ret <4 x float> %res
}

define <2 x double> @test_mm_floor_sd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_floor_sd:
; X32:       # BB#0:
; X32-NEXT:    roundsd $1, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_floor_sd:
; X64:       # BB#0:
; X64-NEXT:    roundsd $1, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 1)
  ret <2 x double> %res
}

define <4 x float> @test_mm_floor_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_floor_ss:
; X32:       # BB#0:
; X32-NEXT:    roundss $1, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_floor_ss:
; X64:       # BB#0:
; X64-NEXT:    roundss $1, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 1)
  ret <4 x float> %res
}

define <2 x i64> @test_mm_insert_epi8(<2 x i64> %a0, i8 %a1) {
; X32-LABEL: test_mm_insert_epi8:
; X32:       # BB#0:
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    pinsrb $1, %eax, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_insert_epi8:
; X64:       # BB#0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    pinsrb $1, %eax, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = insertelement <16 x i8> %arg0, i8 %a1, i32 1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_insert_epi32(<2 x i64> %a0, i32 %a1) {
; X32-LABEL: test_mm_insert_epi32:
; X32:       # BB#0:
; X32-NEXT:    pinsrd $1, {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_insert_epi32:
; X64:       # BB#0:
; X64-NEXT:    pinsrd $1, %edi, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = insertelement <4 x i32> %arg0, i32 %a1, i32 1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_insert_epi64(<2 x i64> %a0, i64 %a1) {
; X32-LABEL: test_mm_insert_epi64:
; X32:       # BB#0:
; X32-NEXT:    pinsrd $2, {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_insert_epi64:
; X64:       # BB#0:
; X64-NEXT:    pinsrq $1, %rdi, %xmm0
; X64-NEXT:    retq
  %res = insertelement <2 x i64> %a0, i64 %a1, i32 1
  ret <2 x i64> %res
}

define <4 x float> @test_mm_insert_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_insert_ps:
; X32:       # BB#0:
; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_insert_ps:
; X64:       # BB#0:
; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

define <2 x i64> @test_mm_max_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_max_epi8:
; X32:       # BB#0:
; X32-NEXT:    pmaxsb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_max_epi8:
; X64:       # BB#0:
; X64-NEXT:    pmaxsb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp sgt <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_max_epi32:
; X32:       # BB#0:
; X32-NEXT:    pmaxsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_max_epi32:
; X64:       # BB#0:
; X64-NEXT:    pmaxsd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp sgt <4 x i32> %arg0, %arg1
  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_max_epu16:
; X32:       # BB#0:
; X32-NEXT:    pmaxuw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_max_epu16:
; X64:       # BB#0:
; X64-NEXT:    pmaxuw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp ugt <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_max_epu32:
; X32:       # BB#0:
; X32-NEXT:    pmaxud %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_max_epu32:
; X64:       # BB#0:
; X64-NEXT:    pmaxud %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp ugt <4 x i32> %arg0, %arg1
  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_min_epi8:
; X32:       # BB#0:
; X32-NEXT:    pminsb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_min_epi8:
; X64:       # BB#0:
; X64-NEXT:    pminsb %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %cmp = icmp slt <16 x i8> %arg0, %arg1
  %sel = select <16 x i1> %cmp, <16 x i8> %arg0, <16 x i8> %arg1
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_min_epi32:
; X32:       # BB#0:
; X32-NEXT:    pminsd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_min_epi32:
; X64:       # BB#0:
; X64-NEXT:    pminsd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp slt <4 x i32> %arg0, %arg1
  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_min_epu16:
; X32:       # BB#0:
; X32-NEXT:    pminuw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_min_epu16:
; X64:       # BB#0:
; X64-NEXT:    pminuw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %cmp = icmp ult <8 x i16> %arg0, %arg1
  %sel = select <8 x i1> %cmp, <8 x i16> %arg0, <8 x i16> %arg1
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_min_epu32:
; X32:       # BB#0:
; X32-NEXT:    pminud %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_min_epu32:
; X64:       # BB#0:
; X64-NEXT:    pminud %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = icmp ult <4 x i32> %arg0, %arg1
  %sel = select <4 x i1> %cmp, <4 x i32> %arg0, <4 x i32> %arg1
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_minpos_epu16(<2 x i64> %a0) {
; X32-LABEL: test_mm_minpos_epu16:
; X32:       # BB#0:
; X32-NEXT:    phminposuw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_minpos_epu16:
; X64:       # BB#0:
; X64-NEXT:    phminposuw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mpsadbw_epu8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mpsadbw_epu8:
; X32:       # BB#0:
; X32-NEXT:    mpsadbw $1, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mpsadbw_epu8:
; X64:       # BB#0:
; X64-NEXT:    mpsadbw $1, %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %arg0, <16 x i8> %arg1, i8 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_mul_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mul_epi32:
; X32:       # BB#0:
; X32-NEXT:    pmuldq %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mul_epi32:
; X64:       # BB#0:
; X64-NEXT:    pmuldq %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %arg0, <4 x i32> %arg1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_mullo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mullo_epi32:
; X32:       # BB#0:
; X32-NEXT:    pmulld %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mullo_epi32:
; X64:       # BB#0:
; X64-NEXT:    pmulld %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = mul <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_packus_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_packus_epi32:
; X32:       # BB#0:
; X32-NEXT:    packusdw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_packus_epi32:
; X64:       # BB#0:
; X64-NEXT:    packusdw %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x double> @test_mm_round_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_round_pd:
; X32:       # BB#0:
; X32-NEXT:    roundpd $4, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_round_pd:
; X64:       # BB#0:
; X64-NEXT:    roundpd $4, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 4)
  ret <2 x double> %res
}

define <4 x float> @test_mm_round_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_round_ps:
; X32:       # BB#0:
; X32-NEXT:    roundps $4, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_round_ps:
; X64:       # BB#0:
; X64-NEXT:    roundps $4, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 4)
  ret <4 x float> %res
}

define <2 x double> @test_mm_round_sd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_round_sd:
; X32:       # BB#0:
; X32-NEXT:    roundsd $4, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_round_sd:
; X64:       # BB#0:
; X64-NEXT:    roundsd $4, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 4)
  ret <2 x double> %res
}

define <4 x float> @test_mm_round_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_round_ss:
; X32:       # BB#0:
; X32-NEXT:    roundss $4, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_round_ss:
; X64:       # BB#0:
; X64-NEXT:    roundss $4, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 4)
  ret <4 x float> %res
}

define <2 x i64> @test_mm_stream_load_si128(<2 x i64>* %a0) {
; X32-LABEL: test_mm_stream_load_si128:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movntdqa (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_stream_load_si128:
; X64:       # BB#0:
; X64-NEXT:    movntdqa (%rdi), %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64>* %a0 to i8*
  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone

define i32 @test_mm_test_all_ones(<2 x i64> %a0) {
; X32-LABEL: test_mm_test_all_ones:
; X32:       # BB#0:
; X32-NEXT:    pcmpeqd %xmm1, %xmm1
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    sbbl %eax, %eax
; X32-NEXT:    andl $1, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_test_all_ones:
; X64:       # BB#0:
; X64-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    sbbl %eax, %eax
; X64-NEXT:    andl $1, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> <i64 -1, i64 -1>)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone

define i32 @test_mm_test_all_zeros(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_test_all_zeros:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    sete %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_test_all_zeros:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone

define i32 @test_mm_test_mix_ones_zeros(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_test_mix_ones_zeros:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_test_mix_ones_zeros:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone

define i32 @test_mm_testc_si128(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_testc_si128:
; X32:       # BB#0:
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    sbbl %eax, %eax
; X32-NEXT:    andl $1, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_testc_si128:
; X64:       # BB#0:
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    sbbl %eax, %eax
; X64-NEXT:    andl $1, %eax
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}

define i32 @test_mm_testnzc_si128(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_testnzc_si128:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    seta %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_testnzc_si128:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    seta %al
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}

define i32 @test_mm_testz_si128(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_testz_si128:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    ptest %xmm1, %xmm0
; X32-NEXT:    sete %al
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_testz_si128:
; X64:       # BB#0:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    ptest %xmm1, %xmm0
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %res
}
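
; Sketch for the "TODO test_mm_extract_ps" note above: it is assumed here (not
; verified against clang) that _mm_extract_ps lowers to an extractelement of the
; float vector followed by a bitcast of the element to i32, mirroring the pattern
; of the other extract tests in this file. The IR is left commented out because
; the FileCheck assertions would first need to be regenerated with
; utils/update_llc_test_checks.py.
;
; define i32 @test_mm_extract_ps(<4 x float> %a0) {
;   %ext = extractelement <4 x float> %a0, i32 1
;   %bc = bitcast float %ext to i32
;   ret i32 %bc
; }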