; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse41-builtins.c

define <2 x i64> @test_mm_blend_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_blend_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blend_epi16:
; AVX: # %bb.0:
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6,7]
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  %res = bitcast <8 x i16> %shuf to <2 x i64>
  ret <2 x i64> %res
}

define <2 x double> @test_mm_blend_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_blend_pd:
; SSE: # %bb.0:
; SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blend_pd:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT: ret{{[l|q]}}
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %res
}

define <4 x float> @test_mm_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_blend_ps:
; SSE: # %bb.0:
; SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blend_ps:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
; AVX-NEXT: ret{{[l|q]}}
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x float> %res
}

define <2 x i64> @test_mm_blendv_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; SSE-LABEL: test_mm_blendv_epi8:
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3
; SSE-NEXT: movdqa %xmm3, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blendv_epi8:
; AVX: # %bb.0:
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %call = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

define <2 x double> @test_mm_blendv_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; SSE-LABEL: test_mm_blendv_pd:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm3
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3
; SSE-NEXT: movapd %xmm3, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blendv_pd:
; AVX: # %bb.0:
; AVX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_mm_blendv_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; SSE-LABEL: test_mm_blendv_ps:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3
; SSE-NEXT: movaps %xmm3, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_blendv_ps:
; AVX: # %bb.0:
; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_mm_ceil_pd(<2 x double> %a0) {
; SSE-LABEL: test_mm_ceil_pd:
; SSE: # %bb.0:
; SSE-NEXT: roundpd $2, %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ceil_pd:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $2, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone

define <4 x float> @test_mm_ceil_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_ceil_ps:
; SSE: # %bb.0:
; SSE-NEXT: roundps $2, %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ceil_ps:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $2, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone

define <2 x double> @test_mm_ceil_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_ceil_sd:
; SSE: # %bb.0:
; SSE-NEXT: roundsd $2, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ceil_sd:
; AVX: # %bb.0:
; AVX-NEXT: vroundsd $2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone

define <4 x float> @test_mm_ceil_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_ceil_ss:
; SSE: # %bb.0:
; SSE-NEXT: roundss $2, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_ceil_ss:
; AVX: # %bb.0:
; AVX-NEXT: vroundss $2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

define <2 x i64> @test_mm_cmpeq_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_cmpeq_epi64:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqq %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_cmpeq_epi64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_cmpeq_epi64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; AVX512-NEXT: vpmovm2q %k0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %cmp = icmp eq <2 x i64> %a0, %a1
  %res = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi8_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmovsxbw %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi8_epi16:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %sext = sext <8 x i8> %ext0 to <8 x i16>
  %res = bitcast <8 x i16> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi8_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pmovsxbd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi8_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = sext <4 x i8> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi8_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi8_epi64:
; SSE: # %bb.0:
; SSE-NEXT: pmovsxbq %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi8_epi64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i8> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepi16_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi16_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pmovsxwd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi16_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = sext <4 x i16> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepi16_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi16_epi64:
; SSE: # %bb.0:
; SSE-NEXT: pmovsxwq %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi16_epi64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i16> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepi32_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepi32_epi64:
; SSE: # %bb.0:
; SSE-NEXT: pmovsxdq %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepi32_epi64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sext = sext <2 x i32> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu8_epi16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu8_epi16:
; SSE: # %bb.0:
; SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu8_epi16:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %sext = zext <8 x i8> %ext0 to <8 x i16>
  %res = bitcast <8 x i16> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu8_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu8_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = zext <4 x i8> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu8_epi64:
; SSE: # %bb.0:
; SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu8_epi64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i8> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu16_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu16_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sext = zext <4 x i16> %ext0 to <4 x i32>
  %res = bitcast <4 x i32> %sext to <2 x i64>
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu16_epi64:
; SSE: # %bb.0:
; SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu16_epi64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i16> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) {
; SSE-LABEL: test_mm_cvtepu32_epi64:
; SSE: # %bb.0:
; SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_cvtepu32_epi64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %sext = zext <2 x i32> %ext0 to <2 x i64>
  ret <2 x i64> %sext
}

define <2 x double> @test_mm_dp_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_dp_pd:
; SSE: # %bb.0:
; SSE-NEXT: dppd $7, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_dp_pd:
; AVX: # %bb.0:
; AVX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone

define <4 x float> @test_mm_dp_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_dp_ps:
; SSE: # %bb.0:
; SSE-NEXT: dpps $7, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_dp_ps:
; AVX: # %bb.0:
; AVX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @test_mm_extract_epi8(<2 x i64> %a0) {
; SSE-LABEL: test_mm_extract_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pextrb $1, %xmm0, %eax
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_extract_epi8:
; AVX: # %bb.0:
; AVX-NEXT: vpextrb $1, %xmm0, %eax
; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %ext = extractelement <16 x i8> %arg0, i32 1
  %res = zext i8 %ext to i32
  ret i32 %res
}

define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
; SSE-LABEL: test_mm_extract_epi32:
; SSE: # %bb.0:
; SSE-NEXT: extractps $1, %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_extract_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vextractps $1, %xmm0, %eax
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = extractelement <4 x i32> %arg0, i32 1
  ret i32 %ext
}

define i64 @test_mm_extract_epi64(<2 x i64> %a0) {
; X86-SSE-LABEL: test_mm_extract_epi64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: extractps $2, %xmm0, %eax
; X86-SSE-NEXT: extractps $3, %xmm0, %edx
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_extract_epi64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vextractps $2, %xmm0, %eax
; X86-AVX-NEXT: vextractps $3, %xmm0, %edx
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_extract_epi64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pextrq $1, %xmm0, %rax
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_extract_epi64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
; X64-AVX-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %ext = extractelement <2 x i64> %a0, i32 1
  ret i64 %ext
}

define i32 @test_mm_extract_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_extract_ps:
; SSE: # %bb.0:
; SSE-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_extract_ps:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: ret{{[l|q]}}
  %ext = extractelement <4 x float> %a0, i32 1
  %bc = bitcast float %ext to i32
  ret i32 %bc
}

define <2 x double> @test_mm_floor_pd(<2 x double> %a0) {
; SSE-LABEL: test_mm_floor_pd:
; SSE: # %bb.0:
; SSE-NEXT: roundpd $1, %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_floor_pd:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 1)
  ret <2 x double> %res
}

define <4 x float> @test_mm_floor_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_floor_ps:
; SSE: # %bb.0:
; SSE-NEXT: roundps $1, %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_floor_ps:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 1)
  ret <4 x float> %res
}

define <2 x double> @test_mm_floor_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_floor_sd:
; SSE: # %bb.0:
; SSE-NEXT: roundsd $1, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_floor_sd:
; AVX: # %bb.0:
; AVX-NEXT: vroundsd $1, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 1)
  ret <2 x double> %res
}

define <4 x float> @test_mm_floor_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_floor_ss:
; SSE: # %bb.0:
; SSE-NEXT: roundss $1, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_floor_ss:
; AVX: # %bb.0:
; AVX-NEXT: vroundss $1, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 1)
  ret <4 x float> %res
}

define <2 x i64> @test_mm_insert_epi8(<2 x i64> %a0, i8 %a1) {
; X86-SSE-LABEL: test_mm_insert_epi8:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: pinsrb $1, %eax, %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_insert_epi8:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_insert_epi8:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movzbl %dil, %eax
; X64-SSE-NEXT: pinsrb $1, %eax, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_insert_epi8:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movzbl %dil, %eax
; X64-AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = insertelement <16 x i8> %arg0, i8 %a1,i32 1
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_insert_epi32(<2 x i64> %a0, i32 %a1) {
; X86-SSE-LABEL: test_mm_insert_epi32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pinsrd $1, {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_insert_epi32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_insert_epi32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pinsrd $1, %edi, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_insert_epi32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = insertelement <4 x i32> %arg0, i32 %a1,i32 1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_insert_epi64(<2 x i64> %a0, i64 %a1) {
; X86-SSE-LABEL: test_mm_insert_epi64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pinsrd $2, {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: pinsrd $3, {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: test_mm_insert_epi64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: test_mm_insert_epi64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: pinsrq $1, %rdi, %xmm0
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: test_mm_insert_epi64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
; X64-AVX-NEXT: retq
  %res = insertelement <2 x i64> %a0, i64 %a1,i32 1
  ret <2 x i64> %res
}

define <4 x float> @test_mm_insert_ps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_insert_ps:
; SSE: # %bb.0:
; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_insert_ps:
; AVX: # %bb.0:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3]
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone

define <2 x i64> @test_mm_max_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_max_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pmaxsb %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_epi8:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %sel = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)

define <2 x i64> @test_mm_max_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_max_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pmaxsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %sel = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)

define <2 x i64> @test_mm_max_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_max_epu16:
; SSE: # %bb.0:
; SSE-NEXT: pmaxuw %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_epu16:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %sel = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)

define <2 x i64> @test_mm_max_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_max_epu32:
; SSE: # %bb.0:
; SSE-NEXT: pmaxud %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_max_epu32:
; AVX: # %bb.0:
; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %sel = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)

define <2 x i64> @test_mm_min_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_min_epi8:
; SSE: # %bb.0:
; SSE-NEXT: pminsb %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_epi8:
; AVX: # %bb.0:
; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %sel = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %sel to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)

define <2 x i64> @test_mm_min_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_min_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pminsd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %sel = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)

define <2 x i64> @test_mm_min_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_min_epu16:
; SSE: # %bb.0:
; SSE-NEXT: pminuw %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_epu16:
; AVX: # %bb.0:
; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %sel = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %sel to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)

define <2 x i64> @test_mm_min_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_min_epu32:
; SSE: # %bb.0:
; SSE-NEXT: pminud %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_min_epu32:
; AVX: # %bb.0:
; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %sel = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %sel to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)

define <2 x i64> @test_mm_minpos_epu16(<2 x i64> %a0) {
; SSE-LABEL: test_mm_minpos_epu16:
; SSE: # %bb.0:
; SSE-NEXT: phminposuw %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_minpos_epu16:
; AVX: # %bb.0:
; AVX-NEXT: vphminposuw %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_mpsadbw_epu8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mpsadbw_epu8:
; SSE: # %bb.0:
; SSE-NEXT: mpsadbw $1, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mpsadbw_epu8:
; AVX: # %bb.0:
; AVX-NEXT: vmpsadbw $1, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %arg0, <16 x i8> %arg1, i8 1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_mul_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mul_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pmuldq %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: test_mm_mul_epi32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512-LABEL: test_mm_mul_epi32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllq $32, %xmm0, %xmm0
; AVX512-NEXT: vpsraq $32, %xmm0, %xmm0
; AVX512-NEXT: vpsllq $32, %xmm1, %xmm1
; AVX512-NEXT: vpsraq $32, %xmm1, %xmm1
; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: ret{{[l|q]}}
  %A = shl <2 x i64> %a0, <i64 32, i64 32>
  %A1 = ashr exact <2 x i64> %A, <i64 32, i64 32>
  %B = shl <2 x i64> %a1, <i64 32, i64 32>
  %B1 = ashr exact <2 x i64> %B, <i64 32, i64 32>
  %res = mul nsw <2 x i64> %A1, %B1
  ret <2 x i64> %res
}

define <2 x i64> @test_mm_mullo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_mullo_epi32:
; SSE: # %bb.0:
; SSE-NEXT: pmulld %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_mullo_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = mul <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}

define <2 x i64> @test_mm_packus_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_packus_epi32:
; SSE: # %bb.0:
; SSE-NEXT: packusdw %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_packus_epi32:
; AVX: # %bb.0:
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x double> @test_mm_round_pd(<2 x double> %a0) {
; SSE-LABEL: test_mm_round_pd:
; SSE: # %bb.0:
; SSE-NEXT: roundpd $4, %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_round_pd:
; AVX: # %bb.0:
; AVX-NEXT: vroundpd $4, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 4)
  ret <2 x double> %res
}

define <4 x float> @test_mm_round_ps(<4 x float> %a0) {
; SSE-LABEL: test_mm_round_ps:
; SSE: # %bb.0:
; SSE-NEXT: roundps $4, %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_round_ps:
; AVX: # %bb.0:
; AVX-NEXT: vroundps $4, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 4)
  ret <4 x float> %res
}

define <2 x double> @test_mm_round_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_round_sd:
; SSE: # %bb.0:
; SSE-NEXT: roundsd $4, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_round_sd:
; AVX: # %bb.0:
; AVX-NEXT: vroundsd $4, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 4)
  ret <2 x double> %res
}

define <4 x float> @test_mm_round_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_mm_round_ss:
; SSE: # %bb.0:
; SSE-NEXT: roundss $4, %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: test_mm_round_ss:
; AVX: # %bb.0:
; AVX-NEXT: vroundss $4, %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 4)
  ret <4 x float> %res
}

define <2 x i64> @test_mm_stream_load_si128(<2 x
i64>* %a0) { 951; X86-SSE-LABEL: test_mm_stream_load_si128: 952; X86-SSE: # %bb.0: 953; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 954; X86-SSE-NEXT: movntdqa (%eax), %xmm0 955; X86-SSE-NEXT: retl 956; 957; X86-AVX-LABEL: test_mm_stream_load_si128: 958; X86-AVX: # %bb.0: 959; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax 960; X86-AVX-NEXT: vmovntdqa (%eax), %xmm0 961; X86-AVX-NEXT: retl 962; 963; X64-SSE-LABEL: test_mm_stream_load_si128: 964; X64-SSE: # %bb.0: 965; X64-SSE-NEXT: movntdqa (%rdi), %xmm0 966; X64-SSE-NEXT: retq 967; 968; X64-AVX-LABEL: test_mm_stream_load_si128: 969; X64-AVX: # %bb.0: 970; X64-AVX-NEXT: vmovntdqa (%rdi), %xmm0 971; X64-AVX-NEXT: retq 972 %arg0 = bitcast <2 x i64>* %a0 to i8* 973 %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %arg0) 974 ret <2 x i64> %res 975} 976declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone 977 978define i32 @test_mm_test_all_ones(<2 x i64> %a0) { 979; SSE-LABEL: test_mm_test_all_ones: 980; SSE: # %bb.0: 981; SSE-NEXT: pcmpeqd %xmm1, %xmm1 982; SSE-NEXT: xorl %eax, %eax 983; SSE-NEXT: ptest %xmm1, %xmm0 984; SSE-NEXT: setb %al 985; SSE-NEXT: ret{{[l|q]}} 986; 987; AVX-LABEL: test_mm_test_all_ones: 988; AVX: # %bb.0: 989; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 990; AVX-NEXT: xorl %eax, %eax 991; AVX-NEXT: vptest %xmm1, %xmm0 992; AVX-NEXT: setb %al 993; AVX-NEXT: ret{{[l|q]}} 994 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> <i64 -1, i64 -1>) 995 ret i32 %res 996} 997declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone 998 999define i32 @test_mm_test_all_zeros(<2 x i64> %a0, <2 x i64> %a1) { 1000; SSE-LABEL: test_mm_test_all_zeros: 1001; SSE: # %bb.0: 1002; SSE-NEXT: xorl %eax, %eax 1003; SSE-NEXT: ptest %xmm1, %xmm0 1004; SSE-NEXT: sete %al 1005; SSE-NEXT: ret{{[l|q]}} 1006; 1007; AVX-LABEL: test_mm_test_all_zeros: 1008; AVX: # %bb.0: 1009; AVX-NEXT: xorl %eax, %eax 1010; AVX-NEXT: vptest %xmm1, %xmm0 1011; AVX-NEXT: sete %al 1012; AVX-NEXT: ret{{[l|q]}} 1013 
%res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) 1014 ret i32 %res 1015} 1016declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone 1017 1018define i32 @test_mm_test_mix_ones_zeros(<2 x i64> %a0, <2 x i64> %a1) { 1019; SSE-LABEL: test_mm_test_mix_ones_zeros: 1020; SSE: # %bb.0: 1021; SSE-NEXT: xorl %eax, %eax 1022; SSE-NEXT: ptest %xmm1, %xmm0 1023; SSE-NEXT: seta %al 1024; SSE-NEXT: ret{{[l|q]}} 1025; 1026; AVX-LABEL: test_mm_test_mix_ones_zeros: 1027; AVX: # %bb.0: 1028; AVX-NEXT: xorl %eax, %eax 1029; AVX-NEXT: vptest %xmm1, %xmm0 1030; AVX-NEXT: seta %al 1031; AVX-NEXT: ret{{[l|q]}} 1032 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) 1033 ret i32 %res 1034} 1035declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone 1036 1037define i32 @test_mm_testc_si128(<2 x i64> %a0, <2 x i64> %a1) { 1038; SSE-LABEL: test_mm_testc_si128: 1039; SSE: # %bb.0: 1040; SSE-NEXT: xorl %eax, %eax 1041; SSE-NEXT: ptest %xmm1, %xmm0 1042; SSE-NEXT: setb %al 1043; SSE-NEXT: ret{{[l|q]}} 1044; 1045; AVX-LABEL: test_mm_testc_si128: 1046; AVX: # %bb.0: 1047; AVX-NEXT: xorl %eax, %eax 1048; AVX-NEXT: vptest %xmm1, %xmm0 1049; AVX-NEXT: setb %al 1050; AVX-NEXT: ret{{[l|q]}} 1051 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) 1052 ret i32 %res 1053} 1054 1055define i32 @test_mm_testnzc_si128(<2 x i64> %a0, <2 x i64> %a1) { 1056; SSE-LABEL: test_mm_testnzc_si128: 1057; SSE: # %bb.0: 1058; SSE-NEXT: xorl %eax, %eax 1059; SSE-NEXT: ptest %xmm1, %xmm0 1060; SSE-NEXT: seta %al 1061; SSE-NEXT: ret{{[l|q]}} 1062; 1063; AVX-LABEL: test_mm_testnzc_si128: 1064; AVX: # %bb.0: 1065; AVX-NEXT: xorl %eax, %eax 1066; AVX-NEXT: vptest %xmm1, %xmm0 1067; AVX-NEXT: seta %al 1068; AVX-NEXT: ret{{[l|q]}} 1069 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) 1070 ret i32 %res 1071} 1072 1073define i32 @test_mm_testz_si128(<2 x i64> %a0, <2 x i64> %a1) { 1074; SSE-LABEL: 
test_mm_testz_si128: 1075; SSE: # %bb.0: 1076; SSE-NEXT: xorl %eax, %eax 1077; SSE-NEXT: ptest %xmm1, %xmm0 1078; SSE-NEXT: sete %al 1079; SSE-NEXT: ret{{[l|q]}} 1080; 1081; AVX-LABEL: test_mm_testz_si128: 1082; AVX: # %bb.0: 1083; AVX-NEXT: xorl %eax, %eax 1084; AVX-NEXT: vptest %xmm1, %xmm0 1085; AVX-NEXT: sete %al 1086; AVX-NEXT: ret{{[l|q]}} 1087 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) 1088 ret i32 %res 1089} 1090