; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=X86 --check-prefix=X86-AVX512
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=X64 --check-prefix=X64-AVX512

define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_pblendw:
; X86:       ## %bb.0:
; X86-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendw:
; X64:       ## %bb.0:
; X64-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_128:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pblendd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
; X86-LABEL: test_x86_avx2_movntdqa:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovntdqa (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_movntdqa:
; X64:       ## %bb.0:
; X64-NEXT:    vmovntdqa (%rdi), %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_x86_avx2_mpsadbw:
; X86:       ## %bb.0:
; X86-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_mpsadbw:
; X64:       ## %bb.0:
; X64-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psll_dq_bs:
; X86:       ## %bb.0:
; X86-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psll_dq_bs:
; X64:       ## %bb.0:
; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psrl_dq_bs:
; X86:       ## %bb.0:
; X86-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psrl_dq_bs:
; X64:       ## %bb.0:
; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psll_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psll_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_psrl_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_psrl_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
; X86-LABEL: test_x86_avx2_vextracti128:
; X86:       ## %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vextracti128:
; X64:       ## %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone


define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
; X86-LABEL: test_x86_avx2_vinserti128:
; X86:       ## %bb.0:
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vinserti128:
; X64:       ## %bb.0:
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone


define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly


define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_ss_ps:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_ss_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly


define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
; X86-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly


define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastb_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastb %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastb_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly


define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastb_256:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastb %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastb_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly


define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastw_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastw %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastw_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly


define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastw_256:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastw %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastw_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly


define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastd_128:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastq_128:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastq_128:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
; X86-LABEL: test_x86_avx2_pbroadcastq_256:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pbroadcastq_256:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly


define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxbw:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxbw %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbw %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxdq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxwd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxwd %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovsxwq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovsxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwq %xmm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxbw:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxdq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxwd:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pmovzxwq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmovzxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone

; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; add operation forces the execution domain.
; X86-LABEL: test_x86_avx_storeu_dq_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; X86-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X86-NEXT:    vmovdqu %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx_storeu_dq_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    vmovdqu %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind

define <32 x i8> @mm256_max_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_max_epi8:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi8:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_max_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_max_epi16:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi16:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_max_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_max_epi32:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epi32:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_max_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_max_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_max_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_max_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_max_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_max_epu32:
; X86:       ## %bb.0:
; X86-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_max_epu32:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_min_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_min_epi8:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi8:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_min_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_min_epi16:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi16:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_min_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_min_epi32:
; X86:       ## %bb.0:
; X86-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epi32:
; X64:       ## %bb.0:
; X64-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_min_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_min_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_min_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_min_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_min_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_min_epu32:
; X86:       ## %bb.0:
; X86-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu32:
; X64:       ## %bb.0:
; X64-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_avg_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_avg_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_avg_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_avg_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_avg_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_avg_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pabs_b:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsb %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_b:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsb %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone

define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pabs_d:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsd %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsd %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pabs_w:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsw %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_w:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsw %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
; X86-LABEL: test_x86_avx2_vperm2i128:
; X86:       ## %bb.0:
; X86-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vperm2i128:
; X64:       ## %bb.0:
; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly


define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pmulu_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmulu_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pmul_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmul_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone