; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; Each function below calls a single vector intrinsic; the autogenerated
; assertions pin the assembly llc emits for it. The same input is compiled for
; a 32-bit and a 64-bit Darwin triple, once with AVX2 only and once with
; AVX-512 F/BW/VL enabled. Assertions shared by both triples use the common
; prefix; only tests that touch a pointer argument need per-triple assertions.
; Regenerate the assertions with the script named in the NOTE line instead of
; editing them by hand.

define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendw:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone

define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone

define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone

define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
; X86-LABEL: test_x86_avx2_movntdqa:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovntdqa (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_movntdqa:
; X64:       ## %bb.0:
; X64-NEXT:    vmovntdqa (%rdi), %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly

define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_mpsadbw:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone

define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_psll_dq_bs:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone

define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_psrl_dq_bs:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone

; Unlike the .bs variants above (immediate 7, seven bytes moved per 128-bit
; lane), the next two tests pass 8 and the expected shuffle moves one byte per
; lane, so the immediate here is presumably a bit count.
define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_psll_dq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone

define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_psrl_dq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone

; The extract/insert tests pass 7 as the immediate while the expected
; instruction uses $1: only the low bit appears to survive as the 128-bit
; half selector.
define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_vextracti128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone

define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx2_vinserti128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone

define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly

define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly

define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly

define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastb_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly

define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastb_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly

define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastw_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly

define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastw_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly

define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly

define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly

define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastq_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly

define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastq_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly

define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxbd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone

define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxbq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxbw:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone

define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxdq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone

define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxwd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone

define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxwq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone

define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxbd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone

define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxbq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxbw:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone

define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxdq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone

define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxwd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone

define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxwq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone

; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; add operation forces the execution domain.
; X86-LABEL: test_x86_avx_storeu_dq_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; X86-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X86-NEXT:    vmovdqu %ymm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx_storeu_dq_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    vmovdqu %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind

define <32 x i8> @mm256_max_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: mm256_max_epi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_max_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: mm256_max_epi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_max_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: mm256_max_epi32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_max_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: mm256_max_epu8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_max_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: mm256_max_epu16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_max_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: mm256_max_epu32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_min_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: mm256_min_epi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_min_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: mm256_min_epi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_min_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: mm256_min_epi32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_min_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: mm256_min_epu8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_min_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: mm256_min_epu16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_min_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: mm256_min_epu32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pabs_b:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpabsb %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone

define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pabs_d:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpabsd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone

define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pabs_w:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpabsw %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone

define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx2_vperm2i128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pmulu_dq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone

define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pmul_dq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_padds_b:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_padds_w:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_paddus_b:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_paddus_w:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_psubs_b:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_psubs_w:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_psubus_b:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_psubus_w:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone