; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl | FileCheck %s

declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readonly

declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vplzcntd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vplzcntd %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vplzcntd %ymm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vplzcntq %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vplzcntq %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
; CHECK-LABEL: test_x86_vbroadcastmw_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpbroadcastmw2d %k0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)

define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
; CHECK-LABEL: test_x86_vbroadcastmw_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpbroadcastmw2d %k0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)

define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
; CHECK-LABEL: test_x86_broadcastmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpbroadcastmb2q %k0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)

define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
; CHECK-LABEL: test_x86_broadcastmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpbroadcastmb2q %k0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)
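
; The generic @llvm.ctlz.v2i64 intrinsic is declared at the top of this file
; but never exercised in the section above. Below is a minimal sketch of such
; a test, assuming the usual lowering of a non-zero-undef CTLZ to VPLZCNTQ
; when AVX512CD and AVX512VL are available; the function name is illustrative
; and the loose CHECK line would normally be tightened by rerunning
; utils/update_llc_test_checks.py.
define <2 x i64> @test_ctlz_q_128(<2 x i64> %a0) {
; CHECK-LABEL: test_ctlz_q_128:
; CHECK: vplzcntq %xmm0, %xmm0
  %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a0, i1 false)
  ret <2 x i64> %res
}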