1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s 2 3; 256-bit 4 5define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) { 6; CHECK-LABEL: test_pcmpeq_d_256 7; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ## 8 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) 9 ret i8 %res 10} 11 12define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 13; CHECK-LABEL: test_mask_pcmpeq_d_256 14; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## 15 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) 16 ret i8 %res 17} 18 19declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8) 20 21define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) { 22; CHECK-LABEL: test_pcmpeq_q_256 23; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ## 24 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) 25 ret i8 %res 26} 27 28define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 29; CHECK-LABEL: test_mask_pcmpeq_q_256 30; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## 31 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask) 32 ret i8 %res 33} 34 35declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8) 36 37define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) { 38; CHECK-LABEL: test_pcmpgt_d_256 39; CHECK: vpcmpgtd %ymm1, %ymm0, %k0 ## 40 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1) 41 ret i8 %res 42} 43 44define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 45; CHECK-LABEL: test_mask_pcmpgt_d_256 46; CHECK: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## 47 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask) 48 ret i8 %res 49} 50 51declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8) 52 53define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) { 54; CHECK-LABEL: test_pcmpgt_q_256 55; CHECK: vpcmpgtq %ymm1, %ymm0, %k0 ## 56 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1) 57 ret i8 %res 58} 59 60define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 61; CHECK-LABEL: test_mask_pcmpgt_q_256 62; CHECK: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## 63 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask) 64 ret i8 %res 65} 66 67declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8) 68 69define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { 70; CHECK-LABEL: test_cmp_d_256 71; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ## 72 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) 73 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 74; CHECK: vpcmpltd %ymm1, %ymm0, %k0 ## 75 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) 76 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 77; CHECK: vpcmpled %ymm1, %ymm0, %k0 ## 78 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1) 79 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 80; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 ## 81 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1) 82 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 83; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 ## 84 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1) 85 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 86; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 ## 87 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1) 88 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 89; CHECK: vpcmpnled %ymm1, %ymm0, %k0 ## 90 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1) 91 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 92; CHECK: vpcmpordd %ymm1, %ymm0, %k0 ## 93 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1) 94 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 95 ret <8 x i8> %vec7 96} 97 98define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { 99; CHECK-LABEL: test_mask_cmp_d_256 100; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## 101 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask) 102 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 103; CHECK: vpcmpltd %ymm1, %ymm0, %k0 {%k1} ## 104 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask) 105 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 106; CHECK: vpcmpled %ymm1, %ymm0, %k0 {%k1} ## 107 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask) 108 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 109; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 {%k1} ## 110 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask) 111 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 112; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} ## 113 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask) 114 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 115; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ## 116 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask) 117 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 118; CHECK: vpcmpnled %ymm1, %ymm0, %k0 {%k1} ## 119 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask) 120 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 121; CHECK: vpcmpordd %ymm1, %ymm0, %k0 {%k1} ## 122 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask) 123 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 124 ret <8 x i8> %vec7 125} 126 127declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone 128 129define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { 130; CHECK-LABEL: test_ucmp_d_256 131; CHECK: vpcmpequd %ymm1, %ymm0, %k0 ## 132 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) 133 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 134; CHECK: vpcmpltud %ymm1, %ymm0, %k0 ## 135 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) 136 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 137; CHECK: vpcmpleud %ymm1, %ymm0, %k0 ## 138 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1) 139 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 140; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 ## 141 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1) 142 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 143; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 ## 144 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1) 145 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 146; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 ## 147 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1) 148 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 149; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 ## 150 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1) 151 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 152; CHECK: vpcmpordud %ymm1, %ymm0, %k0 ## 153 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1) 154 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 155 ret <8 x i8> %vec7 156} 157 158define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { 159; CHECK-LABEL: test_mask_ucmp_d_256 160; CHECK: vpcmpequd %ymm1, %ymm0, %k0 {%k1} ## 161 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask) 162 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 163; CHECK: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ## 164 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask) 165 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 166; CHECK: vpcmpleud %ymm1, %ymm0, %k0 {%k1} ## 167 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask) 168 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 169; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 {%k1} ## 170 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask) 171 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 172; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 {%k1} ## 173 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask) 174 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 175; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 {%k1} ## 176 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask) 177 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 178; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 {%k1} ## 179 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask) 180 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 181; CHECK: vpcmpordud %ymm1, %ymm0, %k0 {%k1} ## 182 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask) 183 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 184 ret <8 x i8> %vec7 185} 186 187declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone 188 189define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) { 190; CHECK-LABEL: test_cmp_q_256 191; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ## 192 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1) 193 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 194; CHECK: vpcmpltq %ymm1, %ymm0, %k0 ## 195 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1) 196 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 197; CHECK: vpcmpleq %ymm1, %ymm0, %k0 ## 198 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1) 199 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 200; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 ## 201 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1) 202 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 203; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 ## 204 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1) 205 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 206; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 ## 207 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1) 208 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 209; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 ## 210 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1) 211 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 212; CHECK: vpcmpordq %ymm1, %ymm0, %k0 ## 213 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1) 214 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 215 ret <8 x i8> %vec7 216} 217 218define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { 219; CHECK-LABEL: test_mask_cmp_q_256 220; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## 221 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask) 222 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 223; CHECK: vpcmpltq %ymm1, %ymm0, %k0 {%k1} ## 224 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask) 225 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 226; CHECK: vpcmpleq %ymm1, %ymm0, %k0 {%k1} ## 227 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask) 228 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 229; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 {%k1} ## 230 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask) 231 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 232; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} ## 233 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask) 234 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 235; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ## 236 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask) 237 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 238; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 {%k1} ## 239 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask) 240 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 241; CHECK: vpcmpordq %ymm1, %ymm0, %k0 {%k1} ## 242 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask) 243 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 244 ret <8 x i8> %vec7 245} 246 247declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone 248 249define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) { 250; CHECK-LABEL: test_ucmp_q_256 251; CHECK: vpcmpequq %ymm1, %ymm0, %k0 ## 252 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1) 253 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 254; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 ## 255 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1) 256 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 257; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 ## 258 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1) 259 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 260; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 ## 261 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1) 262 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 263; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 ## 264 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1) 265 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 266; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 ## 267 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1) 268 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 269; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 ## 270 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1) 271 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 272; CHECK: vpcmporduq %ymm1, %ymm0, %k0 ## 273 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1) 274 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 275 ret <8 x i8> %vec7 276} 277 278define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { 279; CHECK-LABEL: test_mask_ucmp_q_256 280; CHECK: vpcmpequq %ymm1, %ymm0, %k0 {%k1} ## 281 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask) 282 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 283; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ## 284 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask) 285 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 286; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 {%k1} ## 287 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask) 288 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 289; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 {%k1} ## 290 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask) 291 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 292; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 {%k1} ## 293 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask) 294 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 295; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 {%k1} ## 296 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask) 297 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 298; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 {%k1} ## 299 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask) 300 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 301; CHECK: vpcmporduq %ymm1, %ymm0, %k0 {%k1} ## 302 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask) 303 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 304 ret <8 x i8> %vec7 305} 306 307declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone 308 309; 128-bit 310 311define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) { 312; CHECK-LABEL: test_pcmpeq_d_128 313; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ## 314 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) 315 ret i8 %res 316} 317 318define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 319; CHECK-LABEL: test_mask_pcmpeq_d_128 320; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## 321 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) 322 ret i8 %res 323} 324 325declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8) 326 327define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) { 328; CHECK-LABEL: test_pcmpeq_q_128 329; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ## 330 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) 331 ret i8 %res 332} 333 334define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 335; CHECK-LABEL: test_mask_pcmpeq_q_128 336; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## 337 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask) 338 ret i8 %res 339} 340 341declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8) 342 343define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) { 344; CHECK-LABEL: test_pcmpgt_d_128 345; CHECK: vpcmpgtd %xmm1, %xmm0, %k0 ## 346 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1) 347 ret i8 %res 348} 349 350define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 351; CHECK-LABEL: test_mask_pcmpgt_d_128 352; CHECK: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## 353 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask) 354 ret i8 %res 355} 356 357declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8) 358 359define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) { 360; CHECK-LABEL: test_pcmpgt_q_128 361; CHECK: vpcmpgtq %xmm1, %xmm0, %k0 ## 362 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1) 363 ret i8 %res 364} 365 366define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 367; CHECK-LABEL: test_mask_pcmpgt_q_128 368; CHECK: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## 369 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask) 370 ret i8 %res 371} 372 373declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8) 374 375define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { 376; CHECK-LABEL: test_cmp_d_128 377; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ## 378 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1) 379 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 380; CHECK: vpcmpltd %xmm1, %xmm0, %k0 ## 381 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1) 382 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 383; CHECK: vpcmpled %xmm1, %xmm0, %k0 ## 384 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1) 385 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 386; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 ## 387 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1) 388 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 389; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 ## 390 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1) 391 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 392; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 ## 393 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1) 394 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 395; CHECK: vpcmpnled %xmm1, %xmm0, %k0 ## 396 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1) 397 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 398; CHECK: vpcmpordd %xmm1, %xmm0, %k0 ## 399 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1) 400 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 401 ret <8 x i8> %vec7 402} 403 404define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { 405; CHECK-LABEL: test_mask_cmp_d_128 406; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## 407 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask) 408 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 409; CHECK: vpcmpltd %xmm1, %xmm0, %k0 {%k1} ## 410 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask) 411 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 412; CHECK: vpcmpled %xmm1, %xmm0, %k0 {%k1} ## 413 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask) 414 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 415; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 {%k1} ## 416 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask) 417 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 418; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} ## 419 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask) 420 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 421; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ## 422 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask) 423 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 424; CHECK: vpcmpnled %xmm1, %xmm0, %k0 {%k1} ## 425 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask) 426 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 427; CHECK: vpcmpordd %xmm1, %xmm0, %k0 {%k1} ## 428 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask) 429 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 430 ret <8 x i8> %vec7 431} 432 433declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone 434 435define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { 436; CHECK-LABEL: test_ucmp_d_128 437; CHECK: vpcmpequd %xmm1, %xmm0, %k0 ## 438 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1) 439 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 440; CHECK: vpcmpltud %xmm1, %xmm0, %k0 ## 441 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1) 442 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 443; CHECK: vpcmpleud %xmm1, %xmm0, %k0 ## 444 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1) 445 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 446; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 ## 447 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1) 448 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 449; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 ## 450 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1) 451 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 452; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 ## 453 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1) 454 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 455; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 ## 456 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1) 457 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 458; CHECK: vpcmpordud %xmm1, %xmm0, %k0 ## 459 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1) 460 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 461 ret <8 x i8> %vec7 462} 463 464define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { 465; CHECK-LABEL: test_mask_ucmp_d_128 466; CHECK: vpcmpequd %xmm1, %xmm0, %k0 {%k1} ## 467 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask) 468 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 469; CHECK: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ## 470 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask) 471 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 472; CHECK: vpcmpleud %xmm1, %xmm0, %k0 {%k1} ## 473 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask) 474 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 475; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 {%k1} ## 476 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask) 477 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 478; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 {%k1} ## 479 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask) 480 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 481; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 {%k1} ## 482 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask) 483 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 484; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 {%k1} ## 485 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask) 486 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 487; CHECK: vpcmpordud %xmm1, %xmm0, %k0 {%k1} ## 488 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask) 489 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 490 ret <8 x i8> %vec7 491} 492 493declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone 494 495define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { 496; CHECK-LABEL: test_cmp_q_128 497; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ## 498 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) 499 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 500; CHECK: vpcmpltq %xmm1, %xmm0, %k0 ## 501 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1) 502 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 503; CHECK: vpcmpleq %xmm1, %xmm0, %k0 ## 504 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1) 505 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 506; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 ## 507 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1) 508 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 509; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 ## 510 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1) 511 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 512; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 ## 513 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1) 514 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 515; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 ## 516 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1) 517 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 518; CHECK: vpcmpordq %xmm1, %xmm0, %k0 ## 519 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1) 520 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 521 ret <8 x i8> %vec7 522} 523 524define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { 525; CHECK-LABEL: test_mask_cmp_q_128 526; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## 527 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) 528 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 529; CHECK: vpcmpltq %xmm1, %xmm0, %k0 {%k1} ## 530 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask) 531 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 532; CHECK: vpcmpleq %xmm1, %xmm0, %k0 {%k1} ## 533 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask) 534 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 535; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 {%k1} ## 536 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask) 537 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 538; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} ## 539 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask) 540 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 541; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ## 542 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask) 543 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 544; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 {%k1} ## 545 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask) 546 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 547; CHECK: vpcmpordq %xmm1, %xmm0, %k0 {%k1} ## 548 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask) 549 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 550 ret <8 x i8> %vec7 551} 552 553declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone 554 555define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { 556; CHECK-LABEL: test_ucmp_q_128 557; CHECK: vpcmpequq %xmm1, %xmm0, %k0 ## 558 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) 559 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 560; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 ## 561 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1) 562 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 563; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 ## 564 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1) 565 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 566; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 ## 567 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1) 568 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 569; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 ## 570 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1) 571 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 572; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 ## 573 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1) 574 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 575; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 ## 576 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1) 577 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 578; CHECK: vpcmporduq %xmm1, %xmm0, %k0 ## 579 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1) 580 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 581 ret <8 x i8> %vec7 582} 583 584define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { 585; CHECK-LABEL: test_mask_ucmp_q_128 586; CHECK: vpcmpequq %xmm1, %xmm0, %k0 {%k1} ## 587 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) 588 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 589; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ## 590 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask) 591 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 592; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 {%k1} ## 593 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask) 594 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 595; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 {%k1} ## 596 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask) 597 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 598; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 {%k1} ## 599 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask) 600 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 601; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 {%k1} ## 602 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask) 603 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 604; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 {%k1} ## 605 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask) 606 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 607; CHECK: vpcmporduq %xmm1, %xmm0, %k0 {%k1} ## 608 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask) 609 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 610 ret <8 x i8> %vec7 611} 612 613declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone 614 615; CHECK-LABEL: compr1 616; CHECK: vcompresspd %zmm0 617define void @compr1(i8* %addr, <8 x double> %data, i8 %mask) { 618 call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 619 ret void 620} 621 622declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 623 624; CHECK-LABEL: compr2 625; CHECK: vcompresspd %ymm0 626define void @compr2(i8* %addr, <4 x double> %data, i8 %mask) { 627 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 628 ret void 629} 630 631declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 632 633; CHECK-LABEL: compr3 634; CHECK: vcompressps %xmm0 635define void @compr3(i8* %addr, <4 x float> %data, i8 %mask) { 636 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 637 ret void 638} 639 640declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 641 642; CHECK-LABEL: compr4 643; CHECK: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0] 644define <8 x double> @compr4(i8* %addr, <8 x double> %data, i8 %mask) { 645 %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) 646 ret <8 x double> %res 647} 648 649declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) 650 651; CHECK-LABEL: compr5 652; CHECK: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] 653define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) { 654 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask) 655 ret <4 x double> %res 656} 657 658declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask) 659 660; CHECK-LABEL: compr6 661; CHECK: vcompressps %xmm0 662define <4 x float> @compr6(<4 x float> %data, i8 %mask) { 663 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask) 664 ret <4 x float> %res 665} 666 667declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) 668 669; CHECK-LABEL: compr7 670; CHECK-NOT: vcompress 671; CHECK: vmovupd 672define void @compr7(i8* %addr, <8 x double> %data) { 673 call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1) 674 ret void 675} 676 677; CHECK-LABEL: compr8 678; CHECK-NOT: vcompressps %xmm0 679define <4 x float> @compr8(<4 x float> %data) { 680 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1) 681 ret <4 x float> %res 682} 683 684; CHECK-LABEL: compr9 685; CHECK: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07] 686define void @compr9(i8* %addr, <8 x i64> %data, i8 %mask) { 687 call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask) 688 ret void 689} 690 691declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask) 692 693; CHECK-LABEL: compr10 694; CHECK: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] 695define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) { 696 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask) 697 ret <4 x i32> %res 698} 699 700declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) 701 702; Expand 703 704; CHECK-LABEL: expand1 705; CHECK: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07] 706define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) { 707 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 708 ret <8 x double> %res 709} 710 711declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 712 713; CHECK-LABEL: expand2 714; CHECK: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07] 715define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) { 716 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 717 ret <4 x double> %res 718} 719 720declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 721 722; CHECK-LABEL: expand3 723; CHECK: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07] 724define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) { 725 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 726 ret <4 x float> %res 727} 728 729declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 730 731; CHECK-LABEL: expand4 732; CHECK: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0] 733define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) { 734 %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) 735 ret <8 x double> %res 736} 737 738declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) 739 740; CHECK-LABEL: expand5 741; CHECK: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8] 742define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) { 743 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask) 744 ret <4 x double> %res 745} 746 747declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask) 748 749; CHECK-LABEL: expand6 750; CHECK: vexpandps %xmm0 751define <4 x float> @expand6(<4 x float> %data, i8 %mask) { 752 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask) 753 ret <4 x float> %res 754} 755 756declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) 757 758; CHECK-LABEL: expand7 759; CHECK-NOT: vexpand 760; CHECK: vmovupd 761define <8 x double> @expand7(i8* %addr, <8 x double> %data) { 762 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1) 763 ret <8 x double> %res 764} 765 766; CHECK-LABEL: expand8 767; CHECK-NOT: vexpandps %xmm0 768define <4 x float> @expand8(<4 x float> %data) { 769 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1) 770 ret <4 x float> %res 771} 772 773; CHECK-LABEL: expand9 774; CHECK: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07] 775define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) { 776 %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask) 777 ret <8 x i64> %res 778} 779 780declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask) 781 782; CHECK-LABEL: expand10 783; CHECK: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0] 784define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) { 785 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask) 786 ret <4 x i32> %res 787} 788 789declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) 790 791define <8 x float> @test_x86_mask_blend_ps_256(i8 %a0, <8 x float> %a1, <8 x float> %a2) { 792 ; CHECK: vblendmps %ymm1, %ymm0 793 %res = call <8 x float> @llvm.x86.avx512.mask.blend.ps.256(<8 x float> %a1, <8 x float> %a2, i8 %a0) ; <<8 x float>> [#uses=1] 794 ret <8 x float> %res 795} 796 797declare <8 x float> @llvm.x86.avx512.mask.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readonly 798 799define <4 x double> @test_x86_mask_blend_pd_256(i8 %a0, <4 x double> %a1, <4 x double> %a2) { 800 ; CHECK: vblendmpd %ymm1, %ymm0 801 %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a1, <4 x double> %a2, i8 %a0) ; <<4 x double>> [#uses=1] 802 ret <4 x double> %res 803} 804 805define <4 x double> @test_x86_mask_blend_pd_256_memop(<4 x double> %a, <4 x double>* %ptr, i8 %mask) { 806 ; CHECK-LABEL: test_x86_mask_blend_pd_256_memop 807 ; CHECK: vblendmpd (% 808 %b = load <4 x double>, <4 x double>* %ptr 809 %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a, <4 x double> %b, i8 %mask) ; <<4 x double>> [#uses=1] 810 ret <4 x double> %res 811} 812declare <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readonly 813 814; CHECK-LABEL: test_x86_mask_blend_d_256 815; CHECK: vpblendmd 816define <8 x i32> @test_x86_mask_blend_d_256(i8 %a0, <8 x i32> %a1, <8 x i32> %a2) { 817 %res = call <8 x i32> @llvm.x86.avx512.mask.blend.d.256(<8 x i32> %a1, <8 x i32> %a2, i8 %a0) ; <<8 x i32>> [#uses=1] 818 ret <8 x i32> %res 819} 820declare <8 x i32> @llvm.x86.avx512.mask.blend.d.256(<8 x i32>, <8 x i32>, i8) nounwind readonly 821 822define <4 x i64> @test_x86_mask_blend_q_256(i8 %a0, <4 x i64> %a1, <4 x i64> %a2) { 823 ; CHECK: vpblendmq 824 %res = call <4 x i64> @llvm.x86.avx512.mask.blend.q.256(<4 x i64> %a1, <4 x i64> %a2, i8 %a0) ; <<4 x i64>> [#uses=1] 825 ret <4 x i64> %res 826} 827declare <4 x i64> @llvm.x86.avx512.mask.blend.q.256(<4 x i64>, <4 x i64>, i8) nounwind readonly 828 829define <4 x float> @test_x86_mask_blend_ps_128(i8 %a0, <4 x float> %a1, <4 x float> %a2) { 830 ; CHECK: vblendmps %xmm1, %xmm0 831 %res = call <4 x float> @llvm.x86.avx512.mask.blend.ps.128(<4 x float> %a1, <4 x float> %a2, i8 %a0) ; <<4 x float>> [#uses=1] 832 ret <4 x float> %res 833} 834 835declare <4 x float> @llvm.x86.avx512.mask.blend.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly 836 837define <2 x double> @test_x86_mask_blend_pd_128(i8 %a0, <2 x double> %a1, <2 x double> %a2) { 838 ; CHECK: vblendmpd %xmm1, %xmm0 839 %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a1, <2 x double> %a2, i8 %a0) ; <<2 x double>> [#uses=1] 840 ret <2 x double> %res 841} 842 843define <2 x double> @test_x86_mask_blend_pd_128_memop(<2 x double> %a, <2 x double>* %ptr, i8 %mask) { 844 ; CHECK-LABEL: test_x86_mask_blend_pd_128_memop 845 ; CHECK: vblendmpd (% 846 %b = load <2 x double>, <2 x double>* %ptr 847 %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a, <2 x double> %b, i8 %mask) ; <<2 x double>> [#uses=1] 848 ret <2 x double> %res 849} 850declare <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double>, <2 x double>, i8) nounwind readonly 851 852define <4 x i32> @test_x86_mask_blend_d_128(i8 %a0, <4 x i32> %a1, <4 x i32> %a2) { 853 ; CHECK: vpblendmd 854 %res = call <4 x i32> @llvm.x86.avx512.mask.blend.d.128(<4 x i32> %a1, <4 x i32> %a2, i8 %a0) ; <<4 x i32>> [#uses=1] 855 ret <4 x i32> %res 856} 857declare <4 x i32> @llvm.x86.avx512.mask.blend.d.128(<4 x i32>, <4 x i32>, i8) nounwind readonly 858 859define <2 x i64> @test_x86_mask_blend_q_128(i8 %a0, <2 x i64> %a1, <2 x i64> %a2) { 860 ; CHECK: vpblendmq 861 %res = call <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64> %a1, <2 x i64> %a2, i8 %a0) ; <<2 x i64>> [#uses=1] 862 ret <2 x i64> %res 863} 864declare <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64>, <2 x i64>, i8) nounwind readonly 865 866 867define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { 868 ;CHECK-LABEL: test_mask_mul_epi32_rr_128 869 ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1] 870 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 871 ret < 2 x i64> %res 872} 873 874define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { 875 ;CHECK-LABEL: test_mask_mul_epi32_rrk_128 876 ;CHECK: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] 877 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 878 ret < 2 x i64> %res 879} 880 881define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { 882 ;CHECK-LABEL: test_mask_mul_epi32_rrkz_128 883 ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1] 884 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 885 ret < 2 x i64> %res 886} 887 888define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { 889 ;CHECK-LABEL: test_mask_mul_epi32_rm_128 890 ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07] 891 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 892 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 893 ret < 2 x i64> %res 894} 895 896define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 897 ;CHECK-LABEL: test_mask_mul_epi32_rmk_128 898 ;CHECK: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f] 899 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 900 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 901 ret < 2 x i64> %res 902} 903 904define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { 905 ;CHECK-LABEL: test_mask_mul_epi32_rmkz_128 906 ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07] 907 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 908 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 909 ret < 2 x i64> %res 910} 911 912define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { 913 ;CHECK-LABEL: test_mask_mul_epi32_rmb_128 914 ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07] 915 %q = load i64, i64* %ptr_b 916 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 917 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 918 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 919 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 920 ret < 2 x i64> %res 921} 922 923define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 924 ;CHECK-LABEL: test_mask_mul_epi32_rmbk_128 925 ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f] 926 %q = load i64, i64* %ptr_b 927 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 928 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 929 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 930 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 931 ret < 2 x i64> %res 932} 933 934define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { 935 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_128 936 ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07] 937 %q = load i64, i64* %ptr_b 938 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 939 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer 940 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 941 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 942 ret < 2 x i64> %res 943} 944 945declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) 946 947define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { 948 ;CHECK-LABEL: test_mask_mul_epi32_rr_256 949 ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1] 950 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 951 ret < 4 x i64> %res 952} 953 954define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { 955 ;CHECK-LABEL: test_mask_mul_epi32_rrk_256 956 ;CHECK: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] 957 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 958 ret < 4 x i64> %res 959} 960 961define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { 962 ;CHECK-LABEL: test_mask_mul_epi32_rrkz_256 963 ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1] 964 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 965 ret < 4 x i64> %res 966} 967 968define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { 969 ;CHECK-LABEL: test_mask_mul_epi32_rm_256 970 ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07] 971 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 972 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 973 ret < 4 x i64> %res 974} 975 976define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 977 ;CHECK-LABEL: test_mask_mul_epi32_rmk_256 978 ;CHECK: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f] 979 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 980 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 981 ret < 4 x i64> %res 982} 983 984define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { 985 ;CHECK-LABEL: test_mask_mul_epi32_rmkz_256 986 ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07] 987 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 988 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 989 ret < 4 x i64> %res 990} 991 992define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { 993 ;CHECK-LABEL: test_mask_mul_epi32_rmb_256 994 ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07] 995 %q = load i64, i64* %ptr_b 996 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 997 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 998 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 999 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1000 ret < 4 x i64> %res 1001} 1002 1003define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 1004 ;CHECK-LABEL: test_mask_mul_epi32_rmbk_256 1005 ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f] 1006 %q = load i64, i64* %ptr_b 1007 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1008 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1009 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1010 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1011 ret < 4 x i64> %res 1012} 1013 1014define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { 1015 ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_256 1016 ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07] 1017 %q = load i64, i64* %ptr_b 1018 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1019 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1020 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1021 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1022 ret < 4 x i64> %res 1023} 1024 1025declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) 1026 1027define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { 1028 ;CHECK-LABEL: test_mask_mul_epu32_rr_128 1029 ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1] 1030 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1031 ret < 2 x i64> %res 1032} 1033 1034define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { 1035 ;CHECK-LABEL: test_mask_mul_epu32_rrk_128 1036 ;CHECK: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] 1037 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1038 ret < 2 x i64> %res 1039} 1040 1041define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { 1042 ;CHECK-LABEL: test_mask_mul_epu32_rrkz_128 1043 ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1] 1044 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1045 ret < 2 x i64> %res 1046} 1047 1048define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { 1049 ;CHECK-LABEL: test_mask_mul_epu32_rm_128 1050 ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0x07] 1051 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1052 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1053 ret < 2 x i64> %res 1054} 1055 1056define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 1057 ;CHECK-LABEL: test_mask_mul_epu32_rmk_128 1058 ;CHECK: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f] 1059 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1060 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1061 ret < 2 x i64> %res 1062} 1063 1064define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { 1065 ;CHECK-LABEL: test_mask_mul_epu32_rmkz_128 1066 ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07] 1067 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1068 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1069 ret < 2 x i64> %res 1070} 1071 1072define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { 1073 ;CHECK-LABEL: test_mask_mul_epu32_rmb_128 1074 ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07] 1075 %q = load i64, i64* %ptr_b 1076 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1077 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 1078 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1079 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1080 ret < 2 x i64> %res 1081} 1082 1083define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 1084 ;CHECK-LABEL: test_mask_mul_epu32_rmbk_128 1085 ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f] 1086 %q = load i64, i64* %ptr_b 1087 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1088 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 1089 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1090 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1091 ret < 2 x i64> %res 1092} 1093 1094define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { 1095 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_128 1096 ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07] 1097 %q = load i64, i64* %ptr_b 1098 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1099 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer 1100 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1101 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1102 ret < 2 x i64> %res 1103} 1104 1105declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) 1106 1107define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { 1108 ;CHECK-LABEL: test_mask_mul_epu32_rr_256 1109 ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0xc1] 1110 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1111 ret < 4 x i64> %res 1112} 1113 1114define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { 1115 ;CHECK-LABEL: test_mask_mul_epu32_rrk_256 1116 ;CHECK: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] 1117 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1118 ret < 4 x i64> %res 1119} 1120 1121define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { 1122 ;CHECK-LABEL: test_mask_mul_epu32_rrkz_256 1123 ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1] 1124 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1125 ret < 4 x i64> %res 1126} 1127 1128define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { 1129 ;CHECK-LABEL: test_mask_mul_epu32_rm_256 1130 ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0x07] 1131 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1132 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1133 ret < 4 x i64> %res 1134} 1135 1136define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 1137 ;CHECK-LABEL: test_mask_mul_epu32_rmk_256 1138 ;CHECK: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f] 1139 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1140 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1141 ret < 4 x i64> %res 1142} 1143 1144define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { 1145 ;CHECK-LABEL: test_mask_mul_epu32_rmkz_256 1146 ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07] 1147 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1148 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1149 ret < 4 x i64> %res 1150} 1151 1152define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { 1153 ;CHECK-LABEL: test_mask_mul_epu32_rmb_256 1154 ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07] 1155 %q = load i64, i64* %ptr_b 1156 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1157 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1158 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1159 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1160 ret < 4 x i64> %res 1161} 1162 1163define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 1164 ;CHECK-LABEL: test_mask_mul_epu32_rmbk_256 1165 ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f] 1166 %q = load i64, i64* %ptr_b 1167 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1168 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1169 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1170 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1171 ret < 4 x i64> %res 1172} 1173 1174define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { 1175 ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_256 1176 ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07] 1177 %q = load i64, i64* %ptr_b 1178 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1179 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1180 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1181 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1182 ret < 4 x i64> %res 1183} 1184 1185declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) 1186 1187define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1188 ;CHECK-LABEL: test_mask_add_epi32_rr_128 1189 ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 1190 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1191 ret <4 x i32> %res 1192} 1193 1194define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1195 ;CHECK-LABEL: test_mask_add_epi32_rrk_128 1196 ;CHECK: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1] 1197 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1198 ret <4 x i32> %res 1199} 1200 1201define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1202 ;CHECK-LABEL: test_mask_add_epi32_rrkz_128 1203 ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1] 1204 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1205 ret <4 x i32> %res 1206} 1207 1208define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1209 ;CHECK-LABEL: test_mask_add_epi32_rm_128 1210 ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07] 1211 %b = load <4 x i32>, <4 x i32>* %ptr_b 1212 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1213 ret <4 x i32> %res 1214} 1215 1216define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1217 ;CHECK-LABEL: test_mask_add_epi32_rmk_128 1218 ;CHECK: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f] 1219 %b = load <4 x i32>, <4 x i32>* %ptr_b 1220 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1221 ret <4 x i32> %res 1222} 1223 1224define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1225 ;CHECK-LABEL: test_mask_add_epi32_rmkz_128 1226 ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07] 1227 %b = load <4 x i32>, <4 x i32>* %ptr_b 1228 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1229 ret <4 x i32> %res 1230} 1231 1232define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1233 ;CHECK-LABEL: test_mask_add_epi32_rmb_128 1234 ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07] 1235 %q = load i32, i32* %ptr_b 1236 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1237 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1238 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1239 ret <4 x i32> %res 1240} 1241 1242define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1243 ;CHECK-LABEL: test_mask_add_epi32_rmbk_128 1244 ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f] 1245 %q = load i32, i32* %ptr_b 1246 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1247 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1248 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1249 ret <4 x i32> %res 1250} 1251 1252define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1253 ;CHECK-LABEL: test_mask_add_epi32_rmbkz_128 1254 ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07] 1255 %q = load i32, i32* %ptr_b 1256 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1257 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1258 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1259 ret <4 x i32> %res 1260} 1261 1262declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1263 1264define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1265 ;CHECK-LABEL: test_mask_sub_epi32_rr_128 1266 ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1] 1267 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1268 ret <4 x i32> %res 1269} 1270 1271define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1272 ;CHECK-LABEL: test_mask_sub_epi32_rrk_128 1273 ;CHECK: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] 1274 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1275 ret <4 x i32> %res 1276} 1277 1278define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1279 ;CHECK-LABEL: test_mask_sub_epi32_rrkz_128 1280 ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1] 1281 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1282 ret <4 x i32> %res 1283} 1284 1285define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1286 ;CHECK-LABEL: test_mask_sub_epi32_rm_128 1287 ;CHECK: (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07] 1288 %b = load <4 x i32>, <4 x i32>* %ptr_b 1289 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1290 ret <4 x i32> %res 1291} 1292 1293define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1294 ;CHECK-LABEL: test_mask_sub_epi32_rmk_128 1295 ;CHECK: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f] 1296 %b = load <4 x i32>, <4 x i32>* %ptr_b 1297 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1298 ret <4 x i32> %res 1299} 1300 1301define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1302 ;CHECK-LABEL: test_mask_sub_epi32_rmkz_128 1303 ;CHECK: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07] 1304 %b = load <4 x i32>, <4 x i32>* %ptr_b 1305 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1306 ret <4 x i32> %res 1307} 1308 1309define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1310 ;CHECK-LABEL: test_mask_sub_epi32_rmb_128 1311 ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07] 1312 %q = load i32, i32* %ptr_b 1313 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1314 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1315 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1316 ret <4 x i32> %res 1317} 1318 1319define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1320 ;CHECK-LABEL: test_mask_sub_epi32_rmbk_128 1321 ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f] 1322 %q = load i32, i32* %ptr_b 1323 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1324 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1325 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1326 ret <4 x i32> %res 1327} 1328 1329define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1330 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_128 1331 ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07] 1332 %q = load i32, i32* %ptr_b 1333 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1334 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1335 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1336 ret <4 x i32> %res 1337} 1338 1339declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1340 1341define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1342 ;CHECK-LABEL: test_mask_sub_epi32_rr_256 1343 ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1] 1344 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1345 ret <8 x i32> %res 1346} 1347 1348define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1349 ;CHECK-LABEL: test_mask_sub_epi32_rrk_256 1350 ;CHECK: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] 1351 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1352 ret <8 x i32> %res 1353} 1354 1355define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1356 ;CHECK-LABEL: test_mask_sub_epi32_rrkz_256 1357 ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1] 1358 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1359 ret <8 x i32> %res 1360} 1361 1362define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1363 ;CHECK-LABEL: test_mask_sub_epi32_rm_256 1364 ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07] 1365 %b = load <8 x i32>, <8 x i32>* %ptr_b 1366 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1367 ret <8 x i32> %res 1368} 1369 1370define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1371 ;CHECK-LABEL: test_mask_sub_epi32_rmk_256 1372 ;CHECK: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f] 1373 %b = load <8 x i32>, <8 x i32>* %ptr_b 1374 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1375 ret <8 x i32> %res 1376} 1377 1378define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1379 ;CHECK-LABEL: test_mask_sub_epi32_rmkz_256 1380 ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07] 1381 %b = load <8 x i32>, <8 x i32>* %ptr_b 1382 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1383 ret <8 x i32> %res 1384} 1385 1386define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1387 ;CHECK-LABEL: test_mask_sub_epi32_rmb_256 1388 ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07] 1389 %q = load i32, i32* %ptr_b 1390 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1391 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1392 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1393 ret <8 x i32> %res 1394} 1395 1396define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1397 ;CHECK-LABEL: test_mask_sub_epi32_rmbk_256 1398 ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f] 1399 %q = load i32, i32* %ptr_b 1400 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1401 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1402 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1403 ret <8 x i32> %res 1404} 1405 1406define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1407 ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_256 1408 ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07] 1409 %q = load i32, i32* %ptr_b 1410 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1411 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1412 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1413 ret <8 x i32> %res 1414} 1415 1416declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1417 1418define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1419 ;CHECK-LABEL: test_mask_add_epi32_rr_256 1420 ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] 1421 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1422 ret <8 x i32> %res 1423} 1424 1425define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1426 ;CHECK-LABEL: test_mask_add_epi32_rrk_256 1427 ;CHECK: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] 1428 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1429 ret <8 x i32> %res 1430} 1431 1432define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1433 ;CHECK-LABEL: test_mask_add_epi32_rrkz_256 1434 ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1] 1435 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1436 ret <8 x i32> %res 1437} 1438 1439define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1440 ;CHECK-LABEL: test_mask_add_epi32_rm_256 1441 ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07] 1442 %b = load <8 x i32>, <8 x i32>* %ptr_b 1443 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1444 ret <8 x i32> %res 1445} 1446 1447define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1448 ;CHECK-LABEL: test_mask_add_epi32_rmk_256 1449 ;CHECK: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f] 1450 %b = load <8 x i32>, <8 x i32>* %ptr_b 1451 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1452 ret <8 x i32> %res 1453} 1454 1455define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1456 ;CHECK-LABEL: test_mask_add_epi32_rmkz_256 1457 ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07] 1458 %b = load <8 x i32>, <8 x i32>* %ptr_b 1459 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1460 ret <8 x i32> %res 1461} 1462 1463define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1464 ;CHECK-LABEL: test_mask_add_epi32_rmb_256 1465 ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07] 1466 %q = load i32, i32* %ptr_b 1467 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1468 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1469 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1470 ret <8 x i32> %res 1471} 1472 1473define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1474 ;CHECK-LABEL: test_mask_add_epi32_rmbk_256 1475 ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f] 1476 %q = load i32, i32* %ptr_b 1477 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1478 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1479 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1480 ret <8 x i32> %res 1481} 1482 1483define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1484 ;CHECK-LABEL: test_mask_add_epi32_rmbkz_256 1485 ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07] 1486 %q = load i32, i32* %ptr_b 1487 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1488 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1489 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1490 ret <8 x i32> %res 1491} 1492 1493declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1494 1495define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1496 ;CHECK-LABEL: test_mask_and_epi32_rr_128 1497 ;CHECK: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1] 1498 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1499 ret <4 x i32> %res 1500} 1501 1502define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1503 ;CHECK-LABEL: test_mask_and_epi32_rrk_128 1504 ;CHECK: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1] 1505 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1506 ret <4 x i32> %res 1507} 1508 1509define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1510 ;CHECK-LABEL: test_mask_and_epi32_rrkz_128 1511 ;CHECK: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1] 1512 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1513 ret <4 x i32> %res 1514} 1515 1516define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1517 ;CHECK-LABEL: test_mask_and_epi32_rm_128 1518 ;CHECK: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07] 1519 %b = load <4 x i32>, <4 x i32>* %ptr_b 1520 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1521 ret <4 x i32> %res 1522} 1523 1524define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1525 ;CHECK-LABEL: test_mask_and_epi32_rmk_128 1526 ;CHECK: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f] 1527 %b = load <4 x i32>, <4 x i32>* %ptr_b 1528 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1529 ret <4 x i32> %res 1530} 1531 1532define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1533 ;CHECK-LABEL: test_mask_and_epi32_rmkz_128 1534 ;CHECK: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07] 1535 %b = load <4 x i32>, <4 x i32>* %ptr_b 1536 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1537 ret <4 x i32> %res 1538} 1539 1540define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1541 ;CHECK-LABEL: test_mask_and_epi32_rmb_128 1542 ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07] 1543 %q = load i32, i32* %ptr_b 1544 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1545 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1546 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1547 ret <4 x i32> %res 1548} 1549 1550define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1551 ;CHECK-LABEL: test_mask_and_epi32_rmbk_128 1552 ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f] 1553 %q = load i32, i32* %ptr_b 1554 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1555 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1556 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1557 ret <4 x i32> %res 1558} 1559 1560define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1561 ;CHECK-LABEL: test_mask_and_epi32_rmbkz_128 1562 ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07] 1563 %q = load i32, i32* %ptr_b 1564 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1565 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1566 %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1567 ret <4 x i32> %res 1568} 1569 1570declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1571 1572define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1573 ;CHECK-LABEL: test_mask_and_epi32_rr_256 1574 ;CHECK: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1] 1575 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1576 ret <8 x i32> %res 1577} 1578 1579define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1580 ;CHECK-LABEL: test_mask_and_epi32_rrk_256 1581 ;CHECK: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1] 1582 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1583 ret <8 x i32> %res 1584} 1585 1586define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1587 ;CHECK-LABEL: test_mask_and_epi32_rrkz_256 1588 ;CHECK: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1] 1589 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1590 ret <8 x i32> %res 1591} 1592 1593define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1594 ;CHECK-LABEL: test_mask_and_epi32_rm_256 1595 ;CHECK: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07] 1596 %b = load <8 x i32>, <8 x i32>* %ptr_b 1597 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1598 ret <8 x i32> %res 1599} 1600 1601define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1602 ;CHECK-LABEL: test_mask_and_epi32_rmk_256 1603 ;CHECK: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f] 1604 %b = load <8 x i32>, <8 x i32>* %ptr_b 1605 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1606 ret <8 x i32> %res 1607} 1608 1609define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1610 ;CHECK-LABEL: test_mask_and_epi32_rmkz_256 1611 ;CHECK: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07] 1612 %b = load <8 x i32>, <8 x i32>* %ptr_b 1613 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1614 ret <8 x i32> %res 1615} 1616 1617define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1618 ;CHECK-LABEL: test_mask_and_epi32_rmb_256 1619 ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07] 1620 %q = load i32, i32* %ptr_b 1621 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1622 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1623 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1624 ret <8 x i32> %res 1625} 1626 1627define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1628 ;CHECK-LABEL: test_mask_and_epi32_rmbk_256 1629 ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f] 1630 %q = load i32, i32* %ptr_b 1631 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1632 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1633 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1634 ret <8 x i32> %res 1635} 1636 1637define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1638 ;CHECK-LABEL: test_mask_and_epi32_rmbkz_256 1639 ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07] 1640 %q = load i32, i32* %ptr_b 1641 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1642 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1643 %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1644 ret <8 x i32> %res 1645} 1646 1647declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1648 1649define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1650 ;CHECK-LABEL: test_mask_or_epi32_rr_128 1651 ;CHECK: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1] 1652 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1653 ret <4 x i32> %res 1654} 1655 1656define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1657 ;CHECK-LABEL: test_mask_or_epi32_rrk_128 1658 ;CHECK: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1] 1659 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1660 ret <4 x i32> %res 1661} 1662 1663define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1664 ;CHECK-LABEL: test_mask_or_epi32_rrkz_128 1665 ;CHECK: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1] 1666 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1667 ret <4 x i32> %res 1668} 1669 1670define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1671 ;CHECK-LABEL: test_mask_or_epi32_rm_128 1672 ;CHECK: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07] 1673 %b = load <4 x i32>, <4 x i32>* %ptr_b 1674 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1675 ret <4 x i32> %res 1676} 1677 1678define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1679 ;CHECK-LABEL: test_mask_or_epi32_rmk_128 1680 ;CHECK: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f] 1681 %b = load <4 x i32>, <4 x i32>* %ptr_b 1682 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1683 ret <4 x i32> %res 1684} 1685 1686define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1687 ;CHECK-LABEL: test_mask_or_epi32_rmkz_128 1688 ;CHECK: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07] 1689 %b = load <4 x i32>, <4 x i32>* %ptr_b 1690 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1691 ret <4 x i32> %res 1692} 1693 1694define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1695 ;CHECK-LABEL: test_mask_or_epi32_rmb_128 1696 ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07] 1697 %q = load i32, i32* %ptr_b 1698 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1699 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1700 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1701 ret <4 x i32> %res 1702} 1703 1704define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1705 ;CHECK-LABEL: test_mask_or_epi32_rmbk_128 1706 ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f] 1707 %q = load i32, i32* %ptr_b 1708 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1709 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1710 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1711 ret <4 x i32> %res 1712} 1713 1714define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1715 ;CHECK-LABEL: test_mask_or_epi32_rmbkz_128 1716 ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07] 1717 %q = load i32, i32* %ptr_b 1718 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1719 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1720 %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1721 ret <4 x i32> %res 1722} 1723 1724declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1725 1726define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1727 ;CHECK-LABEL: test_mask_or_epi32_rr_256 1728 ;CHECK: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1] 1729 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1730 ret <8 x i32> %res 1731} 1732 1733define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1734 ;CHECK-LABEL: test_mask_or_epi32_rrk_256 1735 ;CHECK: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1] 1736 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1737 ret <8 x i32> %res 1738} 1739 1740define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1741 ;CHECK-LABEL: test_mask_or_epi32_rrkz_256 1742 ;CHECK: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1] 1743 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1744 ret <8 x i32> %res 1745} 1746 1747define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1748 ;CHECK-LABEL: test_mask_or_epi32_rm_256 1749 ;CHECK: vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07] 1750 %b = load <8 x i32>, <8 x i32>* %ptr_b 1751 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1752 ret <8 x i32> %res 1753} 1754 1755define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1756 ;CHECK-LABEL: test_mask_or_epi32_rmk_256 1757 ;CHECK: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f] 1758 %b = load <8 x i32>, <8 x i32>* %ptr_b 1759 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1760 ret <8 x i32> %res 1761} 1762 1763define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1764 ;CHECK-LABEL: test_mask_or_epi32_rmkz_256 1765 ;CHECK: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07] 1766 %b = load <8 x i32>, <8 x i32>* %ptr_b 1767 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1768 ret <8 x i32> %res 1769} 1770 1771define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1772 ;CHECK-LABEL: test_mask_or_epi32_rmb_256 1773 ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07] 1774 %q = load i32, i32* %ptr_b 1775 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1776 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1777 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1778 ret <8 x i32> %res 1779} 1780 1781define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1782 ;CHECK-LABEL: test_mask_or_epi32_rmbk_256 1783 ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f] 1784 %q = load i32, i32* %ptr_b 1785 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1786 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1787 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1788 ret <8 x i32> %res 1789} 1790 1791define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1792 ;CHECK-LABEL: test_mask_or_epi32_rmbkz_256 1793 ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07] 1794 %q = load i32, i32* %ptr_b 1795 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1796 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1797 %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1798 ret <8 x i32> %res 1799} 1800 1801declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1802 1803define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1804 ;CHECK-LABEL: test_mask_xor_epi32_rr_128 1805 ;CHECK: vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1] 1806 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1807 ret <4 x i32> %res 1808} 1809 1810define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1811 ;CHECK-LABEL: test_mask_xor_epi32_rrk_128 1812 ;CHECK: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1] 1813 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1814 ret <4 x i32> %res 1815} 1816 1817define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1818 ;CHECK-LABEL: test_mask_xor_epi32_rrkz_128 1819 ;CHECK: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1] 1820 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1821 ret <4 x i32> %res 1822} 1823 1824define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1825 ;CHECK-LABEL: test_mask_xor_epi32_rm_128 1826 ;CHECK: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07] 1827 %b = load <4 x i32>, <4 x i32>* %ptr_b 1828 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1829 ret <4 x i32> %res 1830} 1831 1832define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1833 ;CHECK-LABEL: test_mask_xor_epi32_rmk_128 1834 ;CHECK: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f] 1835 %b = load <4 x i32>, <4 x i32>* %ptr_b 1836 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1837 ret <4 x i32> %res 1838} 1839 1840define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1841 ;CHECK-LABEL: test_mask_xor_epi32_rmkz_128 1842 ;CHECK: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07] 1843 %b = load <4 x i32>, <4 x i32>* %ptr_b 1844 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1845 ret <4 x i32> %res 1846} 1847 1848define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1849 ;CHECK-LABEL: test_mask_xor_epi32_rmb_128 1850 ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07] 1851 %q = load i32, i32* %ptr_b 1852 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1853 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1854 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1855 ret <4 x i32> %res 1856} 1857 1858define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1859 ;CHECK-LABEL: test_mask_xor_epi32_rmbk_128 1860 ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f] 1861 %q = load i32, i32* %ptr_b 1862 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1863 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1864 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1865 ret <4 x i32> %res 1866} 1867 1868define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1869 ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128 1870 ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07] 1871 %q = load i32, i32* %ptr_b 1872 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1873 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1874 %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1875 ret <4 x i32> %res 1876} 1877 1878declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1879 1880define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1881 ;CHECK-LABEL: test_mask_xor_epi32_rr_256 1882 ;CHECK: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1] 1883 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1884 ret <8 x i32> %res 1885} 1886 1887define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1888 ;CHECK-LABEL: test_mask_xor_epi32_rrk_256 1889 ;CHECK: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1] 1890 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1891 ret <8 x i32> %res 1892} 1893 1894define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1895 ;CHECK-LABEL: test_mask_xor_epi32_rrkz_256 1896 ;CHECK: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1] 1897 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1898 ret <8 x i32> %res 1899} 1900 1901define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1902 ;CHECK-LABEL: test_mask_xor_epi32_rm_256 1903 ;CHECK: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07] 1904 %b = load <8 x i32>, <8 x i32>* %ptr_b 1905 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1906 ret <8 x i32> %res 1907} 1908 1909define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1910 ;CHECK-LABEL: test_mask_xor_epi32_rmk_256 1911 ;CHECK: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f] 1912 %b = load <8 x i32>, <8 x i32>* %ptr_b 1913 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1914 ret <8 x i32> %res 1915} 1916 1917define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1918 ;CHECK-LABEL: test_mask_xor_epi32_rmkz_256 1919 ;CHECK: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07] 1920 %b = load <8 x i32>, <8 x i32>* %ptr_b 1921 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1922 ret <8 x i32> %res 1923} 1924 1925define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1926 ;CHECK-LABEL: test_mask_xor_epi32_rmb_256 1927 ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07] 1928 %q = load i32, i32* %ptr_b 1929 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1930 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1931 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1932 ret <8 x i32> %res 1933} 1934 1935define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1936 ;CHECK-LABEL: test_mask_xor_epi32_rmbk_256 1937 ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f] 1938 %q = load i32, i32* %ptr_b 1939 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1940 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1941 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1942 ret <8 x i32> %res 1943} 1944 1945define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1946 ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_256 1947 ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07] 1948 %q = load i32, i32* %ptr_b 1949 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1950 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1951 %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1952 ret <8 x i32> %res 1953} 1954 1955declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1956 1957define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1958 ;CHECK-LABEL: test_mask_andnot_epi32_rr_128 1959 ;CHECK: vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1] 1960 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1961 ret <4 x i32> %res 1962} 1963 1964define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1965 ;CHECK-LABEL: test_mask_andnot_epi32_rrk_128 1966 ;CHECK: vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1] 1967 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1968 ret <4 x i32> %res 1969} 1970 1971define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1972 ;CHECK-LABEL: test_mask_andnot_epi32_rrkz_128 1973 ;CHECK: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1] 1974 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1975 ret <4 x i32> %res 1976} 1977 1978define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1979 ;CHECK-LABEL: test_mask_andnot_epi32_rm_128 1980 ;CHECK: vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07] 1981 %b = load <4 x i32>, <4 x i32>* %ptr_b 1982 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1983 ret <4 x i32> %res 1984} 1985 1986define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1987 ;CHECK-LABEL: test_mask_andnot_epi32_rmk_128 1988 ;CHECK: vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f] 1989 %b = load <4 x i32>, <4 x i32>* %ptr_b 1990 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1991 ret <4 x i32> %res 1992} 1993 1994define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1995 ;CHECK-LABEL: test_mask_andnot_epi32_rmkz_128 1996 ;CHECK: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07] 1997 %b = load <4 x i32>, <4 x i32>* %ptr_b 1998 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1999 ret <4 x i32> %res 2000} 2001 2002define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 2003 ;CHECK-LABEL: test_mask_andnot_epi32_rmb_128 2004 ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07] 2005 %q = load i32, i32* %ptr_b 2006 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2007 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2008 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 2009 ret <4 x i32> %res 2010} 2011 2012define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 2013 ;CHECK-LABEL: test_mask_andnot_epi32_rmbk_128 2014 ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f] 2015 %q = load i32, i32* %ptr_b 2016 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2017 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2018 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 2019 ret <4 x i32> %res 2020} 2021 2022define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 2023 ;CHECK-LABEL: test_mask_andnot_epi32_rmbkz_128 2024 ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07] 2025 %q = load i32, i32* %ptr_b 2026 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2027 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2028 %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 2029 ret <4 x i32> %res 2030} 2031 2032declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2033 2034define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 2035 ;CHECK-LABEL: test_mask_andnot_epi32_rr_256 2036 ;CHECK: vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1] 2037 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2038 ret <8 x i32> %res 2039} 2040 2041define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 2042 ;CHECK-LABEL: test_mask_andnot_epi32_rrk_256 2043 ;CHECK: vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1] 2044 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2045 ret <8 x i32> %res 2046} 2047 2048define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 2049 ;CHECK-LABEL: test_mask_andnot_epi32_rrkz_256 2050 ;CHECK: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1] 2051 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2052 ret <8 x i32> %res 2053} 2054 2055define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 2056 ;CHECK-LABEL: test_mask_andnot_epi32_rm_256 2057 ;CHECK: vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07] 2058 %b = load <8 x i32>, <8 x i32>* %ptr_b 2059 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2060 ret <8 x i32> %res 2061} 2062 2063define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 2064 ;CHECK-LABEL: test_mask_andnot_epi32_rmk_256 2065 ;CHECK: vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f] 2066 %b = load <8 x i32>, <8 x i32>* %ptr_b 2067 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2068 ret <8 x i32> %res 2069} 2070 2071define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 2072 ;CHECK-LABEL: test_mask_andnot_epi32_rmkz_256 2073 ;CHECK: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07] 2074 %b = load <8 x i32>, <8 x i32>* %ptr_b 2075 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2076 ret <8 x i32> %res 2077} 2078 2079define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 2080 ;CHECK-LABEL: test_mask_andnot_epi32_rmb_256 2081 ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07] 2082 %q = load i32, i32* %ptr_b 2083 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2084 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2085 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 2086 ret <8 x i32> %res 2087} 2088 2089define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 2090 ;CHECK-LABEL: test_mask_andnot_epi32_rmbk_256 2091 ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f] 2092 %q = load i32, i32* %ptr_b 2093 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2094 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2095 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 2096 ret <8 x i32> %res 2097} 2098 2099define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 2100 ;CHECK-LABEL: test_mask_andnot_epi32_rmbkz_256 2101 ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07] 2102 %q = load i32, i32* %ptr_b 2103 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2104 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2105 %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 2106 ret <8 x i32> %res 2107} 2108 2109declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2110 2111define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) { 2112 ;CHECK-LABEL: test_mask_andnot_epi64_rr_128 2113 ;CHECK: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1] 2114 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 2115 ret <2 x i64> %res 2116} 2117 2118define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) { 2119 ;CHECK-LABEL: test_mask_andnot_epi64_rrk_128 2120 ;CHECK: vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1] 2121 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 2122 ret <2 x i64> %res 2123} 2124 2125define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) { 2126 ;CHECK-LABEL: test_mask_andnot_epi64_rrkz_128 2127 ;CHECK: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1] 2128 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 2129 ret <2 x i64> %res 2130} 2131 2132define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) { 2133 ;CHECK-LABEL: test_mask_andnot_epi64_rm_128 2134 ;CHECK: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07] 2135 %b = load <2 x i64>, <2 x i64>* %ptr_b 2136 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 2137 ret <2 x i64> %res 2138} 2139 2140define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) { 2141 ;CHECK-LABEL: test_mask_andnot_epi64_rmk_128 2142 ;CHECK: vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f] 2143 %b = load <2 x i64>, <2 x i64>* %ptr_b 2144 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 2145 ret <2 x i64> %res 2146} 2147 2148define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) { 2149 ;CHECK-LABEL: test_mask_andnot_epi64_rmkz_128 2150 ;CHECK: vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07] 2151 %b = load <2 x i64>, <2 x i64>* %ptr_b 2152 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 2153 ret <2 x i64> %res 2154} 2155 2156define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) { 2157 ;CHECK-LABEL: test_mask_andnot_epi64_rmb_128 2158 ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07] 2159 %q = load i64, i64* %ptr_b 2160 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 2161 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 2162 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1) 2163 ret <2 x i64> %res 2164} 2165 2166define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) { 2167 ;CHECK-LABEL: test_mask_andnot_epi64_rmbk_128 2168 ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f] 2169 %q = load i64, i64* %ptr_b 2170 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 2171 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 2172 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) 2173 ret <2 x i64> %res 2174} 2175 2176define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) { 2177 ;CHECK-LABEL: test_mask_andnot_epi64_rmbkz_128 2178 ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07] 2179 %q = load i64, i64* %ptr_b 2180 %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0 2181 %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer 2182 %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask) 2183 ret <2 x i64> %res 2184} 2185 2186declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2187 2188define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) { 2189 ;CHECK-LABEL: test_mask_andnot_epi64_rr_256 2190 ;CHECK: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1] 2191 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2192 ret <4 x i64> %res 2193} 2194 2195define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) { 2196 ;CHECK-LABEL: test_mask_andnot_epi64_rrk_256 2197 ;CHECK: vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1] 2198 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2199 ret <4 x i64> %res 2200} 2201 2202define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) { 2203 ;CHECK-LABEL: test_mask_andnot_epi64_rrkz_256 2204 ;CHECK: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1] 2205 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2206 ret <4 x i64> %res 2207} 2208 2209define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) { 2210 ;CHECK-LABEL: test_mask_andnot_epi64_rm_256 2211 ;CHECK: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07] 2212 %b = load <4 x i64>, <4 x i64>* %ptr_b 2213 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2214 ret <4 x i64> %res 2215} 2216 2217define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) { 2218 ;CHECK-LABEL: test_mask_andnot_epi64_rmk_256 2219 ;CHECK: vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f] 2220 %b = load <4 x i64>, <4 x i64>* %ptr_b 2221 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2222 ret <4 x i64> %res 2223} 2224 2225define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) { 2226 ;CHECK-LABEL: test_mask_andnot_epi64_rmkz_256 2227 ;CHECK: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07] 2228 %b = load <4 x i64>, <4 x i64>* %ptr_b 2229 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2230 ret <4 x i64> %res 2231} 2232 2233define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) { 2234 ;CHECK-LABEL: test_mask_andnot_epi64_rmb_256 2235 ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07] 2236 %q = load i64, i64* %ptr_b 2237 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 2238 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 2239 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1) 2240 ret <4 x i64> %res 2241} 2242 2243define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) { 2244 ;CHECK-LABEL: test_mask_andnot_epi64_rmbk_256 2245 ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f] 2246 %q = load i64, i64* %ptr_b 2247 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 2248 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 2249 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) 2250 ret <4 x i64> %res 2251} 2252 2253define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) { 2254 ;CHECK-LABEL: test_mask_andnot_epi64_rmbkz_256 2255 ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07] 2256 %q = load i64, i64* %ptr_b 2257 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0 2258 %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer 2259 %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask) 2260 ret <4 x i64> %res 2261} 2262 2263declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2264 2265define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) { 2266 ;CHECK: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02] 2267 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1) 2268 ret i8 %res 2269 } 2270 declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8) 2271 2272define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) { 2273 ;CHECK: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02] 2274 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1) 2275 ret i8 %res 2276 } 2277 declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8) 2278 2279define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) { 2280 ;CHECK: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02] 2281 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1) 2282 ret i8 %res 2283 } 2284 declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8) 2285 2286define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) { 2287 ;CHECK: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02] 2288 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1) 2289 ret i8 %res 2290 } 2291 declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8) 2292 2293define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2294 ;CHECK-LABEL: test_mm512_maskz_add_ps_256 2295 ;CHECK: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} 2296 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2297 ret <8 x float> %res 2298} 2299 2300define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2301 ;CHECK-LABEL: test_mm512_mask_add_ps_256 2302 ;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1} 2303 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2304 ret <8 x float> %res 2305} 2306 2307define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2308 ;CHECK-LABEL: test_mm512_add_ps_256 2309 ;CHECK: vaddps %ymm1, %ymm0, %ymm0 2310 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2311 ret <8 x float> %res 2312} 2313declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2314 2315define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2316 ;CHECK-LABEL: test_mm512_maskz_add_ps_128 2317 ;CHECK: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} 2318 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2319 ret <4 x float> %res 2320} 2321 2322define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2323 ;CHECK-LABEL: test_mm512_mask_add_ps_128 2324 ;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1} 2325 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2326 ret <4 x float> %res 2327} 2328 2329define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2330 ;CHECK-LABEL: test_mm512_add_ps_128 2331 ;CHECK: vaddps %xmm1, %xmm0, %xmm0 2332 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2333 ret <4 x float> %res 2334} 2335declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2336 2337define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2338 ;CHECK-LABEL: test_mm512_maskz_sub_ps_256 2339 ;CHECK: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} 2340 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2341 ret <8 x float> %res 2342} 2343 2344define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2345 ;CHECK-LABEL: test_mm512_mask_sub_ps_256 2346 ;CHECK: vsubps %ymm1, %ymm0, %ymm2 {%k1} 2347 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2348 ret <8 x float> %res 2349} 2350 2351define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2352 ;CHECK-LABEL: test_mm512_sub_ps_256 2353 ;CHECK: vsubps %ymm1, %ymm0, %ymm0 2354 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2355 ret <8 x float> %res 2356} 2357declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2358 2359define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2360 ;CHECK-LABEL: test_mm512_maskz_sub_ps_128 2361 ;CHECK: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} 2362 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2363 ret <4 x float> %res 2364} 2365 2366define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2367 ;CHECK-LABEL: test_mm512_mask_sub_ps_128 2368 ;CHECK: vsubps %xmm1, %xmm0, %xmm2 {%k1} 2369 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2370 ret <4 x float> %res 2371} 2372 2373define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2374 ;CHECK-LABEL: test_mm512_sub_ps_128 2375 ;CHECK: vsubps %xmm1, %xmm0, %xmm0 2376 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2377 ret <4 x float> %res 2378} 2379declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2380 2381define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2382 ;CHECK-LABEL: test_mm512_maskz_mul_ps_256 2383 ;CHECK: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} 2384 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2385 ret <8 x float> %res 2386} 2387 2388define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2389 ;CHECK-LABEL: test_mm512_mask_mul_ps_256 2390 ;CHECK: vmulps %ymm1, %ymm0, %ymm2 {%k1} 2391 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2392 ret <8 x float> %res 2393} 2394 2395define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2396 ;CHECK-LABEL: test_mm512_mul_ps_256 2397 ;CHECK: vmulps %ymm1, %ymm0, %ymm0 2398 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2399 ret <8 x float> %res 2400} 2401declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2402 2403define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2404 ;CHECK-LABEL: test_mm512_maskz_mul_ps_128 2405 ;CHECK: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} 2406 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2407 ret <4 x float> %res 2408} 2409 2410define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2411 ;CHECK-LABEL: test_mm512_mask_mul_ps_128 2412 ;CHECK: vmulps %xmm1, %xmm0, %xmm2 {%k1} 2413 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2414 ret <4 x float> %res 2415} 2416 2417define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2418 ;CHECK-LABEL: test_mm512_mul_ps_128 2419 ;CHECK: vmulps %xmm1, %xmm0, %xmm0 2420 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2421 ret <4 x float> %res 2422} 2423declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2424 2425define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2426 ;CHECK-LABEL: test_mm512_maskz_div_ps_256 2427 ;CHECK: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} 2428 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2429 ret <8 x float> %res 2430} 2431 2432define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2433 ;CHECK-LABEL: test_mm512_mask_div_ps_256 2434 ;CHECK: vdivps %ymm1, %ymm0, %ymm2 {%k1} 2435 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2436 ret <8 x float> %res 2437} 2438 2439define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2440 ;CHECK-LABEL: test_mm512_div_ps_256 2441 ;CHECK: vdivps %ymm1, %ymm0, %ymm0 2442 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2443 ret <8 x float> %res 2444} 2445declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2446 2447define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2448 ;CHECK-LABEL: test_mm512_maskz_div_ps_128 2449 ;CHECK: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} 2450 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2451 ret <4 x float> %res 2452} 2453 2454define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2455 ;CHECK-LABEL: test_mm512_mask_div_ps_128 2456 ;CHECK: vdivps %xmm1, %xmm0, %xmm2 {%k1} 2457 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2458 ret <4 x float> %res 2459} 2460 2461define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2462 ;CHECK-LABEL: test_mm512_div_ps_128 2463 ;CHECK: vdivps %xmm1, %xmm0, %xmm0 2464 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2465 ret <4 x float> %res 2466} 2467declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2468 2469define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2470 ;CHECK-LABEL: test_mm512_maskz_max_ps_256 2471 ;CHECK: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} 2472 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2473 ret <8 x float> %res 2474} 2475 2476define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2477 ;CHECK-LABEL: test_mm512_mask_max_ps_256 2478 ;CHECK: vmaxps %ymm1, %ymm0, %ymm2 {%k1} 2479 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2480 ret <8 x float> %res 2481} 2482 2483define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2484 ;CHECK-LABEL: test_mm512_max_ps_256 2485 ;CHECK: vmaxps %ymm1, %ymm0, %ymm0 2486 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2487 ret <8 x float> %res 2488} 2489declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2490 2491define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2492 ;CHECK-LABEL: test_mm512_maskz_max_ps_128 2493 ;CHECK: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} 2494 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2495 ret <4 x float> %res 2496} 2497 2498define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2499 ;CHECK-LABEL: test_mm512_mask_max_ps_128 2500 ;CHECK: vmaxps %xmm1, %xmm0, %xmm2 {%k1} 2501 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2502 ret <4 x float> %res 2503} 2504 2505define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2506 ;CHECK-LABEL: test_mm512_max_ps_128 2507 ;CHECK: vmaxps %xmm1, %xmm0, %xmm0 2508 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2509 ret <4 x float> %res 2510} 2511declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2512 2513define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2514 ;CHECK-LABEL: test_mm512_maskz_min_ps_256 2515 ;CHECK: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} 2516 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2517 ret <8 x float> %res 2518} 2519 2520define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2521 ;CHECK-LABEL: test_mm512_mask_min_ps_256 2522 ;CHECK: vminps %ymm1, %ymm0, %ymm2 {%k1} 2523 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2524 ret <8 x float> %res 2525} 2526 2527define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2528 ;CHECK-LABEL: test_mm512_min_ps_256 2529 ;CHECK: vminps %ymm1, %ymm0, %ymm0 2530 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2531 ret <8 x float> %res 2532} 2533declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2534 2535define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2536 ;CHECK-LABEL: test_mm512_maskz_min_ps_128 2537 ;CHECK: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} 2538 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2539 ret <4 x float> %res 2540} 2541 2542define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2543 ;CHECK-LABEL: test_mm512_mask_min_ps_128 2544 ;CHECK: vminps %xmm1, %xmm0, %xmm2 {%k1} 2545 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2546 ret <4 x float> %res 2547} 2548 2549define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2550 ;CHECK-LABEL: test_mm512_min_ps_128 2551 ;CHECK: vminps %xmm1, %xmm0, %xmm0 2552 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2553 ret <4 x float> %res 2554} 2555declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2556 2557define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) { 2558 ; CHECK-LABEL: test_sqrt_pd_256 2559 ; CHECK: vsqrtpd 2560 %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask) 2561 ret <4 x double> %res 2562} 2563declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 2564 2565define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) { 2566 ; CHECK-LABEL: test_sqrt_ps_256 2567 ; CHECK: vsqrtps 2568 %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask) 2569 ret <8 x float> %res 2570} 2571 2572declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 2573 2574define <4 x double> @test_getexp_pd_256(<4 x double> %a0) { 2575 ; CHECK-LABEL: test_getexp_pd_256 2576 ; CHECK: vgetexppd 2577 %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1) 2578 ret <4 x double> %res 2579} 2580 2581declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 2582 2583define <8 x float> @test_getexp_ps_256(<8 x float> %a0) { 2584 ; CHECK-LABEL: test_getexp_ps_256 2585 ; CHECK: vgetexpps 2586 %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1) 2587 ret <8 x float> %res 2588} 2589declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 2590 2591declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2592 2593; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_128 2594; CHECK-NOT: call 2595; CHECK: vpmaxsd %xmm 2596; CHECK: {%k1} 2597define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 2598 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask) 2599 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 2600 %res2 = add <4 x i32> %res, %res1 2601 ret <4 x i32> %res2 2602} 2603 2604declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2605 2606; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_256 2607; CHECK-NOT: call 2608; CHECK: vpmaxsd %ymm 2609; CHECK: {%k1} 2610define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2611 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2612 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2613 %res2 = add <8 x i32> %res, %res1 2614 ret <8 x i32> %res2 2615} 2616 2617declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2618 2619; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_128 2620; CHECK-NOT: call 2621; CHECK: vpmaxsq %xmm 2622; CHECK: {%k1} 2623define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2624 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2625 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2626 %res2 = add <2 x i64> %res, %res1 2627 ret <2 x i64> %res2 2628} 2629 2630declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2631 2632; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_256 2633; CHECK-NOT: call 2634; CHECK: vpmaxsq %ymm 2635; CHECK: {%k1} 2636define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 2637 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 2638 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 2639 %res2 = add <4 x i64> %res, %res1 2640 ret <4 x i64> %res2 2641} 2642 2643declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2644 2645; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_128 2646; CHECK-NOT: call 2647; CHECK: vpmaxud %xmm 2648; CHECK: {%k1} 2649define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2,i8 %mask) { 2650 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 2651 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 2652 %res2 = add <4 x i32> %res, %res1 2653 ret <4 x i32> %res2 2654} 2655 2656declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2657 2658; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_256 2659; CHECK-NOT: call 2660; CHECK: vpmaxud %ymm 2661; CHECK: {%k1} 2662define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2663 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2664 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2665 %res2 = add <8 x i32> %res, %res1 2666 ret <8 x i32> %res2 2667} 2668 2669declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2670 2671; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_128 2672; CHECK-NOT: call 2673; CHECK: vpmaxuq %xmm 2674; CHECK: {%k1} 2675define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2676 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2677 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2678 %res2 = add <2 x i64> %res, %res1 2679 ret <2 x i64> %res2 2680} 2681 2682declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2683 2684; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_256 2685; CHECK-NOT: call 2686; CHECK: vpmaxuq %ymm 2687; CHECK: {%k1} 2688define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 2689 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 2690 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 2691 %res2 = add <4 x i64> %res, %res1 2692 ret <4 x i64> %res2 2693} 2694 2695declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2696 2697; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_128 2698; CHECK-NOT: call 2699; CHECK: vpminsd %xmm 2700; CHECK: {%k1} 2701define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 2702 %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 2703 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 2704 %res2 = add <4 x i32> %res, %res1 2705 ret <4 x i32> %res2 2706} 2707 2708declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2709 2710; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_256 2711; CHECK-NOT: call 2712; CHECK: vpminsd %ymm 2713; CHECK: {%k1} 2714define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2715 %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2716 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2717 %res2 = add <8 x i32> %res, %res1 2718 ret <8 x i32> %res2 2719} 2720 2721declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2722 2723; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_128 2724; CHECK-NOT: call 2725; CHECK: vpminsq %xmm 2726; CHECK: {%k1} 2727define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2728 %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2729 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2730 %res2 = add <2 x i64> %res, %res1 2731 ret <2 x i64> %res2 2732} 2733 2734declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2735 2736; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_256 2737; CHECK-NOT: call 2738; CHECK: vpminsq %ymm 2739; CHECK: {%k1} 2740define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 2741 %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 2742 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 2743 %res2 = add <4 x i64> %res, %res1 2744 ret <4 x i64> %res2 2745} 2746 2747declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2748 2749; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_128 2750; CHECK-NOT: call 2751; CHECK: vpminud %xmm 2752; CHECK: {%k1} 2753define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 2754 %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 2755 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 2756 %res2 = add <4 x i32> %res, %res1 2757 ret <4 x i32> %res2 2758} 2759 2760declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2761 2762; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_256 2763; CHECK-NOT: call 2764; CHECK: vpminud %ymm 2765; CHECK: {%k1} 2766define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2767 %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2768 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2769 %res2 = add <8 x i32> %res, %res1 2770 ret <8 x i32> %res2 2771} 2772 2773declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2774 2775; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_128 2776; CHECK-NOT: call 2777; CHECK: vpminuq %xmm 2778; CHECK: {%k1} 2779define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2780 %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2781 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2782 %res2 = add <2 x i64> %res, %res1 2783 ret <2 x i64> %res2 2784} 2785 2786declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2787 2788; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_256 2789; CHECK-NOT: call 2790; CHECK: vpminuq %ymm 2791; CHECK: {%k1} 2792define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 2793 %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 2794 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 2795 %res2 = add <4 x i64> %res, %res1 2796 ret <4 x i64> %res2 2797} 2798 2799declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2800 2801; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_128 2802; CHECK-NOT: call 2803; CHECK: kmov 2804; CHECK: vpermt2d %xmm{{.*}}{%k1} 2805; CHECK-NOT: {z} 2806define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 2807 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 2808 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 2809 %res2 = add <4 x i32> %res, %res1 2810 ret <4 x i32> %res2 2811} 2812 2813declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2814 2815; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_128 2816; CHECK-NOT: call 2817; CHECK: kmov 2818; CHECK: vpermt2d %xmm{{.*}}{%k1} {z} 2819define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 2820 %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 2821 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 2822 %res2 = add <4 x i32> %res, %res1 2823 ret <4 x i32> %res2 2824} 2825 2826declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2827 2828; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_256 2829; CHECK-NOT: call 2830; CHECK: kmov 2831; CHECK: vpermt2d %ymm{{.*}}{%k1} 2832; CHECK-NOT: {z} 2833define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2834 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2835 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2836 %res2 = add <8 x i32> %res, %res1 2837 ret <8 x i32> %res2 2838} 2839 2840declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2841 2842; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_256 2843; CHECK-NOT: call 2844; CHECK: kmov 2845; CHECK: vpermt2d {{.*}}{%k1} {z} 2846define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2847 %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2848 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2849 %res2 = add <8 x i32> %res, %res1 2850 ret <8 x i32> %res2 2851} 2852 2853declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) 2854 2855; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_128 2856; CHECK-NOT: call 2857; CHECK: kmov 2858; CHECK: vpermi2pd %xmm{{.*}}{%k1} 2859define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { 2860 %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) 2861 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) 2862 %res2 = fadd <2 x double> %res, %res1 2863 ret <2 x double> %res2 2864} 2865 2866declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) 2867 2868; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_256 2869; CHECK-NOT: call 2870; CHECK: kmov 2871; CHECK: vpermi2pd %ymm{{.*}}{%k1} 2872define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 2873 %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 2874 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 2875 %res2 = fadd <4 x double> %res, %res1 2876 ret <4 x double> %res2 2877} 2878 2879declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) 2880 2881; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_128 2882; CHECK-NOT: call 2883; CHECK: kmov 2884; CHECK: vpermi2ps %xmm{{.*}}{%k1} 2885define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { 2886 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) 2887 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) 2888 %res2 = fadd <4 x float> %res, %res1 2889 ret <4 x float> %res2 2890} 2891 2892declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) 2893 2894; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_256 2895; CHECK-NOT: call 2896; CHECK: kmov 2897; CHECK: vpermi2ps %ymm{{.*}}{%k1} 2898define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 2899 %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 2900 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 2901 %res2 = fadd <8 x float> %res, %res1 2902 ret <8 x float> %res2 2903} 2904 2905declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8) 2906 2907; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128 2908; CHECK-NOT: call 2909; CHECK: kmov 2910; CHECK: vpabsq{{.*}}{%k1} 2911define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { 2912 %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 2913 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) 2914 %res2 = add <2 x i64> %res, %res1 2915 ret <2 x i64> %res2 2916} 2917 2918declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) 2919 2920; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256 2921; CHECK-NOT: call 2922; CHECK: kmov 2923; CHECK: vpabsq{{.*}}{%k1} 2924define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { 2925 %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 2926 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) 2927 %res2 = add <4 x i64> %res, %res1 2928 ret <4 x i64> %res2 2929} 2930 2931declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8) 2932 2933; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128 2934; CHECK-NOT: call 2935; CHECK: kmov 2936; CHECK: vpabsd{{.*}}{%k1} 2937define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { 2938 %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 2939 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) 2940 %res2 = add <4 x i32> %res, %res1 2941 ret <4 x i32> %res2 2942} 2943 2944declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8) 2945 2946; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256 2947; CHECK-NOT: call 2948; CHECK: kmov 2949; CHECK: vpabsd{{.*}}{%k1} 2950define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { 2951 %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) 2952 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) 2953 %res2 = add <8 x i32> %res, %res1 2954 ret <8 x i32> %res2 2955} 2956 2957 2958declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 2959 2960; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_128 2961; CHECK-NOT: call 2962; CHECK: kmov 2963; CHECK: vscalefpd{{.*}}{%k1} 2964define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 2965 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 2966 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 2967 %res2 = fadd <2 x double> %res, %res1 2968 ret <2 x double> %res2 2969} 2970 2971declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 2972 2973; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_256 2974; CHECK-NOT: call 2975; CHECK: kmov 2976; CHECK: vscalefpd{{.*}}{%k1} 2977define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 2978 %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 2979 %res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 2980 %res2 = fadd <4 x double> %res, %res1 2981 ret <4 x double> %res2 2982} 2983 2984declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2985; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_128 2986; CHECK-NOT: call 2987; CHECK: kmov 2988; CHECK: vscalefps{{.*}}{%k1} 2989define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 2990 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 2991 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 2992 %res2 = fadd <4 x float> %res, %res1 2993 ret <4 x float> %res2 2994} 2995 2996declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2997; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_256 2998; CHECK-NOT: call 2999; CHECK: kmov 3000; CHECK: vscalefps{{.*}}{%k1} 3001define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 3002 %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 3003 %res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 3004 %res2 = fadd <8 x float> %res, %res1 3005 ret <8 x float> %res2 3006} 3007 3008declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 3009 3010define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 3011; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128: 3012; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} 3013; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1] 3014; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1] 3015; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1] 3016 %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 3017 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 3018 %res2 = fadd <2 x double> %res, %res1 3019 ret <2 x double> %res2 3020} 3021 3022declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 3023 3024define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 3025; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256: 3026; CHECK: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} 3027; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3] 3028; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xc1] 3029; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 3030 %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 3031 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 3032 %res2 = fadd <4 x double> %res, %res1 3033 ret <4 x double> %res2 3034} 3035 3036declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 3037 3038define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 3039; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128: 3040; CHECK: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} 3041; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3] 3042; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xc1] 3043; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3044 %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 3045 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 3046 %res2 = fadd <4 x float> %res, %res1 3047 ret <4 x float> %res2 3048} 3049 3050declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 3051 3052define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 3053; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256: 3054; CHECK: ## BB#0: 3055; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} 3056; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7] 3057; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1] 3058; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 3059 %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 3060 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 3061 %res2 = fadd <8 x float> %res, %res1 3062 ret <8 x float> %res2 3063} 3064 3065declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 3066 3067define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 3068; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128: 3069; CHECK: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} 3070; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0] 3071; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xc1] 3072; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0] 3073 %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 3074 %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 3075 %res2 = fadd <2 x double> %res, %res1 3076 ret <2 x double> %res2 3077} 3078 3079declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 3080 3081define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 3082; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256: 3083; CHECK: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} 3084; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2] 3085; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xc1] 3086; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 3087 %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 3088 %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 3089 %res2 = fadd <4 x double> %res, %res1 3090 ret <4 x double> %res2 3091} 3092 3093declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 3094 3095define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 3096; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128: 3097; CHECK: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} 3098; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1] 3099; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xc1] 3100; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3101 %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 3102 %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 3103 %res2 = fadd <4 x float> %res, %res1 3104 ret <4 x float> %res2 3105} 3106 3107declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 3108 3109define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 3110; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256: 3111; CHECK: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} 3112; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5] 3113; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xc1] 3114; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 3115 %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 3116 %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 3117 %res2 = fadd <8 x float> %res, %res1 3118 ret <8 x float> %res2 3119} 3120 3121declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 3122 3123define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 3124; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128: 3125; CHECK: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} 3126; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3] 3127; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xc1] 3128; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3129 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 3130 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 3131 %res2 = add <4 x i32> %res, %res1 3132 ret <4 x i32> %res2 3133} 3134 3135declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 3136 3137define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 3138; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128: 3139; CHECK: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} 3140; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1] 3141; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xc1] 3142; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3143 %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 3144 %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 3145 %res2 = add <4 x i32> %res, %res1 3146 ret <4 x i32> %res2 3147} 3148 3149declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 3150 3151define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 3152; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256: 3153; CHECK: ## BB#0: 3154; CHECK: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} 3155; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7] 3156; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xc1] 3157; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] 3158 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 3159 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 3160 %res2 = add <8 x i32> %res, %res1 3161 ret <8 x i32> %res2 3162} 3163 3164declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 3165 3166define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 3167; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256: 3168; CHECK: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} 3169; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5] 3170; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xc1] 3171; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] 3172 %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 3173 %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 3174 %res2 = add <8 x i32> %res, %res1 3175 ret <8 x i32> %res2 3176} 3177 3178declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 3179 3180define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 3181; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128: 3182; CHECK: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} 3183; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1] 3184; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xc1] 3185; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1] 3186 %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 3187 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 3188 %res2 = add <2 x i64> %res, %res1 3189 ret <2 x i64> %res2 3190} 3191 3192declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 3193 3194define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 3195; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128: 3196; CHECK: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} 3197; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0] 3198; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xc1] 3199; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0] 3200 %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 3201 %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 3202 %res2 = add <2 x i64> %res, %res1 3203 ret <2 x i64> %res2 3204} 3205 3206declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 3207 3208define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 3209; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256: 3210; CHECK: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} 3211; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2] 3212; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xc1] 3213; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] 3214 %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 3215 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 3216 %res2 = add <4 x i64> %res, %res1 3217 ret <4 x i64> %res2 3218} 3219 3220declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 3221 3222define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 3223; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256: 3224; CHECK: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} 3225; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3] 3226; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xc1] 3227; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 3228 %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 3229 %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 3230 %res2 = add <4 x i64> %res, %res1 3231 ret <4 x i64> %res2 3232} 3233 3234declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8) 3235 3236define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { 3237; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128: 3238; CHECK: vpmovqb %xmm0, %xmm1 {%k1} 3239; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} 3240; CHECK-NEXT: vpmovqb %xmm0, %xmm0 3241 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) 3242 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) 3243 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 3244 %res3 = add <16 x i8> %res0, %res1 3245 %res4 = add <16 x i8> %res3, %res2 3246 ret <16 x i8> %res4 3247} 3248 3249declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8) 3250 3251define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3252; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128: 3253; CHECK: vpmovqb %xmm0, (%rdi) 3254; CHECK: vpmovqb %xmm0, (%rdi) {%k1} 3255 call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3256 call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3257 ret void 3258} 3259 3260declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8) 3261 3262define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { 3263; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128: 3264; CHECK: vpmovsqb %xmm0, %xmm1 {%k1} 3265; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} 3266; CHECK-NEXT: vpmovsqb %xmm0, %xmm0 3267 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) 3268 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) 3269 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 3270 %res3 = add <16 x i8> %res0, %res1 3271 %res4 = add <16 x i8> %res3, %res2 3272 ret <16 x i8> %res4 3273} 3274 3275declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8) 3276 3277define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3278; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128: 3279; CHECK: vpmovsqb %xmm0, (%rdi) 3280; CHECK: vpmovsqb %xmm0, (%rdi) {%k1} 3281 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3282 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3283 ret void 3284} 3285 3286declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8) 3287 3288define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { 3289; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128: 3290; CHECK: vpmovusqb %xmm0, %xmm1 {%k1} 3291; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} 3292; CHECK-NEXT: vpmovusqb %xmm0, %xmm0 3293 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) 3294 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) 3295 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 3296 %res3 = add <16 x i8> %res0, %res1 3297 %res4 = add <16 x i8> %res3, %res2 3298 ret <16 x i8> %res4 3299} 3300 3301declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8) 3302 3303define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3304; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128: 3305; CHECK: vpmovusqb %xmm0, (%rdi) 3306; CHECK: vpmovusqb %xmm0, (%rdi) {%k1} 3307 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3308 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3309 ret void 3310} 3311 3312declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8) 3313 3314define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { 3315; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256: 3316; CHECK: vpmovqb %ymm0, %xmm1 {%k1} 3317; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} 3318; CHECK-NEXT: vpmovqb %ymm0, %xmm0 3319 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) 3320 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) 3321 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 3322 %res3 = add <16 x i8> %res0, %res1 3323 %res4 = add <16 x i8> %res3, %res2 3324 ret <16 x i8> %res4 3325} 3326 3327declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8) 3328 3329define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3330; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256: 3331; CHECK: vpmovqb %ymm0, (%rdi) 3332; CHECK: vpmovqb %ymm0, (%rdi) {%k1} 3333 call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3334 call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3335 ret void 3336} 3337 3338declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8) 3339 3340define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { 3341; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256: 3342; CHECK: vpmovsqb %ymm0, %xmm1 {%k1} 3343; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} 3344; CHECK-NEXT: vpmovsqb %ymm0, %xmm0 3345 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) 3346 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) 3347 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 3348 %res3 = add <16 x i8> %res0, %res1 3349 %res4 = add <16 x i8> %res3, %res2 3350 ret <16 x i8> %res4 3351} 3352 3353declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8) 3354 3355define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3356; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256: 3357; CHECK: vpmovsqb %ymm0, (%rdi) 3358; CHECK: vpmovsqb %ymm0, (%rdi) {%k1} 3359 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3360 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3361 ret void 3362} 3363 3364declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8) 3365 3366define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { 3367; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256: 3368; CHECK: vpmovusqb %ymm0, %xmm1 {%k1} 3369; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} 3370; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 3371 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) 3372 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) 3373 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 3374 %res3 = add <16 x i8> %res0, %res1 3375 %res4 = add <16 x i8> %res3, %res2 3376 ret <16 x i8> %res4 3377} 3378 3379declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8) 3380 3381define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3382; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256: 3383; CHECK: vpmovusqb %ymm0, (%rdi) 3384; CHECK: vpmovusqb %ymm0, (%rdi) {%k1} 3385 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3386 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3387 ret void 3388} 3389 3390declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8) 3391 3392define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3393; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128: 3394; CHECK: vpmovqw %xmm0, %xmm1 {%k1} 3395; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} 3396; CHECK-NEXT: vpmovqw %xmm0, %xmm0 3397 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) 3398 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) 3399 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3400 %res3 = add <8 x i16> %res0, %res1 3401 %res4 = add <8 x i16> %res3, %res2 3402 ret <8 x i16> %res4 3403} 3404 3405declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8) 3406 3407define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3408; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128: 3409; CHECK: vpmovqw %xmm0, (%rdi) 3410; CHECK: vpmovqw %xmm0, (%rdi) {%k1} 3411 call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3412 call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3413 ret void 3414} 3415 3416declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8) 3417 3418define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3419; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128: 3420; CHECK: vpmovsqw %xmm0, %xmm1 {%k1} 3421; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} 3422; CHECK-NEXT: vpmovsqw %xmm0, %xmm0 3423 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) 3424 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) 3425 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3426 %res3 = add <8 x i16> %res0, %res1 3427 %res4 = add <8 x i16> %res3, %res2 3428 ret <8 x i16> %res4 3429} 3430 3431declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8) 3432 3433define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3434; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128: 3435; CHECK: vpmovsqw %xmm0, (%rdi) 3436; CHECK: vpmovsqw %xmm0, (%rdi) {%k1} 3437 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3438 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3439 ret void 3440} 3441 3442declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8) 3443 3444define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3445; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128: 3446; CHECK: vpmovusqw %xmm0, %xmm1 {%k1} 3447; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} 3448; CHECK-NEXT: vpmovusqw %xmm0, %xmm0 3449 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) 3450 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) 3451 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3452 %res3 = add <8 x i16> %res0, %res1 3453 %res4 = add <8 x i16> %res3, %res2 3454 ret <8 x i16> %res4 3455} 3456 3457declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8) 3458 3459define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3460; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128: 3461; CHECK: vpmovusqw %xmm0, (%rdi) 3462; CHECK: vpmovusqw %xmm0, (%rdi) {%k1} 3463 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3464 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3465 ret void 3466} 3467 3468declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8) 3469 3470define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3471; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256: 3472; CHECK: vpmovqw %ymm0, %xmm1 {%k1} 3473; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} 3474; CHECK-NEXT: vpmovqw %ymm0, %xmm0 3475 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) 3476 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) 3477 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3478 %res3 = add <8 x i16> %res0, %res1 3479 %res4 = add <8 x i16> %res3, %res2 3480 ret <8 x i16> %res4 3481} 3482 3483declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8) 3484 3485define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3486; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256: 3487; CHECK: vpmovqw %ymm0, (%rdi) 3488; CHECK: vpmovqw %ymm0, (%rdi) {%k1} 3489 call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3490 call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3491 ret void 3492} 3493 3494declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8) 3495 3496define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3497; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256: 3498; CHECK: vpmovsqw %ymm0, %xmm1 {%k1} 3499; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z} 3500; CHECK-NEXT: vpmovsqw %ymm0, %xmm0 3501 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) 3502 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) 3503 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3504 %res3 = add <8 x i16> %res0, %res1 3505 %res4 = add <8 x i16> %res3, %res2 3506 ret <8 x i16> %res4 3507} 3508 3509declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8) 3510 3511define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3512; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256: 3513; CHECK: vpmovsqw %ymm0, (%rdi) 3514; CHECK: vpmovsqw %ymm0, (%rdi) {%k1} 3515 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3516 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3517 ret void 3518} 3519 3520declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8) 3521 3522define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3523; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256: 3524; CHECK: vpmovusqw %ymm0, %xmm1 {%k1} 3525; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} 3526; CHECK-NEXT: vpmovusqw %ymm0, %xmm0 3527 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) 3528 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) 3529 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3530 %res3 = add <8 x i16> %res0, %res1 3531 %res4 = add <8 x i16> %res3, %res2 3532 ret <8 x i16> %res4 3533} 3534 3535declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8) 3536 3537define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3538; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256: 3539; CHECK: vpmovusqw %ymm0, (%rdi) 3540; CHECK: vpmovusqw %ymm0, (%rdi) {%k1} 3541 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3542 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3543 ret void 3544} 3545 3546declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8) 3547 3548define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3549; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128: 3550; CHECK: vpmovqd %xmm0, %xmm1 {%k1} 3551; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} 3552; CHECK-NEXT: vpmovqd %xmm0, %xmm0 3553 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) 3554 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) 3555 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3556 %res3 = add <4 x i32> %res0, %res1 3557 %res4 = add <4 x i32> %res3, %res2 3558 ret <4 x i32> %res4 3559} 3560 3561declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8) 3562 3563define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3564; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128: 3565; CHECK: vpmovqd %xmm0, (%rdi) 3566; CHECK: vpmovqd %xmm0, (%rdi) {%k1} 3567 call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3568 call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3569 ret void 3570} 3571 3572declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8) 3573 3574define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3575; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128: 3576; CHECK: vpmovsqd %xmm0, %xmm1 {%k1} 3577; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} 3578; CHECK-NEXT: vpmovsqd %xmm0, %xmm0 3579 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) 3580 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) 3581 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3582 %res3 = add <4 x i32> %res0, %res1 3583 %res4 = add <4 x i32> %res3, %res2 3584 ret <4 x i32> %res4 3585} 3586 3587declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8) 3588 3589define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3590; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128: 3591; CHECK: vpmovsqd %xmm0, (%rdi) 3592; CHECK: vpmovsqd %xmm0, (%rdi) {%k1} 3593 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3594 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3595 ret void 3596} 3597 3598declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8) 3599 3600define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3601; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128: 3602; CHECK: vpmovusqd %xmm0, %xmm1 {%k1} 3603; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} 3604; CHECK-NEXT: vpmovusqd %xmm0, %xmm0 3605 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) 3606 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) 3607 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3608 %res3 = add <4 x i32> %res0, %res1 3609 %res4 = add <4 x i32> %res3, %res2 3610 ret <4 x i32> %res4 3611} 3612 3613declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8) 3614 3615define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3616; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128: 3617; CHECK: vpmovusqd %xmm0, (%rdi) 3618; CHECK: vpmovusqd %xmm0, (%rdi) {%k1} 3619 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3620 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3621 ret void 3622} 3623 3624declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8) 3625 3626define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3627; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256: 3628; CHECK: vpmovqd %ymm0, %xmm1 {%k1} 3629; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z} 3630; CHECK-NEXT: vpmovqd %ymm0, %xmm0 3631 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) 3632 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) 3633 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3634 %res3 = add <4 x i32> %res0, %res1 3635 %res4 = add <4 x i32> %res3, %res2 3636 ret <4 x i32> %res4 3637} 3638 3639declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8) 3640 3641define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3642; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256: 3643; CHECK: vpmovqd %ymm0, (%rdi) 3644; CHECK: vpmovqd %ymm0, (%rdi) {%k1} 3645 call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3646 call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3647 ret void 3648} 3649 3650declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8) 3651 3652define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3653; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256: 3654; CHECK: vpmovsqd %ymm0, %xmm1 {%k1} 3655; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z} 3656; CHECK-NEXT: vpmovsqd %ymm0, %xmm0 3657 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) 3658 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) 3659 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3660 %res3 = add <4 x i32> %res0, %res1 3661 %res4 = add <4 x i32> %res3, %res2 3662 ret <4 x i32> %res4 3663} 3664 3665declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8) 3666 3667define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3668; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256: 3669; CHECK: vpmovsqd %ymm0, (%rdi) 3670; CHECK: vpmovsqd %ymm0, (%rdi) {%k1} 3671 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3672 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3673 ret void 3674} 3675 3676declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8) 3677 3678define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3679; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256: 3680; CHECK: vpmovusqd %ymm0, %xmm1 {%k1} 3681; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z} 3682; CHECK-NEXT: vpmovusqd %ymm0, %xmm0 3683 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) 3684 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) 3685 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3686 %res3 = add <4 x i32> %res0, %res1 3687 %res4 = add <4 x i32> %res3, %res2 3688 ret <4 x i32> %res4 3689} 3690 3691declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8) 3692 3693define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3694; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256: 3695; CHECK: vpmovusqd %ymm0, (%rdi) 3696; CHECK: vpmovusqd %ymm0, (%rdi) {%k1} 3697 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3698 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3699 ret void 3700} 3701 3702declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8) 3703 3704define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3705; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128: 3706; CHECK: vpmovdb %xmm0, %xmm1 {%k1} 3707; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} 3708; CHECK-NEXT: vpmovdb %xmm0, %xmm0 3709 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) 3710 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) 3711 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3712 %res3 = add <16 x i8> %res0, %res1 3713 %res4 = add <16 x i8> %res3, %res2 3714 ret <16 x i8> %res4 3715} 3716 3717declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8) 3718 3719define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3720; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128: 3721; CHECK: vpmovdb %xmm0, (%rdi) 3722; CHECK: vpmovdb %xmm0, (%rdi) {%k1} 3723 call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3724 call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3725 ret void 3726} 3727 3728declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8) 3729 3730define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3731; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128: 3732; CHECK: vpmovsdb %xmm0, %xmm1 {%k1} 3733; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} 3734; CHECK-NEXT: vpmovsdb %xmm0, %xmm0 3735 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) 3736 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) 3737 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3738 %res3 = add <16 x i8> %res0, %res1 3739 %res4 = add <16 x i8> %res3, %res2 3740 ret <16 x i8> %res4 3741} 3742 3743declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8) 3744 3745define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3746; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128: 3747; CHECK: vpmovsdb %xmm0, (%rdi) 3748; CHECK: vpmovsdb %xmm0, (%rdi) {%k1} 3749 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3750 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3751 ret void 3752} 3753 3754declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8) 3755 3756define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3757; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128: 3758; CHECK: vpmovusdb %xmm0, %xmm1 {%k1} 3759; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} 3760; CHECK-NEXT: vpmovusdb %xmm0, %xmm0 3761 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) 3762 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) 3763 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3764 %res3 = add <16 x i8> %res0, %res1 3765 %res4 = add <16 x i8> %res3, %res2 3766 ret <16 x i8> %res4 3767} 3768 3769declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8) 3770 3771define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3772; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128: 3773; CHECK: vpmovusdb %xmm0, (%rdi) 3774; CHECK: vpmovusdb %xmm0, (%rdi) {%k1} 3775 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3776 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3777 ret void 3778} 3779 3780declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8) 3781 3782define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3783; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256: 3784; CHECK: vpmovdb %ymm0, %xmm1 {%k1} 3785; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} 3786; CHECK-NEXT: vpmovdb %ymm0, %xmm0 3787 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) 3788 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) 3789 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3790 %res3 = add <16 x i8> %res0, %res1 3791 %res4 = add <16 x i8> %res3, %res2 3792 ret <16 x i8> %res4 3793} 3794 3795declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8) 3796 3797define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3798; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256: 3799; CHECK: vpmovdb %ymm0, (%rdi) 3800; CHECK: vpmovdb %ymm0, (%rdi) {%k1} 3801 call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3802 call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3803 ret void 3804} 3805 3806declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8) 3807 3808define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3809; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256: 3810; CHECK: vpmovsdb %ymm0, %xmm1 {%k1} 3811; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} 3812; CHECK-NEXT: vpmovsdb %ymm0, %xmm0 3813 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) 3814 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) 3815 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3816 %res3 = add <16 x i8> %res0, %res1 3817 %res4 = add <16 x i8> %res3, %res2 3818 ret <16 x i8> %res4 3819} 3820 3821declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8) 3822 3823define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3824; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256: 3825; CHECK: vpmovsdb %ymm0, (%rdi) 3826; CHECK: vpmovsdb %ymm0, (%rdi) {%k1} 3827 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3828 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3829 ret void 3830} 3831 3832declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8) 3833 3834define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3835; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256: 3836; CHECK: vpmovusdb %ymm0, %xmm1 {%k1} 3837; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} 3838; CHECK-NEXT: vpmovusdb %ymm0, %xmm0 3839 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) 3840 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) 3841 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3842 %res3 = add <16 x i8> %res0, %res1 3843 %res4 = add <16 x i8> %res3, %res2 3844 ret <16 x i8> %res4 3845} 3846 3847declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8) 3848 3849define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3850; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256: 3851; CHECK: vpmovusdb %ymm0, (%rdi) 3852; CHECK: vpmovusdb %ymm0, (%rdi) {%k1} 3853 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3854 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3855 ret void 3856} 3857 3858declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8) 3859 3860define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3861; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128: 3862; CHECK: vpmovdw %xmm0, %xmm1 {%k1} 3863; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} 3864; CHECK-NEXT: vpmovdw %xmm0, %xmm0 3865 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) 3866 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) 3867 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3868 %res3 = add <8 x i16> %res0, %res1 3869 %res4 = add <8 x i16> %res3, %res2 3870 ret <8 x i16> %res4 3871} 3872 3873declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8) 3874 3875define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3876; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128: 3877; CHECK: vpmovdw %xmm0, (%rdi) 3878; CHECK: vpmovdw %xmm0, (%rdi) {%k1} 3879 call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3880 call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3881 ret void 3882} 3883 3884declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8) 3885 3886define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3887; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128: 3888; CHECK: vpmovsdw %xmm0, %xmm1 {%k1} 3889; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} 3890; CHECK-NEXT: vpmovsdw %xmm0, %xmm0 3891 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) 3892 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) 3893 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3894 %res3 = add <8 x i16> %res0, %res1 3895 %res4 = add <8 x i16> %res3, %res2 3896 ret <8 x i16> %res4 3897} 3898 3899declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8) 3900 3901define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3902; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128: 3903; CHECK: vpmovsdw %xmm0, (%rdi) 3904; CHECK: vpmovsdw %xmm0, (%rdi) {%k1} 3905 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3906 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3907 ret void 3908} 3909 3910declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8) 3911 3912define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3913; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128: 3914; CHECK: vpmovusdw %xmm0, %xmm1 {%k1} 3915; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} 3916; CHECK-NEXT: vpmovusdw %xmm0, %xmm0 3917 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) 3918 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) 3919 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3920 %res3 = add <8 x i16> %res0, %res1 3921 %res4 = add <8 x i16> %res3, %res2 3922 ret <8 x i16> %res4 3923} 3924 3925declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8) 3926 3927define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3928; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128: 3929; CHECK: vpmovusdw %xmm0, (%rdi) 3930; CHECK: vpmovusdw %xmm0, (%rdi) {%k1} 3931 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3932 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3933 ret void 3934} 3935 3936declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8) 3937 3938define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3939; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256: 3940; CHECK: vpmovdw %ymm0, %xmm1 {%k1} 3941; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} 3942; CHECK-NEXT: vpmovdw %ymm0, %xmm0 3943 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) 3944 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) 3945 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3946 %res3 = add <8 x i16> %res0, %res1 3947 %res4 = add <8 x i16> %res3, %res2 3948 ret <8 x i16> %res4 3949} 3950 3951declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8) 3952 3953define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3954; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256: 3955; CHECK: vpmovdw %ymm0, (%rdi) 3956; CHECK: vpmovdw %ymm0, (%rdi) {%k1} 3957 call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3958 call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3959 ret void 3960} 3961 3962declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8) 3963 3964define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3965; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256: 3966; CHECK: vpmovsdw %ymm0, %xmm1 {%k1} 3967; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} 3968; CHECK-NEXT: vpmovsdw %ymm0, %xmm0 3969 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) 3970 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) 3971 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3972 %res3 = add <8 x i16> %res0, %res1 3973 %res4 = add <8 x i16> %res3, %res2 3974 ret <8 x i16> %res4 3975} 3976 3977declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8) 3978 3979define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3980; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256: 3981; CHECK: vpmovsdw %ymm0, (%rdi) 3982; CHECK: vpmovsdw %ymm0, (%rdi) {%k1} 3983 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3984 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3985 ret void 3986} 3987 3988declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8) 3989 3990define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3991; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256: 3992; CHECK: vpmovusdw %ymm0, %xmm1 {%k1} 3993; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} 3994; CHECK-NEXT: vpmovusdw %ymm0, %xmm0 3995 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) 3996 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) 3997 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3998 %res3 = add <8 x i16> %res0, %res1 3999 %res4 = add <8 x i16> %res3, %res2 4000 ret <8 x i16> %res4 4001} 4002 4003declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8) 4004 4005define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 4006; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256: 4007; CHECK: vpmovusdw %ymm0, (%rdi) 4008; CHECK: vpmovusdw %ymm0, (%rdi) {%k1} 4009 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 4010 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 4011 ret void 4012} 4013 4014declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8) 4015 4016define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { 4017; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128: 4018; CHECK: ## BB#0: 4019; CHECK-NEXT: movzbl %dil, %eax 4020; CHECK-NEXT: kmovw %eax, %k1 4021; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} 4022; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0 4023; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 4024; CHECK-NEXT: retq 4025 %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) 4026 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) 4027 %res2 = fadd <2 x double> %res, %res1 4028 ret <2 x double> %res2 4029} 4030 4031declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8) 4032 4033define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { 4034; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256: 4035; CHECK: ## BB#0: 4036; CHECK-NEXT: movzbl %dil, %eax 4037; CHECK-NEXT: kmovw %eax, %k1 4038; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} 4039; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 4040; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 4041; CHECK-NEXT: retq 4042 %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) 4043 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) 4044 %res2 = fadd <4 x double> %res, %res1 4045 ret <4 x double> %res2 4046} 4047 4048declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8) 4049 4050define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { 4051; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128: 4052; CHECK: ## BB#0: 4053; CHECK-NEXT: movzbl %dil, %eax 4054; CHECK-NEXT: kmovw %eax, %k1 4055; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} 4056; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 4057; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 4058; CHECK-NEXT: retq 4059 %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) 4060 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) 4061 %res2 = fadd <4 x float> %res, %res1 4062 ret <4 x float> %res2 4063} 4064 4065declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8) 4066 4067define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { 4068; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256: 4069; CHECK: ## BB#0: 4070; CHECK-NEXT: movzbl %dil, %eax 4071; CHECK-NEXT: kmovw %eax, %k1 4072; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} 4073; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 4074; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 4075; CHECK-NEXT: retq 4076 %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) 4077 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) 4078 %res2 = fadd <8 x float> %res, %res1 4079 ret <8 x float> %res2 4080} 4081 4082declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8) 4083 4084define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { 4085; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128: 4086; CHECK: ## BB#0: 4087; CHECK-NEXT: movzbl %dil, %eax 4088; CHECK-NEXT: kmovw %eax, %k1 4089; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} 4090; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 4091; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4092; CHECK-NEXT: retq 4093 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) 4094 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) 4095 %res2 = add <4 x i32> %res, %res1 4096 ret <4 x i32> %res2 4097} 4098 4099declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8) 4100 4101define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 4102; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256: 4103; CHECK: ## BB#0: 4104; CHECK-NEXT: movzbl %dil, %eax 4105; CHECK-NEXT: kmovw %eax, %k1 4106; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} 4107; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0 4108; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4109; CHECK-NEXT: retq 4110 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 4111 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 4112 %res2 = add <4 x i32> %res, %res1 4113 ret <4 x i32> %res2 4114} 4115 4116declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8) 4117 4118define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) { 4119; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256: 4120; CHECK: ## BB#0: 4121; CHECK-NEXT: movzbl %dil, %eax 4122; CHECK-NEXT: kmovw %eax, %k1 4123; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} 4124; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 4125; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 4126; CHECK-NEXT: retq 4127 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2) 4128 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1) 4129 %res2 = fadd <4 x float> %res, %res1 4130 ret <4 x float> %res2 4131} 4132 4133declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8) 4134 4135define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) { 4136; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps: 4137; CHECK: ## BB#0: 4138; CHECK-NEXT: movzbl %dil, %eax 4139; CHECK-NEXT: kmovw %eax, %k1 4140; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} 4141; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 4142; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 4143; CHECK-NEXT: retq 4144 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) 4145 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1) 4146 %res2 = fadd <4 x float> %res, %res1 4147 ret <4 x float> %res2 4148} 4149 4150declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8) 4151 4152define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { 4153; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128: 4154; CHECK: ## BB#0: 4155; CHECK-NEXT: movzbl %dil, %eax 4156; CHECK-NEXT: kmovw %eax, %k1 4157; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} 4158; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0 4159; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4160; CHECK-NEXT: retq 4161 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) 4162 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) 4163 %res2 = add <4 x i32> %res, %res1 4164 ret <4 x i32> %res2 4165} 4166 4167declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8) 4168 4169define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 4170; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256: 4171; CHECK: ## BB#0: 4172; CHECK-NEXT: movzbl %dil, %eax 4173; CHECK-NEXT: kmovw %eax, %k1 4174; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1} 4175; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0 4176; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4177; CHECK-NEXT: retq 4178 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 4179 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 4180 %res2 = add <4 x i32> %res, %res1 4181 ret <4 x i32> %res2 4182} 4183 4184declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8) 4185 4186define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 4187; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128: 4188; CHECK: ## BB#0: 4189; CHECK-NEXT: movzbl %dil, %eax 4190; CHECK-NEXT: kmovw %eax, %k1 4191; CHECK-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1} 4192; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0 4193; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4194; CHECK-NEXT: retq 4195 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 4196 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 4197 %res2 = add <4 x i32> %res, %res1 4198 ret <4 x i32> %res2 4199} 4200 4201declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8) 4202 4203define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 4204; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256: 4205; CHECK: ## BB#0: 4206; CHECK-NEXT: movzbl %dil, %eax 4207; CHECK-NEXT: kmovw %eax, %k1 4208; CHECK-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1} 4209; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0 4210; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 4211; CHECK-NEXT: retq 4212 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 4213 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 4214 %res2 = add <8 x i32> %res, %res1 4215 ret <8 x i32> %res2 4216} 4217 4218declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8) 4219 4220define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) { 4221; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128: 4222; CHECK: ## BB#0: 4223; CHECK-NEXT: movzbl %dil, %eax 4224; CHECK-NEXT: kmovw %eax, %k1 4225; CHECK-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} 4226; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 4227; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 4228; CHECK-NEXT: retq 4229 %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2) 4230 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1) 4231 %res2 = fadd <2 x double> %res, %res1 4232 ret <2 x double> %res2 4233} 4234 4235declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8) 4236 4237define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) { 4238; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256: 4239; CHECK: ## BB#0: 4240; CHECK-NEXT: movzbl %dil, %eax 4241; CHECK-NEXT: kmovw %eax, %k1 4242; CHECK-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} 4243; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 4244; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 4245; CHECK-NEXT: retq 4246 %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2) 4247 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1) 4248 %res2 = fadd <4 x double> %res, %res1 4249 ret <4 x double> %res2 4250} 4251 4252declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8) 4253 4254define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 4255; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128: 4256; CHECK: ## BB#0: 4257; CHECK-NEXT: movzbl %dil, %eax 4258; CHECK-NEXT: kmovw %eax, %k1 4259; CHECK-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1} 4260; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0 4261; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4262; CHECK-NEXT: retq 4263 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 4264 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 4265 %res2 = add <4 x i32> %res, %res1 4266 ret <4 x i32> %res2 4267} 4268 4269declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8) 4270 4271define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 4272; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256: 4273; CHECK: ## BB#0: 4274; CHECK-NEXT: movzbl %dil, %eax 4275; CHECK-NEXT: kmovw %eax, %k1 4276; CHECK-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1} 4277; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0 4278; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 4279; CHECK-NEXT: retq 4280 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 4281 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 4282 %res2 = add <8 x i32> %res, %res1 4283 ret <8 x i32> %res2 4284} 4285 4286declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8) 4287 4288define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { 4289; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128: 4290; CHECK: ## BB#0: 4291; CHECK-NEXT: movzbl %dil, %eax 4292; CHECK-NEXT: kmovw %eax, %k1 4293; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} 4294; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 4295; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4296; CHECK-NEXT: retq 4297 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) 4298 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) 4299 %res2 = add <4 x i32> %res, %res1 4300 ret <4 x i32> %res2 4301} 4302 4303declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8) 4304 4305define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 4306; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256: 4307; CHECK: ## BB#0: 4308; CHECK-NEXT: movzbl %dil, %eax 4309; CHECK-NEXT: kmovw %eax, %k1 4310; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} 4311; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 4312; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4313; CHECK-NEXT: retq 4314 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 4315 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 4316 %res2 = add <4 x i32> %res, %res1 4317 ret <4 x i32> %res2 4318} 4319 4320declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8) 4321 4322define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { 4323; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128: 4324; CHECK: ## BB#0: 4325; CHECK-NEXT: movzbl %dil, %eax 4326; CHECK-NEXT: kmovw %eax, %k1 4327; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} 4328; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0 4329; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4330; CHECK-NEXT: retq 4331 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) 4332 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) 4333 %res2 = add <4 x i32> %res, %res1 4334 ret <4 x i32> %res2 4335} 4336 4337declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8) 4338 4339define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 4340; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256: 4341; CHECK: ## BB#0: 4342; CHECK-NEXT: movzbl %dil, %eax 4343; CHECK-NEXT: kmovw %eax, %k1 4344; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} 4345; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 4346; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4347; CHECK-NEXT: retq 4348 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 4349 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 4350 %res2 = add <4 x i32> %res, %res1 4351 ret <4 x i32> %res2 4352} 4353 4354declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8) 4355 4356define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 4357; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128: 4358; CHECK: ## BB#0: 4359; CHECK-NEXT: movzbl %dil, %eax 4360; CHECK-NEXT: kmovw %eax, %k1 4361; CHECK-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} 4362; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 4363; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4364; CHECK-NEXT: retq 4365 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 4366 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 4367 %res2 = add <4 x i32> %res, %res1 4368 ret <4 x i32> %res2 4369} 4370 4371declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8) 4372 4373define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 4374; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256: 4375; CHECK: ## BB#0: 4376; CHECK-NEXT: movzbl %dil, %eax 4377; CHECK-NEXT: kmovw %eax, %k1 4378; CHECK-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} 4379; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 4380; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 4381; CHECK-NEXT: retq 4382 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 4383 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 4384 %res2 = add <8 x i32> %res, %res1 4385 ret <8 x i32> %res2 4386} 4387 4388declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8) 4389 4390define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 4391; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128: 4392; CHECK: ## BB#0: 4393; CHECK-NEXT: movzbl %dil, %eax 4394; CHECK-NEXT: kmovw %eax, %k1 4395; CHECK-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} 4396; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 4397; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 4398; CHECK-NEXT: retq 4399 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 4400 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 4401 %res2 = add <4 x i32> %res, %res1 4402 ret <4 x i32> %res2 4403} 4404 4405declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8) 4406 4407define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 4408; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256: 4409; CHECK: ## BB#0: 4410; CHECK-NEXT: movzbl %dil, %eax 4411; CHECK-NEXT: kmovw %eax, %k1 4412; CHECK-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} 4413; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 4414; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 4415; CHECK-NEXT: retq 4416 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 4417 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 4418 %res2 = add <8 x i32> %res, %res1 4419 ret <8 x i32> %res2 4420} 4421 4422declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8) 4423 4424define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { 4425; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128: 4426; CHECK: ## BB#0: 4427; CHECK-NEXT: movzbl %dil, %eax 4428; CHECK-NEXT: kmovw %eax, %k1 4429; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} 4430; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0 4431; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 4432; CHECK-NEXT: retq 4433 %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) 4434 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) 4435 %res2 = fadd <2 x double> %res, %res1 4436 ret <2 x double> %res2 4437} 4438 4439declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8) 4440 4441define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { 4442; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256: 4443; CHECK: ## BB#0: 4444; CHECK-NEXT: movzbl %dil, %eax 4445; CHECK-NEXT: kmovw %eax, %k1 4446; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} 4447; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 4448; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 4449; CHECK-NEXT: retq 4450 %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) 4451 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) 4452 %res2 = fadd <4 x double> %res, %res1 4453 ret <4 x double> %res2 4454} 4455 4456declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8) 4457 4458define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { 4459; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128: 4460; CHECK: ## BB#0: 4461; CHECK-NEXT: movzbl %dil, %eax 4462; CHECK-NEXT: kmovw %eax, %k1 4463; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} 4464; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 4465; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 4466; CHECK-NEXT: retq 4467 %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) 4468 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) 4469 %res2 = fadd <4 x float> %res, %res1 4470 ret <4 x float> %res2 4471} 4472 4473declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8) 4474 4475define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { 4476; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256: 4477; CHECK: ## BB#0: 4478; CHECK-NEXT: movzbl %dil, %eax 4479; CHECK-NEXT: kmovw %eax, %k1 4480; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} 4481; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 4482; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 4483; CHECK-NEXT: retq 4484 %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) 4485 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) 4486 %res2 = fadd <8 x float> %res, %res1 4487 ret <8 x float> %res2 4488} 4489 4490declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8) 4491; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128 4492; CHECK-NOT: call 4493; CHECK: kmov 4494; CHECK: vrndscalepd {{.*}}{%k1} 4495; CHECK: vrndscalepd 4496define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { 4497 %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3) 4498 %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1) 4499 %res2 = fadd <2 x double> %res, %res1 4500 ret <2 x double> %res2 4501} 4502 4503declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8) 4504; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256 4505; CHECK-NOT: call 4506; CHECK: kmov 4507; CHECK: vrndscalepd {{.*}}{%k1} 4508; CHECK: vrndscalepd 4509define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { 4510 %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3) 4511 %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1) 4512 %res2 = fadd <4 x double> %res, %res1 4513 ret <4 x double> %res2 4514} 4515 4516declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8) 4517; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128 4518; CHECK-NOT: call 4519; CHECK: kmov 4520; CHECK: vrndscaleps {{.*}}{%k1} 4521; CHECK: vrndscaleps 4522define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { 4523 %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3) 4524 %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1) 4525 %res2 = fadd <4 x float> %res, %res1 4526 ret <4 x float> %res2 4527} 4528 4529declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8) 4530 4531; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256 4532; CHECK-NOT: call 4533; CHECK: kmov 4534; CHECK: vrndscaleps {{.*}}{%k1} 4535; CHECK: vrndscaleps 4536define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { 4537 %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3) 4538 %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1) 4539 %res2 = fadd <8 x float> %res, %res1 4540 ret <8 x float> %res2 4541} 4542 4543declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) 4544 4545define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { 4546; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256: 4547; CHECK: ## BB#0: 4548; CHECK-NEXT: movzbl %dil, %eax 4549; CHECK-NEXT: kmovw %eax, %k1 4550; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} 4551; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7] 4552; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} 4553; CHECK-NEXT: ## ymm3 = ymm0[0,1,2,3],ymm1[4,5,6,7] 4554; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 4555; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 4556; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 4557; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 4558; CHECK-NEXT: retq 4559 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) 4560 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) 4561 %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4) 4562 %res3 = fadd <8 x float> %res, %res1 4563 %res4 = fadd <8 x float> %res2, %res3 4564 ret <8 x float> %res4 4565} 4566 4567declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) 4568 4569define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { 4570; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256: 4571; CHECK: ## BB#0: 4572; CHECK-NEXT: movzbl %dil, %eax 4573; CHECK-NEXT: kmovw %eax, %k1 4574; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} 4575; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3] 4576; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} 4577; CHECK-NEXT: ## ymm3 = ymm0[0,1],ymm1[2,3] 4578; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 4579; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] 4580; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 4581; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 4582; CHECK-NEXT: retq 4583 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4) 4584 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1) 4585 %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4) 4586 %res3 = fadd <4 x double> %res, %res1 4587 %res4 = fadd <4 x double> %res2, %res3 4588 ret <4 x double> %res4 4589} 4590 4591declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8) 4592 4593define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 4594; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256: 4595; CHECK: ## BB#0: 4596; CHECK-NEXT: movzbl %dil, %eax 4597; CHECK-NEXT: kmovw %eax, %k1 4598; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} 4599; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7] 4600; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 4601; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 4602; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 4603; CHECK-NEXT: retq 4604 %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4) 4605 %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1) 4606 %res2 = add <8 x i32> %res, %res1 4607 ret <8 x i32> %res2 4608} 4609 4610declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8) 4611 4612define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 4613; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256: 4614; CHECK: ## BB#0: 4615; CHECK-NEXT: movzbl %dil, %eax 4616; CHECK-NEXT: kmovw %eax, %k1 4617; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} 4618; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3] 4619; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 4620; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] 4621; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 4622; CHECK-NEXT: retq 4623 %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4) 4624 %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1) 4625 %res2 = add <4 x i64> %res, %res1 4626 ret <4 x i64> %res2 4627} 4628 4629declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8) 4630 4631define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) { 4632; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256: 4633; CHECK: ## BB#0: 4634; CHECK-NEXT: kmovw %edi, %k1 4635; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} 4636; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 {%k1} {z} 4637; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 4638; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 4639; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 4640; CHECK-NEXT: retq 4641 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3) 4642 %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3) 4643 %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1) 4644 %res3 = fadd <4 x float> %res, %res1 4645 %res4 = fadd <4 x float> %res2, %res3 4646 ret <4 x float> %res4 4647} 4648 4649declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8) 4650 4651define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { 4652; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128: 4653; CHECK: ## BB#0: 4654; CHECK-NEXT: movzbl %dil, %eax 4655; CHECK-NEXT: kmovw %eax, %k1 4656; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1} 4657; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z} 4658; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0 4659; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 4660; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 4661; CHECK-NEXT: retq 4662 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3) 4663 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> zeroinitializer, i8 %x3) 4664 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 -1) 4665 %res3 = fadd <2 x double> %res, %res1 4666 %res4 = fadd <2 x double> %res2, %res3 4667 ret <2 x double> %res4 4668} 4669 4670declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8) 4671 4672define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { 4673; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_256: 4674; CHECK: ## BB#0: 4675; CHECK-NEXT: movzbl %dil, %eax 4676; CHECK-NEXT: kmovw %eax, %k1 4677; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1} 4678; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm0 4679; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 4680; CHECK-NEXT: retq 4681 %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3) 4682 %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 -1) 4683 %res2 = fadd <4 x double> %res, %res1 4684 ret <4 x double> %res2 4685} 4686 4687declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8) 4688 4689define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { 4690; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_128: 4691; CHECK: ## BB#0: 4692; CHECK-NEXT: movzbl %dil, %eax 4693; CHECK-NEXT: kmovw %eax, %k1 4694; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1} 4695; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm0 4696; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 4697; CHECK-NEXT: retq 4698 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3) 4699 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 -1) 4700 %res2 = fadd <4 x float> %res, %res1 4701 ret <4 x float> %res2 4702} 4703 4704declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8) 4705 4706define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { 4707; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_256: 4708; CHECK: ## BB#0: 4709; CHECK-NEXT: movzbl %dil, %eax 4710; CHECK-NEXT: kmovw %eax, %k1 4711; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1} 4712; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm0 4713; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 4714; CHECK-NEXT: retq 4715 %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3) 4716 %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1) 4717 %res2 = fadd <8 x float> %res, %res1 4718 ret <8 x float> %res2 4719} 4720 4721declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8) 4722 4723define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { 4724; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128: 4725; CHECK: ## BB#0: 4726; CHECK-NEXT: movzbl %dil, %eax 4727; CHECK-NEXT: kmovw %eax, %k1 4728; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1} 4729; CHECK-NEXT: ## xmm2 = xmm2[0],k1[1] 4730; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} 4731; CHECK-NEXT: ## xmm3 = k1[0],xmm0[1] 4732; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0 4733; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1] 4734; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 4735; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 4736; CHECK-NEXT: retq 4737 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4) 4738 %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 -1) 4739 %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> zeroinitializer, i8 %x4) 4740 %res3 = fadd <2 x double> %res, %res1 4741 %res4 = fadd <2 x double> %res2, %res3 4742 ret <2 x double> %res4 4743} 4744 4745declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) 4746 4747define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { 4748; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256: 4749; CHECK: ## BB#0: 4750; CHECK-NEXT: movzbl %dil, %eax 4751; CHECK-NEXT: kmovw %eax, %k1 4752; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1} 4753; CHECK-NEXT: ## ymm2 = ymm2[0],k1[1],ymm2[3],k1[2] 4754; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0 4755; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2] 4756; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 4757; CHECK-NEXT: retq 4758 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4) 4759 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1) 4760 %res2 = fadd <4 x double> %res, %res1 4761 ret <4 x double> %res2 4762} 4763 4764declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8) 4765 4766define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { 4767; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128: 4768; CHECK: ## BB#0: 4769; CHECK-NEXT: movzbl %dil, %eax 4770; CHECK-NEXT: kmovw %eax, %k1 4771; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} 4772; CHECK-NEXT: ## xmm2 = xmm2[2,1],k1[1,0] 4773; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0 4774; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0] 4775; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 4776; CHECK-NEXT: retq 4777 %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4) 4778 %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1) 4779 %res2 = fadd <4 x float> %res, %res1 4780 ret <4 x float> %res2 4781} 4782 4783declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) 4784 4785define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { 4786; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256: 4787; CHECK: ## BB#0: 4788; CHECK-NEXT: movzbl %dil, %eax 4789; CHECK-NEXT: kmovw %eax, %k1 4790; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} 4791; CHECK-NEXT: ## ymm2 = ymm2[2,1],k1[1,0],ymm2[6,5],k1[5,4] 4792; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0 4793; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] 4794; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 4795; CHECK-NEXT: retq 4796 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) 4797 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) 4798 %res2 = fadd <8 x float> %res, %res1 4799 ret <8 x float> %res2 4800} 4801 4802declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8) 4803 4804define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 4805; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128: 4806; CHECK: ## BB#0: 4807; CHECK-NEXT: movzbl %dil, %eax 4808; CHECK-NEXT: kmovw %eax, %k1 4809; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm2 {%k1} 4810; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} 4811; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm0 4812; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 4813; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 4814; CHECK-NEXT: retq 4815 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4) 4816 %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1) 4817 %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4) 4818 %res3 = add <4 x i32> %res, %res1 4819 %res4 = add <4 x i32> %res3, %res2 4820 ret <4 x i32> %res4 4821} 4822 4823declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8) 4824 4825define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 4826; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256: 4827; CHECK: ## BB#0: 4828; CHECK-NEXT: movzbl %dil, %eax 4829; CHECK-NEXT: kmovw %eax, %k1 4830; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm2 {%k1} 4831; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm0 4832; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 4833; CHECK-NEXT: retq 4834 %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4) 4835 %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1) 4836 %res2 = add <8 x i32> %res, %res1 4837 ret <8 x i32> %res2 4838} 4839 4840declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8) 4841 4842define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 4843; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128: 4844; CHECK: ## BB#0: 4845; CHECK-NEXT: movzbl %dil, %eax 4846; CHECK-NEXT: kmovw %eax, %k1 4847; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm2 {%k1} 4848; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm0 4849; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 4850; CHECK-NEXT: retq 4851 %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4) 4852 %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1) 4853 %res2 = add <2 x i64> %res, %res1 4854 ret <2 x i64> %res2 4855} 4856 4857declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8) 4858 4859define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 4860; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256: 4861; CHECK: ## BB#0: 4862; CHECK-NEXT: movzbl %dil, %eax 4863; CHECK-NEXT: kmovw %eax, %k1 4864; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm2 {%k1} 4865; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm0 4866; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 4867; CHECK-NEXT: retq 4868 %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4) 4869 %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1) 4870 %res2 = add <4 x i64> %res, %res1 4871 ret <4 x i64> %res2 4872} 4873 4874declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8) 4875 4876define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { 4877; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: 4878; CHECK: ## BB#0: 4879; CHECK-NEXT: movzbl %dil, %eax 4880; CHECK-NEXT: kmovw %eax, %k1 4881; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} 4882; CHECK-NEXT: ## ymm1 = ymm1[0,1,3,2] 4883; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} 4884; CHECK-NEXT: ## ymm2 = k1[0,1,3,2] 4885; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0 4886; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2] 4887; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 4888; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 4889; CHECK-NEXT: retq 4890 %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3) 4891 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3) 4892 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1) 4893 %res3 = fadd <4 x double> %res, %res1 4894 %res4 = fadd <4 x double> %res2, %res3 4895 ret <4 x double> %res4 4896} 4897 4898declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8) 4899 4900define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { 4901; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: 4902; CHECK: ## BB#0: 4903; CHECK-NEXT: movzbl %dil, %eax 4904; CHECK-NEXT: kmovw %eax, %k1 4905; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} 4906; CHECK-NEXT: ## xmm1 = xmm1[1,0] 4907; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} 4908; CHECK-NEXT: ## xmm2 = k1[1,0] 4909; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 4910; CHECK-NEXT: ## xmm0 = xmm0[1,0] 4911; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 4912; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 4913; CHECK-NEXT: retq 4914 %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3) 4915 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3) 4916 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1) 4917 %res3 = fadd <2 x double> %res, %res1 4918 %res4 = fadd <2 x double> %res3, %res2 4919 ret <2 x double> %res4 4920} 4921 4922declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8) 4923 4924define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { 4925; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: 4926; CHECK: ## BB#0: 4927; CHECK-NEXT: movzbl %dil, %eax 4928; CHECK-NEXT: kmovw %eax, %k1 4929; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} 4930; CHECK-NEXT: ## ymm1 = ymm1[2,1,1,0,6,5,5,4] 4931; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} 4932; CHECK-NEXT: ## ymm2 = k1[2,1,1,0,6,5,5,4] 4933; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 4934; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4] 4935; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 4936; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 4937; CHECK-NEXT: retq 4938 %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3) 4939 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3) 4940 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1) 4941 %res3 = fadd <8 x float> %res, %res1 4942 %res4 = fadd <8 x float> %res3, %res2 4943 ret <8 x float> %res4 4944} 4945 4946declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8) 4947 4948define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { 4949; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: 4950; CHECK: ## BB#0: 4951; CHECK-NEXT: movzbl %dil, %eax 4952; CHECK-NEXT: kmovw %eax, %k1 4953; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} 4954; CHECK-NEXT: ## xmm1 = xmm1[2,1,1,0] 4955; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} 4956; CHECK-NEXT: ## xmm2 = k1[2,1,1,0] 4957; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 4958; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0] 4959; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 4960; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 4961; CHECK-NEXT: retq 4962 %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3) 4963 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3) 4964 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1) 4965 %res3 = fadd <4 x float> %res, %res1 4966 %res4 = fadd <4 x float> %res2, %res3 4967 ret <4 x float> %res4 4968} 4969 4970declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) 4971 4972define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 4973; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: 4974; CHECK: ## BB#0: 4975; CHECK-NEXT: movzbl %dil, %eax 4976; CHECK-NEXT: kmovw %eax, %k1 4977; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} 4978; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z} 4979; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 4980; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 4981; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 4982; CHECK-NEXT: retq 4983 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 4984 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) 4985 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 4986 %res3 = fadd <4 x double> %res, %res1 4987 %res4 = fadd <4 x double> %res2, %res3 4988 ret <4 x double> %res4 4989} 4990 4991declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) 4992 4993define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { 4994; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: 4995; CHECK: ## BB#0: 4996; CHECK-NEXT: movzbl %dil, %eax 4997; CHECK-NEXT: kmovw %eax, %k1 4998; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} 4999; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z} 5000; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 5001; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1 5002; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 5003; CHECK-NEXT: retq 5004 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) 5005 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3) 5006 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) 5007 %res3 = fadd <2 x double> %res, %res1 5008 %res4 = fadd <2 x double> %res3, %res2 5009 ret <2 x double> %res4 5010} 5011 5012declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) 5013 5014define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 5015; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: 5016; CHECK: ## BB#0: 5017; CHECK-NEXT: movzbl %dil, %eax 5018; CHECK-NEXT: kmovw %eax, %k1 5019; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} 5020; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z} 5021; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 5022; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 5023; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 5024; CHECK-NEXT: retq 5025 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 5026 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) 5027 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 5028 %res3 = fadd <8 x float> %res, %res1 5029 %res4 = fadd <8 x float> %res3, %res2 5030 ret <8 x float> %res4 5031} 5032 5033declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) 5034 5035define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { 5036; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: 5037; CHECK: ## BB#0: 5038; CHECK-NEXT: movzbl %dil, %eax 5039; CHECK-NEXT: kmovw %eax, %k1 5040; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} 5041; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z} 5042; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 5043; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1 5044; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 5045; CHECK-NEXT: retq 5046 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) 5047 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3) 5048 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) 5049 %res3 = fadd <4 x float> %res, %res1 5050 %res4 = fadd <4 x float> %res2, %res3 5051 ret <4 x float> %res4 5052} 5053 5054declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8) 5055 5056define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) { 5057; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256: 5058; CHECK: ## BB#0: 5059; CHECK-NEXT: movzbl %dil, %eax 5060; CHECK-NEXT: kmovw %eax, %k1 5061; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} 5062; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} 5063; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 5064; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 5065; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 5066; CHECK-NEXT: retq 5067 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4) 5068 %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1) 5069 %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4) 5070 %res3 = fadd <8 x float> %res, %res1 5071 %res4 = fadd <8 x float> %res2, %res3 5072 ret <8 x float> %res4 5073} 5074 5075declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8) 5076 5077define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) { 5078; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256: 5079; CHECK: ## BB#0: 5080; CHECK-NEXT: movzbl %dil, %eax 5081; CHECK-NEXT: kmovw %eax, %k1 5082; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} 5083; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} 5084; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 5085; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 5086; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 5087; CHECK-NEXT: retq 5088 5089 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4) 5090 %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1) 5091 %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4) 5092 %res3 = add <8 x i32> %res, %res1 5093 %res4 = add <8 x i32> %res2, %res3 5094 ret <8 x i32> %res4 5095} 5096 5097declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) 5098 5099define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { 5100; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128: 5101; CHECK: ## BB#0: 5102; CHECK-NEXT: movzbl %dil, %eax 5103; CHECK-NEXT: kmovw %eax, %k1 5104; CHECK-NEXT: vmovaps %zmm0, %zmm3 5105; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} 5106; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 5107; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 5108; CHECK-NEXT: retq 5109 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) 5110 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) 5111 %res2 = add <4 x i32> %res, %res1 5112 ret <4 x i32> %res2 5113} 5114 5115declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) 5116 5117define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { 5118; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: 5119; CHECK: ## BB#0: 5120; CHECK-NEXT: movzbl %dil, %eax 5121; CHECK-NEXT: kmovw %eax, %k1 5122; CHECK-NEXT: vmovaps %zmm0, %zmm3 5123; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z} 5124; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 5125; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 5126; CHECK-NEXT: retq 5127 %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) 5128 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) 5129 %res2 = add <4 x i32> %res, %res1 5130 ret <4 x i32> %res2 5131} 5132 5133declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) 5134 5135define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { 5136; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256: 5137; CHECK: ## BB#0: 5138; CHECK-NEXT: movzbl %dil, %eax 5139; CHECK-NEXT: kmovw %eax, %k1 5140; CHECK-NEXT: vmovaps %zmm0, %zmm3 5141; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} 5142; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 5143; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 5144; CHECK-NEXT: retq 5145 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) 5146 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) 5147 %res2 = add <8 x i32> %res, %res1 5148 ret <8 x i32> %res2 5149} 5150 5151declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) 5152 5153define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { 5154; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: 5155; CHECK: ## BB#0: 5156; CHECK-NEXT: movzbl %dil, %eax 5157; CHECK-NEXT: kmovw %eax, %k1 5158; CHECK-NEXT: vmovaps %zmm0, %zmm3 5159; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z} 5160; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 5161; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 5162; CHECK-NEXT: retq 5163 %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) 5164 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) 5165 %res2 = add <8 x i32> %res, %res1 5166 ret <8 x i32> %res2 5167} 5168 5169declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) 5170 5171define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { 5172; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128: 5173; CHECK: ## BB#0: 5174; CHECK-NEXT: movzbl %dil, %eax 5175; CHECK-NEXT: kmovw %eax, %k1 5176; CHECK-NEXT: vmovaps %zmm0, %zmm3 5177; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} 5178; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 5179; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 5180; CHECK-NEXT: retq 5181 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) 5182 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) 5183 %res2 = add <2 x i64> %res, %res1 5184 ret <2 x i64> %res2 5185} 5186 5187declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) 5188 5189define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { 5190; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128: 5191; CHECK: ## BB#0: 5192; CHECK-NEXT: movzbl %dil, %eax 5193; CHECK-NEXT: kmovw %eax, %k1 5194; CHECK-NEXT: vmovaps %zmm0, %zmm3 5195; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z} 5196; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 5197; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 5198; CHECK-NEXT: retq 5199 %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) 5200 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) 5201 %res2 = add <2 x i64> %res, %res1 5202 ret <2 x i64> %res2 5203} 5204 5205declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8) 5206 5207define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { 5208; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256: 5209; CHECK: ## BB#0: 5210; CHECK-NEXT: movzbl %dil, %eax 5211; CHECK-NEXT: kmovw %eax, %k1 5212; CHECK-NEXT: vmovaps %zmm0, %zmm3 5213; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} 5214; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 5215; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 5216; CHECK-NEXT: retq 5217 %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) 5218 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) 5219 %res2 = add <4 x i64> %res, %res1 5220 ret <4 x i64> %res2 5221} 5222 5223declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8) 5224 5225define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { 5226; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256: 5227; CHECK: ## BB#0: 5228; CHECK-NEXT: movzbl %dil, %eax 5229; CHECK-NEXT: kmovw %eax, %k1 5230; CHECK-NEXT: vmovaps %zmm0, %zmm3 5231; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z} 5232; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 5233; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 5234; CHECK-NEXT: retq 5235 %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) 5236 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) 5237 %res2 = add <4 x i64> %res, %res1 5238 ret <4 x i64> %res2 5239} 5240 5241declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8) 5242 5243define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) { 5244; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256: 5245; CHECK: ## BB#0: 5246; CHECK-NEXT: movzbl %dil, %eax 5247; CHECK-NEXT: kmovw %eax, %k1 5248; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} 5249; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z} 5250; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0 5251; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 5252; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 5253; CHECK-NEXT: retq 5254 %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 -1) 5255 %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) 5256 %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask) 5257 %res3 = add <8 x i32> %res, %res1 5258 %res4 = add <8 x i32> %res2, %res3 5259 ret <8 x i32> %res4 5260} 5261 5262declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8) 5263 5264define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { 5265; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128: 5266; CHECK: ## BB#0: 5267; CHECK-NEXT: movzbl %dil, %eax 5268; CHECK-NEXT: kmovw %eax, %k1 5269; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} 5270; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 {%k1} {z} 5271; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0 5272; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 5273; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 5274; CHECK-NEXT: retq 5275 %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) 5276 %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) 5277 %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask) 5278 %res3 = add <4 x i32> %res, %res1 5279 %res4 = add <4 x i32> %res2, %res3 5280 ret <4 x i32> %res4 5281} 5282 5283declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8) 5284 5285define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) { 5286; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256: 5287; CHECK: ## BB#0: 5288; CHECK-NEXT: movzbl %dil, %eax 5289; CHECK-NEXT: kmovw %eax, %k1 5290; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} 5291; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 {%k1} {z} 5292; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0 5293; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 5294; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 5295; CHECK-NEXT: retq 5296 %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1) 5297 %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask) 5298 %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask) 5299 %res3 = add <4 x i64> %res, %res1 5300 %res4 = add <4 x i64> %res2, %res3 5301 ret <4 x i64> %res4 5302} 5303 5304declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8) 5305 5306define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) { 5307; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128: 5308; CHECK: ## BB#0: 5309; CHECK-NEXT: movzbl %dil, %eax 5310; CHECK-NEXT: kmovw %eax, %k1 5311; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} 5312; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 {%k1} {z} 5313; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 5314; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 5315; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 5316; CHECK-NEXT: retq 5317 %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1) 5318 %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask) 5319 %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask) 5320 %res3 = add <2 x i64> %res, %res1 5321 %res4 = add <2 x i64> %res2, %res3 5322 ret <2 x i64> %res4 5323} 5324 5325define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) { 5326 ; CHECK: test_x86_vcvtph2ps_128 5327 ; CHECK: vcvtph2ps %xmm0, %xmm0 5328 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1) 5329 ret <4 x float> %res 5330} 5331 5332define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) { 5333 ; CHECK: test_x86_vcvtph2ps_128_rrk 5334 ; CHECK: vcvtph2ps %xmm0, %xmm1 {%k1} 5335 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask) 5336 ret <4 x float> %res 5337} 5338 5339 5340define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) { 5341 ; CHECK: test_x86_vcvtph2ps_128_rrkz 5342 ; CHECK: vcvtph2ps %xmm0, %xmm0 {%k1} {z} 5343 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask) 5344 ret <4 x float> %res 5345} 5346 5347declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly 5348 5349define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) { 5350 ; CHECK: test_x86_vcvtph2ps_256 5351 ; CHECK: vcvtph2ps %xmm0, %ymm0 5352 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1) 5353 ret <8 x float> %res 5354} 5355 5356define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) { 5357 ; CHECK: test_x86_vcvtph2ps_256_rrk 5358 ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1} 5359 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask) 5360 ret <8 x float> %res 5361} 5362 5363define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) { 5364 ; CHECK: test_x86_vcvtph2ps_256_rrkz 5365 ; CHECK: vcvtph2ps %xmm0, %ymm0 {%k1} {z} 5366 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask) 5367 ret <8 x float> %res 5368} 5369 5370declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly 5371 5372define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) { 5373 ; CHECK: test_x86_vcvtps2ph_128 5374 ; CHECK: vcvtps2ph $2, %xmm0, %xmm0 5375 %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1) 5376 ret <8 x i16> %res 5377} 5378 5379 5380declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly 5381 5382define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) { 5383 ; CHECK: test_x86_vcvtps2ph_256 5384 ; CHECK: vcvtps2ph $2, %ymm0, %xmm0 5385 %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1) 5386 ret <8 x i16> %res 5387} 5388 5389declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly 5390 5391declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8) 5392 5393define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 5394; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128: 5395; CHECK: ## BB#0: 5396; CHECK-NEXT: movzbl %dil, %eax 5397; CHECK-NEXT: kmovw %eax, %k1 5398; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1} 5399; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2] 5400; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z} 5401; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2] 5402; CHECK-NEXT: vmovsldup %xmm0, %xmm0 5403; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2] 5404; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 5405; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 5406; CHECK-NEXT: retq 5407 %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) 5408 %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) 5409 %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2) 5410 %res3 = fadd <4 x float> %res, %res1 5411 %res4 = fadd <4 x float> %res2, %res3 5412 ret <4 x float> %res4 5413} 5414 5415declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8) 5416 5417define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { 5418; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256: 5419; CHECK: ## BB#0: 5420; CHECK-NEXT: movzbl %dil, %eax 5421; CHECK-NEXT: kmovw %eax, %k1 5422; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1} 5423; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6] 5424; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z} 5425; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6] 5426; CHECK-NEXT: vmovsldup %ymm0, %ymm0 5427; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6] 5428; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 5429; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 5430; CHECK-NEXT: retq 5431 %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) 5432 %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) 5433 %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2) 5434 %res3 = fadd <8 x float> %res, %res1 5435 %res4 = fadd <8 x float> %res2, %res3 5436 ret <8 x float> %res4 5437} 5438 5439declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8) 5440 5441define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) { 5442; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128: 5443; CHECK: ## BB#0: 5444; CHECK-NEXT: movzbl %dil, %eax 5445; CHECK-NEXT: kmovw %eax, %k1 5446; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1} 5447; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3] 5448; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z} 5449; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3] 5450; CHECK-NEXT: vmovshdup %xmm0, %xmm0 5451; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3] 5452; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 5453; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 5454; CHECK-NEXT: retq 5455 %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2) 5456 %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1) 5457 %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2) 5458 %res3 = fadd <4 x float> %res, %res1 5459 %res4 = fadd <4 x float> %res2, %res3 5460 ret <4 x float> %res4 5461} 5462 5463declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8) 5464 5465define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) { 5466; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256: 5467; CHECK: ## BB#0: 5468; CHECK-NEXT: movzbl %dil, %eax 5469; CHECK-NEXT: kmovw %eax, %k1 5470; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1} 5471; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7] 5472; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z} 5473; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7] 5474; CHECK-NEXT: vmovshdup %ymm0, %ymm0 5475; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7] 5476; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 5477; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 5478; CHECK-NEXT: retq 5479 %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2) 5480 %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1) 5481 %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2) 5482 %res3 = fadd <8 x float> %res, %res1 5483 %res4 = fadd <8 x float> %res2, %res3 5484 ret <8 x float> %res4 5485} 5486declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8) 5487 5488define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) { 5489; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128: 5490; CHECK: ## BB#0: 5491; CHECK-NEXT: movzbl %dil, %eax 5492; CHECK-NEXT: kmovw %eax, %k1 5493; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1} 5494; CHECK-NEXT: ## xmm1 = xmm0[0,0] 5495; CHECK-NEXT: vmovddup %xmm0, %xmm2 {%k1} {z} 5496; CHECK-NEXT: ## xmm2 = xmm0[0,0] 5497; CHECK-NEXT: vmovddup %xmm0, %xmm0 5498; CHECK-NEXT: ## xmm0 = xmm0[0,0] 5499; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 5500; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 5501; CHECK-NEXT: retq 5502 %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2) 5503 %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1) 5504 %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2) 5505 %res3 = fadd <2 x double> %res, %res1 5506 %res4 = fadd <2 x double> %res2, %res3 5507 ret <2 x double> %res4 5508} 5509 5510declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8) 5511 5512define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) { 5513; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256: 5514; CHECK: ## BB#0: 5515; CHECK-NEXT: movzbl %dil, %eax 5516; CHECK-NEXT: kmovw %eax, %k1 5517; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1} 5518; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2] 5519; CHECK-NEXT: vmovddup %ymm0, %ymm2 {%k1} {z} 5520; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2] 5521; CHECK-NEXT: vmovddup %ymm0, %ymm0 5522; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2] 5523; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 5524; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 5525; CHECK-NEXT: retq 5526 %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2) 5527 %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1) 5528 %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2) 5529 %res3 = fadd <4 x double> %res, %res1 5530 %res4 = fadd <4 x double> %res2, %res3 5531 ret <4 x double> %res4 5532} 5533