1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW 3; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32 4 5define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { 6; AVX512BW-LABEL: test_cmp_b_512: 7; AVX512BW: ## BB#0: 8; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 9; AVX512BW-NEXT: kmovq %k0, %rax 10; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0 11; AVX512BW-NEXT: kmovq %k0, %rcx 12; AVX512BW-NEXT: addq %rax, %rcx 13; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 14; AVX512BW-NEXT: kmovq %k0, %rax 15; AVX512BW-NEXT: addq %rcx, %rax 16; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 17; AVX512BW-NEXT: kmovq %k0, %rcx 18; AVX512BW-NEXT: addq %rax, %rcx 19; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 20; AVX512BW-NEXT: kmovq %k0, %rax 21; AVX512BW-NEXT: addq %rcx, %rax 22; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 23; AVX512BW-NEXT: kmovq %k0, %rcx 24; AVX512BW-NEXT: addq %rax, %rcx 25; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 26; AVX512BW-NEXT: kmovq %k0, %rdx 27; AVX512BW-NEXT: addq %rcx, %rdx 28; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0 29; AVX512BW-NEXT: kmovq %k0, %rax 30; AVX512BW-NEXT: addq %rdx, %rax 31; AVX512BW-NEXT: retq 32; 33; AVX512F-32-LABEL: test_cmp_b_512: 34; AVX512F-32: # BB#0: 35; AVX512F-32-NEXT: subl $68, %esp 36; AVX512F-32-NEXT: .Ltmp0: 37; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 38; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 39; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 40; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 41; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 42; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 43; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 44; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 45; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 46; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 47; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 48; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 49; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 50; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 51; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 52; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 53; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 54; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 55; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 56; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 57; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 58; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 59; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 60; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 61; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 62; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 63; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 64; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 65; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 66; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 67; AVX512F-32-NEXT: kmovq %k0, (%esp) 68; AVX512F-32-NEXT: addl (%esp), %eax 69; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 70; AVX512F-32-NEXT: addl $68, %esp 71; AVX512F-32-NEXT: retl 72 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) 73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) 74 %ret1 = add i64 %res0, %res1 75 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) 76 %ret2 = add i64 %ret1, %res2 77 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) 78 %ret3 = add i64 %ret2, %res3 79 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) 80 %ret4 = add i64 %ret3, %res4 81 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) 82 %ret5 = add i64 %ret4, %res5 83 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) 84 %ret6 = add i64 %ret5, %res6 85 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) 86 %ret7 = add i64 %ret6, %res7 87 ret i64 %ret7 88} 89 90define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { 91; AVX512BW-LABEL: test_mask_cmp_b_512: 92; AVX512BW: ## BB#0: 93; AVX512BW-NEXT: kmovq %rdi, %k1 94; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} 95; AVX512BW-NEXT: kmovq %k0, %rax 96; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1} 97; AVX512BW-NEXT: kmovq %k0, %rcx 98; AVX512BW-NEXT: addq %rax, %rcx 99; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} 100; AVX512BW-NEXT: kmovq %k0, %rax 101; AVX512BW-NEXT: addq %rcx, %rax 102; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} 103; AVX512BW-NEXT: kmovq %k0, %rcx 104; AVX512BW-NEXT: addq %rax, %rcx 105; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} 106; AVX512BW-NEXT: kmovq %k0, %rax 107; AVX512BW-NEXT: addq %rcx, %rax 108; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} 109; AVX512BW-NEXT: kmovq %k0, %rcx 110; AVX512BW-NEXT: addq %rax, %rcx 111; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} 112; AVX512BW-NEXT: kmovq %k0, %rdx 113; AVX512BW-NEXT: addq %rcx, %rdx 114; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1} 115; AVX512BW-NEXT: kmovq %k0, %rax 116; AVX512BW-NEXT: addq %rdx, %rax 117; AVX512BW-NEXT: retq 118; 119; AVX512F-32-LABEL: test_mask_cmp_b_512: 120; AVX512F-32: # BB#0: 121; AVX512F-32-NEXT: subl $68, %esp 122; AVX512F-32-NEXT: .Ltmp1: 123; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 124; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 125; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 126; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 127; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} 128; AVX512F-32-NEXT: kmovq %k0, (%esp) 129; AVX512F-32-NEXT: movl (%esp), %eax 130; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 131; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1} 132; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 133; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 134; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 135; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} 136; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 137; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 138; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 139; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} 140; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 141; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 142; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 143; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} 144; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 145; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 146; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 147; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} 148; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 149; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 150; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 151; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} 152; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 153; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 154; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 155; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1} 156; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 157; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 158; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 159; AVX512F-32-NEXT: addl $68, %esp 160; AVX512F-32-NEXT: retl 161 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) 162 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) 163 %ret1 = add i64 %res0, %res1 164 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) 165 %ret2 = add i64 %ret1, %res2 166 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) 167 %ret3 = add i64 %ret2, %res3 168 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) 169 %ret4 = add i64 %ret3, %res4 170 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) 171 %ret5 = add i64 %ret4, %res5 172 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) 173 %ret6 = add i64 %ret5, %res6 174 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) 175 %ret7 = add i64 %ret6, %res7 176 ret i64 %ret7 177} 178 179declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone 180 181define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { 182; AVX512BW-LABEL: test_ucmp_b_512: 183; AVX512BW: ## BB#0: 184; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0 185; AVX512BW-NEXT: kmovq %k0, %rax 186; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0 187; AVX512BW-NEXT: kmovq %k0, %rcx 188; AVX512BW-NEXT: addq %rax, %rcx 189; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 190; AVX512BW-NEXT: kmovq %k0, %rax 191; AVX512BW-NEXT: addq %rcx, %rax 192; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 193; AVX512BW-NEXT: kmovq %k0, %rcx 194; AVX512BW-NEXT: addq %rax, %rcx 195; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 196; AVX512BW-NEXT: kmovq %k0, %rax 197; AVX512BW-NEXT: addq %rcx, %rax 198; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 199; AVX512BW-NEXT: kmovq %k0, %rcx 200; AVX512BW-NEXT: addq %rax, %rcx 201; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 202; AVX512BW-NEXT: kmovq %k0, %rdx 203; AVX512BW-NEXT: addq %rcx, %rdx 204; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0 205; AVX512BW-NEXT: kmovq %k0, %rax 206; AVX512BW-NEXT: addq %rdx, %rax 207; AVX512BW-NEXT: retq 208; 209; AVX512F-32-LABEL: test_ucmp_b_512: 210; AVX512F-32: # BB#0: 211; AVX512F-32-NEXT: subl $68, %esp 212; AVX512F-32-NEXT: .Ltmp2: 213; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 214; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 215; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 216; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 217; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 218; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 219; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 220; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 221; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 222; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 223; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 224; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 225; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 226; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 227; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 228; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 229; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 230; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 231; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 232; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 233; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 234; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 235; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 236; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 237; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 238; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 239; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 240; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 241; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 242; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 243; AVX512F-32-NEXT: kmovq %k0, (%esp) 244; AVX512F-32-NEXT: addl (%esp), %eax 245; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 246; AVX512F-32-NEXT: addl $68, %esp 247; AVX512F-32-NEXT: retl 248 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) 249 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) 250 %ret1 = add i64 %res0, %res1 251 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) 252 %ret2 = add i64 %ret1, %res2 253 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) 254 %ret3 = add i64 %ret2, %res3 255 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) 256 %ret4 = add i64 %ret3, %res4 257 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) 258 %ret5 = add i64 %ret4, %res5 259 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) 260 %ret6 = add i64 %ret5, %res6 261 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) 262 %ret7 = add i64 %ret6, %res7 263 ret i64 %ret7 264} 265 266define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { 267; AVX512BW-LABEL: test_mask_x86_avx512_ucmp_b_512: 268; AVX512BW: ## BB#0: 269; AVX512BW-NEXT: kmovq %rdi, %k1 270; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1} 271; AVX512BW-NEXT: kmovq %k0, %rax 272; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} 273; AVX512BW-NEXT: kmovq %k0, %rcx 274; AVX512BW-NEXT: addq %rax, %rcx 275; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} 276; AVX512BW-NEXT: kmovq %k0, %rax 277; AVX512BW-NEXT: addq %rcx, %rax 278; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} 279; AVX512BW-NEXT: kmovq %k0, %rcx 280; AVX512BW-NEXT: addq %rax, %rcx 281; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} 282; AVX512BW-NEXT: kmovq %k0, %rax 283; AVX512BW-NEXT: addq %rcx, %rax 284; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} 285; AVX512BW-NEXT: kmovq %k0, %rcx 286; AVX512BW-NEXT: addq %rax, %rcx 287; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} 288; AVX512BW-NEXT: kmovq %k0, %rdx 289; AVX512BW-NEXT: addq %rcx, %rdx 290; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1} 291; AVX512BW-NEXT: kmovq %k0, %rax 292; AVX512BW-NEXT: addq %rdx, %rax 293; AVX512BW-NEXT: retq 294; 295; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512: 296; AVX512F-32: # BB#0: 297; AVX512F-32-NEXT: subl $68, %esp 298; AVX512F-32-NEXT: .Ltmp3: 299; AVX512F-32-NEXT: .cfi_def_cfa_offset 72 300; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 301; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 302; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 303; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1} 304; AVX512F-32-NEXT: kmovq %k0, (%esp) 305; AVX512F-32-NEXT: movl (%esp), %eax 306; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 307; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} 308; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 309; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 310; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 311; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} 312; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 313; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 314; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 315; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} 316; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 317; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 318; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 319; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} 320; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 321; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 322; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 323; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} 324; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 325; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 326; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 327; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} 328; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 329; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 330; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 331; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1} 332; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 333; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 334; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 335; AVX512F-32-NEXT: addl $68, %esp 336; AVX512F-32-NEXT: retl 337 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) 338 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) 339 %ret1 = add i64 %res0, %res1 340 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) 341 %ret2 = add i64 %ret1, %res2 342 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) 343 %ret3 = add i64 %ret2, %res3 344 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) 345 %ret4 = add i64 %ret3, %res4 346 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) 347 %ret5 = add i64 %ret4, %res5 348 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) 349 %ret6 = add i64 %ret5, %res6 350 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) 351 %ret7 = add i64 %ret6, %res7 352 ret i64 %ret7 353} 354 355declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone 356 357define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { 358; AVX512BW-LABEL: test_cmp_w_512: 359; AVX512BW: ## BB#0: 360; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 361; AVX512BW-NEXT: kmovd %k0, %eax 362; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0 363; AVX512BW-NEXT: kmovd %k0, %ecx 364; AVX512BW-NEXT: addl %eax, %ecx 365; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 366; AVX512BW-NEXT: kmovd %k0, %eax 367; AVX512BW-NEXT: addl %ecx, %eax 368; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 369; AVX512BW-NEXT: kmovd %k0, %ecx 370; AVX512BW-NEXT: addl %eax, %ecx 371; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 372; AVX512BW-NEXT: kmovd %k0, %eax 373; AVX512BW-NEXT: addl %ecx, %eax 374; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 375; AVX512BW-NEXT: kmovd %k0, %ecx 376; AVX512BW-NEXT: addl %eax, %ecx 377; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 378; AVX512BW-NEXT: kmovd %k0, %edx 379; AVX512BW-NEXT: addl %ecx, %edx 380; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0 381; AVX512BW-NEXT: kmovd %k0, %eax 382; AVX512BW-NEXT: addl %edx, %eax 383; AVX512BW-NEXT: retq 384; 385; AVX512F-32-LABEL: test_cmp_w_512: 386; AVX512F-32: # BB#0: 387; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 388; AVX512F-32-NEXT: kmovd %k0, %eax 389; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 390; AVX512F-32-NEXT: kmovd %k0, %ecx 391; AVX512F-32-NEXT: addl %eax, %ecx 392; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 393; AVX512F-32-NEXT: kmovd %k0, %eax 394; AVX512F-32-NEXT: addl %ecx, %eax 395; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 396; AVX512F-32-NEXT: kmovd %k0, %ecx 397; AVX512F-32-NEXT: addl %eax, %ecx 398; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 399; AVX512F-32-NEXT: kmovd %k0, %eax 400; AVX512F-32-NEXT: addl %ecx, %eax 401; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 402; AVX512F-32-NEXT: kmovd %k0, %ecx 403; AVX512F-32-NEXT: addl %eax, %ecx 404; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 405; AVX512F-32-NEXT: kmovd %k0, %edx 406; AVX512F-32-NEXT: addl %ecx, %edx 407; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 408; AVX512F-32-NEXT: kmovd %k0, %eax 409; AVX512F-32-NEXT: addl %edx, %eax 410; AVX512F-32-NEXT: retl 411 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) 412 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) 413 %ret1 = add i32 %res0, %res1 414 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) 415 %ret2 = add i32 %ret1, %res2 416 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) 417 %ret3 = add i32 %ret2, %res3 418 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) 419 %ret4 = add i32 %ret3, %res4 420 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) 421 %ret5 = add i32 %ret4, %res5 422 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) 423 %ret6 = add i32 %ret5, %res6 424 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) 425 %ret7 = add i32 %ret6, %res7 426 ret i32 %ret7 427} 428 429define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { 430; AVX512BW-LABEL: test_mask_cmp_w_512: 431; AVX512BW: ## BB#0: 432; AVX512BW-NEXT: kmovd %edi, %k1 433; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} 434; AVX512BW-NEXT: kmovd %k0, %eax 435; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1} 436; AVX512BW-NEXT: kmovd %k0, %ecx 437; AVX512BW-NEXT: addl %eax, %ecx 438; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} 439; AVX512BW-NEXT: kmovd %k0, %eax 440; AVX512BW-NEXT: addl %ecx, %eax 441; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} 442; AVX512BW-NEXT: kmovd %k0, %ecx 443; AVX512BW-NEXT: addl %eax, %ecx 444; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} 445; AVX512BW-NEXT: kmovd %k0, %eax 446; AVX512BW-NEXT: addl %ecx, %eax 447; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} 448; AVX512BW-NEXT: kmovd %k0, %ecx 449; AVX512BW-NEXT: addl %eax, %ecx 450; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} 451; AVX512BW-NEXT: kmovd %k0, %edx 452; AVX512BW-NEXT: addl %ecx, %edx 453; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1} 454; AVX512BW-NEXT: kmovd %k0, %eax 455; AVX512BW-NEXT: addl %edx, %eax 456; AVX512BW-NEXT: retq 457; 458; AVX512F-32-LABEL: test_mask_cmp_w_512: 459; AVX512F-32: # BB#0: 460; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 461; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} 462; AVX512F-32-NEXT: kmovd %k0, %eax 463; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1} 464; AVX512F-32-NEXT: kmovd %k0, %ecx 465; AVX512F-32-NEXT: addl %eax, %ecx 466; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} 467; AVX512F-32-NEXT: kmovd %k0, %eax 468; AVX512F-32-NEXT: addl %ecx, %eax 469; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} 470; AVX512F-32-NEXT: kmovd %k0, %ecx 471; AVX512F-32-NEXT: addl %eax, %ecx 472; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} 473; AVX512F-32-NEXT: kmovd %k0, %eax 474; AVX512F-32-NEXT: addl %ecx, %eax 475; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} 476; AVX512F-32-NEXT: kmovd %k0, %ecx 477; AVX512F-32-NEXT: addl %eax, %ecx 478; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} 479; AVX512F-32-NEXT: kmovd %k0, %edx 480; AVX512F-32-NEXT: addl %ecx, %edx 481; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1} 482; AVX512F-32-NEXT: kmovd %k0, %eax 483; AVX512F-32-NEXT: addl %edx, %eax 484; AVX512F-32-NEXT: retl 485 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) 486 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) 487 %ret1 = add i32 %res0, %res1 488 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) 489 %ret2 = add i32 %ret1, %res2 490 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) 491 %ret3 = add i32 %ret2, %res3 492 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) 493 %ret4 = add i32 %ret3, %res4 494 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) 495 %ret5 = add i32 %ret4, %res5 496 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) 497 %ret6 = add i32 %ret5, %res6 498 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) 499 %ret7 = add i32 %ret6, %res7 500 ret i32 %ret7 501} 502 503declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone 504 505define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { 506; AVX512BW-LABEL: test_ucmp_w_512: 507; AVX512BW: ## BB#0: 508; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0 509; AVX512BW-NEXT: kmovd %k0, %eax 510; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 511; AVX512BW-NEXT: kmovd %k0, %ecx 512; AVX512BW-NEXT: addl %eax, %ecx 513; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 514; AVX512BW-NEXT: kmovd %k0, %eax 515; AVX512BW-NEXT: addl %ecx, %eax 516; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 517; AVX512BW-NEXT: kmovd %k0, %ecx 518; AVX512BW-NEXT: addl %eax, %ecx 519; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 520; AVX512BW-NEXT: kmovd %k0, %eax 521; AVX512BW-NEXT: addl %ecx, %eax 522; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 523; AVX512BW-NEXT: kmovd %k0, %ecx 524; AVX512BW-NEXT: addl %eax, %ecx 525; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 526; AVX512BW-NEXT: kmovd %k0, %edx 527; AVX512BW-NEXT: addl %ecx, %edx 528; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0 529; AVX512BW-NEXT: kmovd %k0, %eax 530; AVX512BW-NEXT: addl %edx, %eax 531; AVX512BW-NEXT: retq 532; 533; AVX512F-32-LABEL: test_ucmp_w_512: 534; AVX512F-32: # BB#0: 535; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 536; AVX512F-32-NEXT: kmovd %k0, %eax 537; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 538; AVX512F-32-NEXT: kmovd %k0, %ecx 539; AVX512F-32-NEXT: addl %eax, %ecx 540; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 541; AVX512F-32-NEXT: kmovd %k0, %eax 542; AVX512F-32-NEXT: addl %ecx, %eax 543; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 544; AVX512F-32-NEXT: kmovd %k0, %ecx 545; AVX512F-32-NEXT: addl %eax, %ecx 546; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 547; AVX512F-32-NEXT: kmovd %k0, %eax 548; AVX512F-32-NEXT: addl %ecx, %eax 549; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 550; AVX512F-32-NEXT: kmovd %k0, %ecx 551; AVX512F-32-NEXT: addl %eax, %ecx 552; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 553; AVX512F-32-NEXT: kmovd %k0, %edx 554; AVX512F-32-NEXT: addl %ecx, %edx 555; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 556; AVX512F-32-NEXT: kmovd %k0, %eax 557; AVX512F-32-NEXT: addl %edx, %eax 558; AVX512F-32-NEXT: retl 559 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) 560 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) 561 %ret1 = add i32 %res0, %res1 562 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) 563 %ret2 = add i32 %ret1, %res2 564 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) 565 %ret3 = add i32 %ret2, %res3 566 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) 567 %ret4 = add i32 %ret3, %res4 568 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) 569 %ret5 = add i32 %ret4, %res5 570 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) 571 %ret6 = add i32 %ret5, %res6 572 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) 573 %ret7 = add i32 %ret6, %res7 574 ret i32 %ret7 575} 576 577define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { 578; AVX512BW-LABEL: test_mask_ucmp_w_512: 579; AVX512BW: ## BB#0: 580; AVX512BW-NEXT: kmovd %edi, %k1 581; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1} 582; AVX512BW-NEXT: kmovd %k0, %eax 583; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} 584; AVX512BW-NEXT: kmovd %k0, %ecx 585; AVX512BW-NEXT: addl %eax, %ecx 586; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} 587; AVX512BW-NEXT: kmovd %k0, %eax 588; AVX512BW-NEXT: addl %ecx, %eax 589; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} 590; AVX512BW-NEXT: kmovd %k0, %ecx 591; AVX512BW-NEXT: addl %eax, %ecx 592; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} 593; AVX512BW-NEXT: kmovd %k0, %eax 594; AVX512BW-NEXT: addl %ecx, %eax 595; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} 596; AVX512BW-NEXT: kmovd %k0, %ecx 597; AVX512BW-NEXT: addl %eax, %ecx 598; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} 599; AVX512BW-NEXT: kmovd %k0, %edx 600; AVX512BW-NEXT: addl %ecx, %edx 601; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1} 602; AVX512BW-NEXT: kmovd %k0, %eax 603; AVX512BW-NEXT: addl %edx, %eax 604; AVX512BW-NEXT: retq 605; 606; AVX512F-32-LABEL: test_mask_ucmp_w_512: 607; AVX512F-32: # BB#0: 608; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 609; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1} 610; AVX512F-32-NEXT: kmovd %k0, %eax 611; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} 612; AVX512F-32-NEXT: kmovd %k0, %ecx 613; AVX512F-32-NEXT: addl %eax, %ecx 614; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} 615; AVX512F-32-NEXT: kmovd %k0, %eax 616; AVX512F-32-NEXT: addl %ecx, %eax 617; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} 618; AVX512F-32-NEXT: kmovd %k0, %ecx 619; AVX512F-32-NEXT: addl %eax, %ecx 620; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} 621; AVX512F-32-NEXT: kmovd %k0, %eax 622; AVX512F-32-NEXT: addl %ecx, %eax 623; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} 624; AVX512F-32-NEXT: kmovd %k0, %ecx 625; AVX512F-32-NEXT: addl %eax, %ecx 626; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} 627; AVX512F-32-NEXT: kmovd %k0, %edx 628; AVX512F-32-NEXT: addl %ecx, %edx 629; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1} 630; AVX512F-32-NEXT: kmovd %k0, %eax 631; AVX512F-32-NEXT: addl %edx, %eax 632; AVX512F-32-NEXT: retl 633 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) 634 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) 635 %ret1 = add i32 %res0, %res1 636 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) 637 %ret2 = add i32 %ret1, %res2 638 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) 639 %ret3 = add i32 %ret2, %res3 640 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) 641 %ret4 = add i32 %ret3, %res4 642 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) 643 %ret5 = add i32 %ret4, %res5 644 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) 645 %ret6 = add i32 %ret5, %res6 646 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) 647 %ret7 = add i32 %ret6, %res7 648 ret i32 %ret7 649} 650 651declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone 652 653define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 654; AVX512BW-LABEL: test_mask_packs_epi32_rr_512: 655; AVX512BW: ## BB#0: 656; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 657; AVX512BW-NEXT: retq 658; 659; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512: 660; AVX512F-32: # BB#0: 661; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 662; AVX512F-32-NEXT: retl 663 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 664 ret <32 x i16> %res 665} 666 667define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 668; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512: 669; AVX512BW: ## BB#0: 670; AVX512BW-NEXT: kmovd %edi, %k1 671; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} 672; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 673; AVX512BW-NEXT: retq 674; 675; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512: 676; AVX512F-32: # BB#0: 677; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 678; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} 679; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 680; AVX512F-32-NEXT: retl 681 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 682 ret <32 x i16> %res 683} 684 685define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 686; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512: 687; AVX512BW: ## BB#0: 688; AVX512BW-NEXT: kmovd %edi, %k1 689; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} 690; AVX512BW-NEXT: retq 691; 692; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512: 693; AVX512F-32: # BB#0: 694; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 695; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} 696; AVX512F-32-NEXT: retl 697 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 698 ret <32 x i16> %res 699} 700 701define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 702; AVX512BW-LABEL: test_mask_packs_epi32_rm_512: 703; AVX512BW: ## BB#0: 704; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 705; AVX512BW-NEXT: retq 706; 707; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512: 708; AVX512F-32: # BB#0: 709; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 710; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 711; AVX512F-32-NEXT: retl 712 %b = load <16 x i32>, <16 x i32>* %ptr_b 713 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 714 ret <32 x i16> %res 715} 716 717define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 718; AVX512BW-LABEL: test_mask_packs_epi32_rmk_512: 719; AVX512BW: ## BB#0: 720; AVX512BW-NEXT: kmovd %esi, %k1 721; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} 722; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 723; AVX512BW-NEXT: retq 724; 725; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512: 726; AVX512F-32: # BB#0: 727; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 728; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 729; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} 730; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 731; AVX512F-32-NEXT: retl 732 %b = load <16 x i32>, <16 x i32>* %ptr_b 733 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 734 ret <32 x i16> %res 735} 736 737define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 738; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512: 739; AVX512BW: ## BB#0: 740; AVX512BW-NEXT: kmovd %esi, %k1 741; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} 742; AVX512BW-NEXT: retq 743; 744; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512: 745; AVX512F-32: # BB#0: 746; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 747; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 748; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} 749; AVX512F-32-NEXT: retl 750 %b = load <16 x i32>, <16 x i32>* %ptr_b 751 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 752 ret <32 x i16> %res 753} 754 755define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 756; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512: 757; AVX512BW: ## BB#0: 758; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 759; AVX512BW-NEXT: retq 760; 761; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512: 762; AVX512F-32: # BB#0: 763; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 764; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 765; AVX512F-32-NEXT: retl 766 %q = load i32, i32* %ptr_b 767 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 768 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 769 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 770 ret <32 x i16> %res 771} 772 773define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 774; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512: 775; AVX512BW: ## BB#0: 776; AVX512BW-NEXT: kmovd %esi, %k1 777; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} 778; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 779; AVX512BW-NEXT: retq 780; 781; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512: 782; AVX512F-32: # BB#0: 783; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 784; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 785; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} 786; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 787; AVX512F-32-NEXT: retl 788 %q = load i32, i32* %ptr_b 789 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 790 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 791 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 792 ret <32 x i16> %res 793} 794 795define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 796; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512: 797; AVX512BW: ## BB#0: 798; AVX512BW-NEXT: kmovd %esi, %k1 799; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 800; AVX512BW-NEXT: retq 801; 802; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512: 803; AVX512F-32: # BB#0: 804; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 805; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 806; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} 807; AVX512F-32-NEXT: retl 808 %q = load i32, i32* %ptr_b 809 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 810 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 811 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 812 ret <32 x i16> %res 813} 814 815declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 816 817define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 818; AVX512BW-LABEL: test_mask_packs_epi16_rr_512: 819; AVX512BW: ## BB#0: 820; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 821; AVX512BW-NEXT: retq 822; 823; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512: 824; AVX512F-32: # BB#0: 825; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 826; AVX512F-32-NEXT: retl 827 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 828 ret <64 x i8> %res 829} 830 831define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 832; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512: 833; AVX512BW: ## BB#0: 834; AVX512BW-NEXT: kmovq %rdi, %k1 835; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} 836; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 837; AVX512BW-NEXT: retq 838; 839; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512: 840; AVX512F-32: # BB#0: 841; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 842; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 843; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 844; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} 845; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 846; AVX512F-32-NEXT: retl 847 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 848 ret <64 x i8> %res 849} 850 851define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 852; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512: 853; AVX512BW: ## BB#0: 854; AVX512BW-NEXT: kmovq %rdi, %k1 855; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} 856; AVX512BW-NEXT: retq 857; 858; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512: 859; AVX512F-32: # BB#0: 860; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 861; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 862; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 863; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} 864; AVX512F-32-NEXT: retl 865 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 866 ret <64 x i8> %res 867} 868 869define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 870; AVX512BW-LABEL: test_mask_packs_epi16_rm_512: 871; AVX512BW: ## BB#0: 872; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 873; AVX512BW-NEXT: retq 874; 875; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512: 876; AVX512F-32: # BB#0: 877; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 878; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 879; AVX512F-32-NEXT: retl 880 %b = load <32 x i16>, <32 x i16>* %ptr_b 881 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 882 ret <64 x i8> %res 883} 884 885define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 886; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512: 887; AVX512BW: ## BB#0: 888; AVX512BW-NEXT: kmovq %rsi, %k1 889; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} 890; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 891; AVX512BW-NEXT: retq 892; 893; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512: 894; AVX512F-32: # BB#0: 895; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 896; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 897; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 898; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 899; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} 900; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 901; AVX512F-32-NEXT: retl 902 %b = load <32 x i16>, <32 x i16>* %ptr_b 903 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 904 ret <64 x i8> %res 905} 906 907define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 908; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512: 909; AVX512BW: ## BB#0: 910; AVX512BW-NEXT: kmovq %rsi, %k1 911; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} 912; AVX512BW-NEXT: retq 913; 914; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512: 915; AVX512F-32: # BB#0: 916; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 917; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 918; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 919; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 920; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} 921; AVX512F-32-NEXT: retl 922 %b = load <32 x i16>, <32 x i16>* %ptr_b 923 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 924 ret <64 x i8> %res 925} 926 927declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 928 929 930define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 931; AVX512BW-LABEL: test_mask_packus_epi32_rr_512: 932; AVX512BW: ## BB#0: 933; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 934; AVX512BW-NEXT: retq 935; 936; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512: 937; AVX512F-32: # BB#0: 938; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 939; AVX512F-32-NEXT: retl 940 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 941 ret <32 x i16> %res 942} 943 944define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 945; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512: 946; AVX512BW: ## BB#0: 947; AVX512BW-NEXT: kmovd %edi, %k1 948; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} 949; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 950; AVX512BW-NEXT: retq 951; 952; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512: 953; AVX512F-32: # BB#0: 954; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 955; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} 956; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 957; AVX512F-32-NEXT: retl 958 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 959 ret <32 x i16> %res 960} 961 962define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 963; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512: 964; AVX512BW: ## BB#0: 965; AVX512BW-NEXT: kmovd %edi, %k1 966; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} 967; AVX512BW-NEXT: retq 968; 969; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512: 970; AVX512F-32: # BB#0: 971; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 972; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} 973; AVX512F-32-NEXT: retl 974 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 975 ret <32 x i16> %res 976} 977 978define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 979; AVX512BW-LABEL: test_mask_packus_epi32_rm_512: 980; AVX512BW: ## BB#0: 981; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 982; AVX512BW-NEXT: retq 983; 984; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512: 985; AVX512F-32: # BB#0: 986; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 987; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 988; AVX512F-32-NEXT: retl 989 %b = load <16 x i32>, <16 x i32>* %ptr_b 990 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 991 ret <32 x i16> %res 992} 993 994define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 995; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512: 996; AVX512BW: ## BB#0: 997; AVX512BW-NEXT: kmovd %esi, %k1 998; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} 999; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1000; AVX512BW-NEXT: retq 1001; 1002; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512: 1003; AVX512F-32: # BB#0: 1004; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1005; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1006; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} 1007; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1008; AVX512F-32-NEXT: retl 1009 %b = load <16 x i32>, <16 x i32>* %ptr_b 1010 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1011 ret <32 x i16> %res 1012} 1013 1014define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 1015; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512: 1016; AVX512BW: ## BB#0: 1017; AVX512BW-NEXT: kmovd %esi, %k1 1018; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} 1019; AVX512BW-NEXT: retq 1020; 1021; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512: 1022; AVX512F-32: # BB#0: 1023; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1024; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1025; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} 1026; AVX512F-32-NEXT: retl 1027 %b = load <16 x i32>, <16 x i32>* %ptr_b 1028 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1029 ret <32 x i16> %res 1030} 1031 1032define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 1033; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512: 1034; AVX512BW: ## BB#0: 1035; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 1036; AVX512BW-NEXT: retq 1037; 1038; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512: 1039; AVX512F-32: # BB#0: 1040; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1041; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 1042; AVX512F-32-NEXT: retl 1043 %q = load i32, i32* %ptr_b 1044 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1045 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1046 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1047 ret <32 x i16> %res 1048} 1049 1050define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1051; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512: 1052; AVX512BW: ## BB#0: 1053; AVX512BW-NEXT: kmovd %esi, %k1 1054; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} 1055; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1056; AVX512BW-NEXT: retq 1057; 1058; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512: 1059; AVX512F-32: # BB#0: 1060; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1061; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1062; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} 1063; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1064; AVX512F-32-NEXT: retl 1065 %q = load i32, i32* %ptr_b 1066 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1067 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1068 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1069 ret <32 x i16> %res 1070} 1071 1072define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 1073; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512: 1074; AVX512BW: ## BB#0: 1075; AVX512BW-NEXT: kmovd %esi, %k1 1076; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 1077; AVX512BW-NEXT: retq 1078; 1079; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512: 1080; AVX512F-32: # BB#0: 1081; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1082; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1083; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} 1084; AVX512F-32-NEXT: retl 1085 %q = load i32, i32* %ptr_b 1086 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1087 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1088 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1089 ret <32 x i16> %res 1090} 1091 1092declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1093 1094define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1095; AVX512BW-LABEL: test_mask_packus_epi16_rr_512: 1096; AVX512BW: ## BB#0: 1097; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1098; AVX512BW-NEXT: retq 1099; 1100; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512: 1101; AVX512F-32: # BB#0: 1102; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1103; AVX512F-32-NEXT: retl 1104 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1105 ret <64 x i8> %res 1106} 1107 1108define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1109; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512: 1110; AVX512BW: ## BB#0: 1111; AVX512BW-NEXT: kmovq %rdi, %k1 1112; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} 1113; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1114; AVX512BW-NEXT: retq 1115; 1116; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512: 1117; AVX512F-32: # BB#0: 1118; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1119; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1120; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1121; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} 1122; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1123; AVX512F-32-NEXT: retl 1124 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1125 ret <64 x i8> %res 1126} 1127 1128define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1129; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512: 1130; AVX512BW: ## BB#0: 1131; AVX512BW-NEXT: kmovq %rdi, %k1 1132; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1133; AVX512BW-NEXT: retq 1134; 1135; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512: 1136; AVX512F-32: # BB#0: 1137; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1138; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1139; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1140; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1141; AVX512F-32-NEXT: retl 1142 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1143 ret <64 x i8> %res 1144} 1145 1146define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1147; AVX512BW-LABEL: test_mask_packus_epi16_rm_512: 1148; AVX512BW: ## BB#0: 1149; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 1150; AVX512BW-NEXT: retq 1151; 1152; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512: 1153; AVX512F-32: # BB#0: 1154; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1155; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 1156; AVX512F-32-NEXT: retl 1157 %b = load <32 x i16>, <32 x i16>* %ptr_b 1158 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1159 ret <64 x i8> %res 1160} 1161 1162define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1163; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512: 1164; AVX512BW: ## BB#0: 1165; AVX512BW-NEXT: kmovq %rsi, %k1 1166; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} 1167; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1168; AVX512BW-NEXT: retq 1169; 1170; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512: 1171; AVX512F-32: # BB#0: 1172; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1173; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1174; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1175; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1176; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} 1177; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1178; AVX512F-32-NEXT: retl 1179 %b = load <32 x i16>, <32 x i16>* %ptr_b 1180 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1181 ret <64 x i8> %res 1182} 1183 1184define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 1185; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512: 1186; AVX512BW: ## BB#0: 1187; AVX512BW-NEXT: kmovq %rsi, %k1 1188; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} 1189; AVX512BW-NEXT: retq 1190; 1191; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512: 1192; AVX512F-32: # BB#0: 1193; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1194; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1195; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1196; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1197; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} 1198; AVX512F-32-NEXT: retl 1199 %b = load <32 x i16>, <32 x i16>* %ptr_b 1200 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1201 ret <64 x i8> %res 1202} 1203 1204declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 1205 1206define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1207; AVX512BW-LABEL: test_mask_adds_epi16_rr_512: 1208; AVX512BW: ## BB#0: 1209; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 1210; AVX512BW-NEXT: retq 1211; 1212; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512: 1213; AVX512F-32: # BB#0: 1214; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 1215; AVX512F-32-NEXT: retl 1216 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1217 ret <32 x i16> %res 1218} 1219 1220define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1221; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512: 1222; AVX512BW: ## BB#0: 1223; AVX512BW-NEXT: kmovd %edi, %k1 1224; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} 1225; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1226; AVX512BW-NEXT: retq 1227; 1228; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512: 1229; AVX512F-32: # BB#0: 1230; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1231; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} 1232; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1233; AVX512F-32-NEXT: retl 1234 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1235 ret <32 x i16> %res 1236} 1237 1238define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1239; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512: 1240; AVX512BW: ## BB#0: 1241; AVX512BW-NEXT: kmovd %edi, %k1 1242; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1243; AVX512BW-NEXT: retq 1244; 1245; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512: 1246; AVX512F-32: # BB#0: 1247; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1248; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1249; AVX512F-32-NEXT: retl 1250 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1251 ret <32 x i16> %res 1252} 1253 1254define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1255; AVX512BW-LABEL: test_mask_adds_epi16_rm_512: 1256; AVX512BW: ## BB#0: 1257; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 1258; AVX512BW-NEXT: retq 1259; 1260; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512: 1261; AVX512F-32: # BB#0: 1262; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1263; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 1264; AVX512F-32-NEXT: retl 1265 %b = load <32 x i16>, <32 x i16>* %ptr_b 1266 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1267 ret <32 x i16> %res 1268} 1269 1270define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1271; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512: 1272; AVX512BW: ## BB#0: 1273; AVX512BW-NEXT: kmovd %esi, %k1 1274; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} 1275; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1276; AVX512BW-NEXT: retq 1277; 1278; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512: 1279; AVX512F-32: # BB#0: 1280; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1281; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1282; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} 1283; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1284; AVX512F-32-NEXT: retl 1285 %b = load <32 x i16>, <32 x i16>* %ptr_b 1286 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1287 ret <32 x i16> %res 1288} 1289 1290define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1291; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512: 1292; AVX512BW: ## BB#0: 1293; AVX512BW-NEXT: kmovd %esi, %k1 1294; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} 1295; AVX512BW-NEXT: retq 1296; 1297; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512: 1298; AVX512F-32: # BB#0: 1299; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1300; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1301; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} 1302; AVX512F-32-NEXT: retl 1303 %b = load <32 x i16>, <32 x i16>* %ptr_b 1304 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1305 ret <32 x i16> %res 1306} 1307 1308declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1309 1310define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1311; AVX512BW-LABEL: test_mask_subs_epi16_rr_512: 1312; AVX512BW: ## BB#0: 1313; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 1314; AVX512BW-NEXT: retq 1315; 1316; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512: 1317; AVX512F-32: # BB#0: 1318; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 1319; AVX512F-32-NEXT: retl 1320 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1321 ret <32 x i16> %res 1322} 1323 1324define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1325; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512: 1326; AVX512BW: ## BB#0: 1327; AVX512BW-NEXT: kmovd %edi, %k1 1328; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} 1329; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1330; AVX512BW-NEXT: retq 1331; 1332; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512: 1333; AVX512F-32: # BB#0: 1334; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1335; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} 1336; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1337; AVX512F-32-NEXT: retl 1338 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1339 ret <32 x i16> %res 1340} 1341 1342define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1343; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512: 1344; AVX512BW: ## BB#0: 1345; AVX512BW-NEXT: kmovd %edi, %k1 1346; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1347; AVX512BW-NEXT: retq 1348; 1349; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512: 1350; AVX512F-32: # BB#0: 1351; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1352; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1353; AVX512F-32-NEXT: retl 1354 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1355 ret <32 x i16> %res 1356} 1357 1358define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1359; AVX512BW-LABEL: test_mask_subs_epi16_rm_512: 1360; AVX512BW: ## BB#0: 1361; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 1362; AVX512BW-NEXT: retq 1363; 1364; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512: 1365; AVX512F-32: # BB#0: 1366; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1367; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 1368; AVX512F-32-NEXT: retl 1369 %b = load <32 x i16>, <32 x i16>* %ptr_b 1370 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1371 ret <32 x i16> %res 1372} 1373 1374define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1375; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512: 1376; AVX512BW: ## BB#0: 1377; AVX512BW-NEXT: kmovd %esi, %k1 1378; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} 1379; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1380; AVX512BW-NEXT: retq 1381; 1382; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512: 1383; AVX512F-32: # BB#0: 1384; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1385; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1386; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} 1387; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1388; AVX512F-32-NEXT: retl 1389 %b = load <32 x i16>, <32 x i16>* %ptr_b 1390 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1391 ret <32 x i16> %res 1392} 1393 1394define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1395; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512: 1396; AVX512BW: ## BB#0: 1397; AVX512BW-NEXT: kmovd %esi, %k1 1398; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} 1399; AVX512BW-NEXT: retq 1400; 1401; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512: 1402; AVX512F-32: # BB#0: 1403; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1404; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1405; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} 1406; AVX512F-32-NEXT: retl 1407 %b = load <32 x i16>, <32 x i16>* %ptr_b 1408 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1409 ret <32 x i16> %res 1410} 1411 1412declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1413 1414define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1415; AVX512BW-LABEL: test_mask_adds_epu16_rr_512: 1416; AVX512BW: ## BB#0: 1417; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 1418; AVX512BW-NEXT: retq 1419; 1420; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512: 1421; AVX512F-32: # BB#0: 1422; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 1423; AVX512F-32-NEXT: retl 1424 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1425 ret <32 x i16> %res 1426} 1427 1428define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1429; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512: 1430; AVX512BW: ## BB#0: 1431; AVX512BW-NEXT: kmovd %edi, %k1 1432; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} 1433; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1434; AVX512BW-NEXT: retq 1435; 1436; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512: 1437; AVX512F-32: # BB#0: 1438; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1439; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} 1440; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1441; AVX512F-32-NEXT: retl 1442 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1443 ret <32 x i16> %res 1444} 1445 1446define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1447; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512: 1448; AVX512BW: ## BB#0: 1449; AVX512BW-NEXT: kmovd %edi, %k1 1450; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} 1451; AVX512BW-NEXT: retq 1452; 1453; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512: 1454; AVX512F-32: # BB#0: 1455; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1456; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} 1457; AVX512F-32-NEXT: retl 1458 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1459 ret <32 x i16> %res 1460} 1461 1462define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1463; AVX512BW-LABEL: test_mask_adds_epu16_rm_512: 1464; AVX512BW: ## BB#0: 1465; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 1466; AVX512BW-NEXT: retq 1467; 1468; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512: 1469; AVX512F-32: # BB#0: 1470; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1471; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 1472; AVX512F-32-NEXT: retl 1473 %b = load <32 x i16>, <32 x i16>* %ptr_b 1474 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1475 ret <32 x i16> %res 1476} 1477 1478define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1479; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512: 1480; AVX512BW: ## BB#0: 1481; AVX512BW-NEXT: kmovd %esi, %k1 1482; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} 1483; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1484; AVX512BW-NEXT: retq 1485; 1486; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512: 1487; AVX512F-32: # BB#0: 1488; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1489; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1490; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} 1491; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1492; AVX512F-32-NEXT: retl 1493 %b = load <32 x i16>, <32 x i16>* %ptr_b 1494 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1495 ret <32 x i16> %res 1496} 1497 1498define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1499; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512: 1500; AVX512BW: ## BB#0: 1501; AVX512BW-NEXT: kmovd %esi, %k1 1502; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} 1503; AVX512BW-NEXT: retq 1504; 1505; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512: 1506; AVX512F-32: # BB#0: 1507; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1508; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1509; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} 1510; AVX512F-32-NEXT: retl 1511 %b = load <32 x i16>, <32 x i16>* %ptr_b 1512 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1513 ret <32 x i16> %res 1514} 1515 1516declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1517 1518define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1519; AVX512BW-LABEL: test_mask_subs_epu16_rr_512: 1520; AVX512BW: ## BB#0: 1521; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 1522; AVX512BW-NEXT: retq 1523; 1524; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512: 1525; AVX512F-32: # BB#0: 1526; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 1527; AVX512F-32-NEXT: retl 1528 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1529 ret <32 x i16> %res 1530} 1531 1532define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1533; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512: 1534; AVX512BW: ## BB#0: 1535; AVX512BW-NEXT: kmovd %edi, %k1 1536; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} 1537; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1538; AVX512BW-NEXT: retq 1539; 1540; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512: 1541; AVX512F-32: # BB#0: 1542; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1543; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} 1544; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1545; AVX512F-32-NEXT: retl 1546 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1547 ret <32 x i16> %res 1548} 1549 1550define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1551; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512: 1552; AVX512BW: ## BB#0: 1553; AVX512BW-NEXT: kmovd %edi, %k1 1554; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} 1555; AVX512BW-NEXT: retq 1556; 1557; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512: 1558; AVX512F-32: # BB#0: 1559; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1560; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} 1561; AVX512F-32-NEXT: retl 1562 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1563 ret <32 x i16> %res 1564} 1565 1566define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1567; AVX512BW-LABEL: test_mask_subs_epu16_rm_512: 1568; AVX512BW: ## BB#0: 1569; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 1570; AVX512BW-NEXT: retq 1571; 1572; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512: 1573; AVX512F-32: # BB#0: 1574; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1575; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 1576; AVX512F-32-NEXT: retl 1577 %b = load <32 x i16>, <32 x i16>* %ptr_b 1578 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1579 ret <32 x i16> %res 1580} 1581 1582define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1583; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512: 1584; AVX512BW: ## BB#0: 1585; AVX512BW-NEXT: kmovd %esi, %k1 1586; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} 1587; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1588; AVX512BW-NEXT: retq 1589; 1590; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512: 1591; AVX512F-32: # BB#0: 1592; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1593; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1594; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} 1595; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1596; AVX512F-32-NEXT: retl 1597 %b = load <32 x i16>, <32 x i16>* %ptr_b 1598 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1599 ret <32 x i16> %res 1600} 1601 1602define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1603; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512: 1604; AVX512BW: ## BB#0: 1605; AVX512BW-NEXT: kmovd %esi, %k1 1606; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} 1607; AVX512BW-NEXT: retq 1608; 1609; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512: 1610; AVX512F-32: # BB#0: 1611; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1612; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1613; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} 1614; AVX512F-32-NEXT: retl 1615 %b = load <32 x i16>, <32 x i16>* %ptr_b 1616 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1617 ret <32 x i16> %res 1618} 1619 1620declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1621 1622declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1623 1624define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1625; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_b_512: 1626; AVX512BW: ## BB#0: 1627; AVX512BW-NEXT: kmovq %rdi, %k1 1628; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} 1629; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 1630; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1631; AVX512BW-NEXT: retq 1632; 1633; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512: 1634; AVX512F-32: # BB#0: 1635; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1636; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1637; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1638; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} 1639; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 1640; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1641; AVX512F-32-NEXT: retl 1642 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1643 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1644 %res2 = add <64 x i8> %res, %res1 1645 ret <64 x i8> %res2 1646} 1647 1648declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1649 1650define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1651; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_w_512: 1652; AVX512BW: ## BB#0: 1653; AVX512BW-NEXT: kmovd %edi, %k1 1654; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} 1655; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 1656; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1657; AVX512BW-NEXT: retq 1658; 1659; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512: 1660; AVX512F-32: # BB#0: 1661; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1662; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} 1663; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 1664; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1665; AVX512F-32-NEXT: retl 1666 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1667 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1668 %res2 = add <32 x i16> %res, %res1 1669 ret <32 x i16> %res2 1670} 1671 1672declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1673 1674define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1675; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_b_512: 1676; AVX512BW: ## BB#0: 1677; AVX512BW-NEXT: kmovq %rdi, %k1 1678; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} 1679; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0 1680; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1681; AVX512BW-NEXT: retq 1682; 1683; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512: 1684; AVX512F-32: # BB#0: 1685; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1686; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1687; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1688; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} 1689; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm0 1690; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1691; AVX512F-32-NEXT: retl 1692 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1693 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1694 %res2 = add <64 x i8> %res, %res1 1695 ret <64 x i8> %res2 1696} 1697 1698declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1699 1700define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1701; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: 1702; AVX512BW: ## BB#0: 1703; AVX512BW-NEXT: kmovd %edi, %k1 1704; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} 1705; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 1706; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1707; AVX512BW-NEXT: retq 1708; 1709; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: 1710; AVX512F-32: # BB#0: 1711; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1712; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} 1713; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 1714; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1715; AVX512F-32-NEXT: retl 1716 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1717 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1718 %res2 = add <32 x i16> %res, %res1 1719 ret <32 x i16> %res2 1720} 1721 1722declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1723 1724define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1725; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_b_512: 1726; AVX512BW: ## BB#0: 1727; AVX512BW-NEXT: kmovq %rdi, %k1 1728; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} 1729; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0 1730; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1731; AVX512BW-NEXT: retq 1732; 1733; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512: 1734; AVX512F-32: # BB#0: 1735; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1736; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1737; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1738; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} 1739; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm0 1740; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1741; AVX512F-32-NEXT: retl 1742 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1743 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1744 %res2 = add <64 x i8> %res, %res1 1745 ret <64 x i8> %res2 1746} 1747 1748declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1749 1750define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1751; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_w_512: 1752; AVX512BW: ## BB#0: 1753; AVX512BW-NEXT: kmovd %edi, %k1 1754; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} 1755; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0 1756; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1757; AVX512BW-NEXT: retq 1758; 1759; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512: 1760; AVX512F-32: # BB#0: 1761; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1762; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} 1763; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm0 1764; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1765; AVX512F-32-NEXT: retl 1766 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1767 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1768 %res2 = add <32 x i16> %res, %res1 1769 ret <32 x i16> %res2 1770} 1771 1772declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1773 1774define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1775; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_b_512: 1776; AVX512BW: ## BB#0: 1777; AVX512BW-NEXT: kmovq %rdi, %k1 1778; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} 1779; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0 1780; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1781; AVX512BW-NEXT: retq 1782; 1783; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512: 1784; AVX512F-32: # BB#0: 1785; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1786; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1787; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1788; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} 1789; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm0 1790; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1791; AVX512F-32-NEXT: retl 1792 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1793 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1794 %res2 = add <64 x i8> %res, %res1 1795 ret <64 x i8> %res2 1796} 1797 1798declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1799 1800define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1801; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_w_512: 1802; AVX512BW: ## BB#0: 1803; AVX512BW-NEXT: kmovd %edi, %k1 1804; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} 1805; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0 1806; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1807; AVX512BW-NEXT: retq 1808; 1809; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_w_512: 1810; AVX512F-32: # BB#0: 1811; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1812; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} 1813; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm0 1814; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1815; AVX512F-32-NEXT: retl 1816 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1817 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1818 %res2 = add <32 x i16> %res, %res1 1819 ret <32 x i16> %res2 1820} 1821 1822declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1823 1824define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1825; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: 1826; AVX512BW: ## BB#0: 1827; AVX512BW-NEXT: kmovd %edi, %k1 1828; AVX512BW-NEXT: vmovaps %zmm1, %zmm3 1829; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} 1830; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 1831; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 1832; AVX512BW-NEXT: retq 1833; 1834; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: 1835; AVX512F-32: # BB#0: 1836; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1837; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 1838; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} 1839; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 1840; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 1841; AVX512F-32-NEXT: retl 1842 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1843 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1844 %res2 = add <32 x i16> %res, %res1 1845 ret <32 x i16> %res2 1846} 1847 1848declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1849 1850define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1851; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: 1852; AVX512BW: ## BB#0: 1853; AVX512BW-NEXT: kmovd %edi, %k1 1854; AVX512BW-NEXT: vmovaps %zmm1, %zmm3 1855; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z} 1856; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 1857; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 1858; AVX512BW-NEXT: retq 1859; 1860; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: 1861; AVX512F-32: # BB#0: 1862; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1863; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 1864; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z} 1865; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 1866; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 1867; AVX512F-32-NEXT: retl 1868 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1869 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1870 %res2 = add <32 x i16> %res, %res1 1871 ret <32 x i16> %res2 1872} 1873 1874declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1875 1876define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1877; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 1878; AVX512BW: ## BB#0: 1879; AVX512BW-NEXT: kmovd %edi, %k1 1880; AVX512BW-NEXT: vmovaps %zmm1, %zmm3 1881; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1} 1882; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 1883; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 1884; AVX512BW-NEXT: retq 1885; 1886; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 1887; AVX512F-32: # BB#0: 1888; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1889; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 1890; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1} 1891; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 1892; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 1893; AVX512F-32-NEXT: retl 1894 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1895 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1896 %res2 = add <32 x i16> %res, %res1 1897 ret <32 x i16> %res2 1898} 1899 1900declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1901 1902define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1903; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_b_512: 1904; AVX512BW: ## BB#0: 1905; AVX512BW-NEXT: kmovq %rdi, %k1 1906; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} 1907; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm0 1908; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1909; AVX512BW-NEXT: retq 1910; 1911; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512: 1912; AVX512F-32: # BB#0: 1913; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1914; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1915; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1916; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} 1917; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm0 1918; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1919; AVX512F-32-NEXT: retl 1920 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1921 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1922 %res2 = add <64 x i8> %res, %res1 1923 ret <64 x i8> %res2 1924} 1925 1926declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1927 1928define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1929; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_w_512: 1930; AVX512BW: ## BB#0: 1931; AVX512BW-NEXT: kmovd %edi, %k1 1932; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} 1933; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm0 1934; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1935; AVX512BW-NEXT: retq 1936; 1937; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512: 1938; AVX512F-32: # BB#0: 1939; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1940; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} 1941; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm0 1942; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1943; AVX512F-32-NEXT: retl 1944 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1945 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1946 %res2 = add <32 x i16> %res, %res1 1947 ret <32 x i16> %res2 1948} 1949 1950declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1951 1952define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1953; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512: 1954; AVX512BW: ## BB#0: 1955; AVX512BW-NEXT: kmovq %rdi, %k1 1956; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} 1957; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 1958; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1959; AVX512BW-NEXT: retq 1960; 1961; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512: 1962; AVX512F-32: # BB#0: 1963; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1964; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1965; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1966; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} 1967; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 1968; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1969; AVX512F-32-NEXT: retl 1970 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1971 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1972 %res2 = add <64 x i8> %res, %res1 1973 ret <64 x i8> %res2 1974} 1975 1976declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) 1977 1978define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { 1979; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512: 1980; AVX512BW: ## BB#0: 1981; AVX512BW-NEXT: kmovd %edi, %k1 1982; AVX512BW-NEXT: vpabsw %zmm0, %zmm1 {%k1} 1983; AVX512BW-NEXT: vpabsw %zmm0, %zmm0 1984; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 1985; AVX512BW-NEXT: retq 1986; 1987; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512: 1988; AVX512F-32: # BB#0: 1989; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1990; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1} 1991; AVX512F-32-NEXT: vpabsw %zmm0, %zmm0 1992; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 1993; AVX512F-32-NEXT: retl 1994 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) 1995 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) 1996 %res2 = add <32 x i16> %res, %res1 1997 ret <32 x i16> %res2 1998} 1999 2000declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) 2001 2002define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2003; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512: 2004; AVX512BW: ## BB#0: 2005; AVX512BW-NEXT: kmovq %rdi, %k1 2006; AVX512BW-NEXT: vpabsb %zmm0, %zmm1 {%k1} 2007; AVX512BW-NEXT: vpabsb %zmm0, %zmm0 2008; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 2009; AVX512BW-NEXT: retq 2010; 2011; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512: 2012; AVX512F-32: # BB#0: 2013; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2014; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2015; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 2016; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1} 2017; AVX512F-32-NEXT: vpabsb %zmm0, %zmm0 2018; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0 2019; AVX512F-32-NEXT: retl 2020 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2021 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) 2022 %res2 = add <64 x i8> %res, %res1 2023 ret <64 x i8> %res2 2024} 2025 2026declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2027 2028define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2029; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: 2030; AVX512BW: ## BB#0: 2031; AVX512BW-NEXT: kmovd %edi, %k1 2032; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} 2033; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0 2034; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2035; AVX512BW-NEXT: retq 2036; 2037; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: 2038; AVX512F-32: # BB#0: 2039; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2040; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} 2041; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0 2042; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2043; AVX512F-32-NEXT: retl 2044 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2045 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2046 %res2 = add <32 x i16> %res, %res1 2047 ret <32 x i16> %res2 2048} 2049 2050declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2051 2052define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2053; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512: 2054; AVX512BW: ## BB#0: 2055; AVX512BW-NEXT: kmovd %edi, %k1 2056; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} 2057; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0 2058; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2059; AVX512BW-NEXT: retq 2060; 2061; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512: 2062; AVX512F-32: # BB#0: 2063; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2064; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} 2065; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm0 2066; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2067; AVX512F-32-NEXT: retl 2068 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2069 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2070 %res2 = add <32 x i16> %res, %res1 2071 ret <32 x i16> %res2 2072} 2073 2074declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2075 2076define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2077; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: 2078; AVX512BW: ## BB#0: 2079; AVX512BW-NEXT: kmovd %edi, %k1 2080; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} 2081; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0 2082; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2083; AVX512BW-NEXT: retq 2084; 2085; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: 2086; AVX512F-32: # BB#0: 2087; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2088; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} 2089; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0 2090; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2091; AVX512F-32-NEXT: retl 2092 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2093 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2094 %res2 = add <32 x i16> %res, %res1 2095 ret <32 x i16> %res2 2096} 2097 2098declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32) 2099 2100define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 2101; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512: 2102; AVX512BW: ## BB#0: 2103; AVX512BW-NEXT: kmovd %edi, %k1 2104; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1 {%k1} 2105; AVX512BW-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} 2106; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 2107; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2108; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2109; AVX512BW-NEXT: retq 2110; 2111; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512: 2112; AVX512F-32: # BB#0: 2113; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2114; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1} 2115; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} 2116; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0 2117; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2118; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2119; AVX512F-32-NEXT: retl 2120 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 2121 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 2122 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) 2123 %res3 = add <32 x i8> %res0, %res1 2124 %res4 = add <32 x i8> %res3, %res2 2125 ret <32 x i8> %res4 2126} 2127 2128declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32) 2129 2130define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 2131; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: 2132; AVX512BW: ## BB#0: 2133; AVX512BW-NEXT: kmovd %esi, %k1 2134; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) 2135; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1} 2136; AVX512BW-NEXT: retq 2137; 2138; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: 2139; AVX512F-32: # BB#0: 2140; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2141; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 2142; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) 2143; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1} 2144; AVX512F-32-NEXT: retl 2145 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 2146 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 2147 ret void 2148} 2149 2150declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) 2151 2152define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 2153; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: 2154; AVX512BW: ## BB#0: 2155; AVX512BW-NEXT: kmovd %edi, %k1 2156; AVX512BW-NEXT: vpmovswb %zmm0, %ymm1 {%k1} 2157; AVX512BW-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} 2158; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0 2159; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2160; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2161; AVX512BW-NEXT: retq 2162; 2163; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: 2164; AVX512F-32: # BB#0: 2165; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2166; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1} 2167; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} 2168; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0 2169; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2170; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2171; AVX512F-32-NEXT: retl 2172 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 2173 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 2174 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) 2175 %res3 = add <32 x i8> %res0, %res1 2176 %res4 = add <32 x i8> %res3, %res2 2177 ret <32 x i8> %res4 2178} 2179 2180declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) 2181 2182define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 2183; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: 2184; AVX512BW: ## BB#0: 2185; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) 2186; AVX512BW-NEXT: kmovd %esi, %k1 2187; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1} 2188; AVX512BW-NEXT: retq 2189; 2190; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: 2191; AVX512F-32: # BB#0: 2192; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 2193; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2194; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) 2195; AVX512F-32-NEXT: kmovd %eax, %k1 2196; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1} 2197; AVX512F-32-NEXT: retl 2198 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 2199 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 2200 ret void 2201} 2202 2203declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) 2204 2205define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 2206; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: 2207; AVX512BW: ## BB#0: 2208; AVX512BW-NEXT: kmovd %edi, %k1 2209; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} 2210; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} 2211; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0 2212; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2213; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2214; AVX512BW-NEXT: retq 2215; 2216; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: 2217; AVX512F-32: # BB#0: 2218; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2219; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} 2220; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} 2221; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0 2222; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2223; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2224; AVX512F-32-NEXT: retl 2225 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 2226 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 2227 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) 2228 %res3 = add <32 x i8> %res0, %res1 2229 %res4 = add <32 x i8> %res3, %res2 2230 ret <32 x i8> %res4 2231} 2232 2233declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) 2234 2235define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 2236; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: 2237; AVX512BW: ## BB#0: 2238; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) 2239; AVX512BW-NEXT: kmovd %esi, %k1 2240; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} 2241; AVX512BW-NEXT: retq 2242; 2243; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: 2244; AVX512F-32: # BB#0: 2245; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 2246; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2247; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) 2248; AVX512F-32-NEXT: kmovd %eax, %k1 2249; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1} 2250; AVX512F-32-NEXT: retl 2251 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 2252 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 2253 ret void 2254} 2255 2256declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) 2257 2258define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { 2259; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 2260; AVX512BW: ## BB#0: 2261; AVX512BW-NEXT: kmovd %edi, %k1 2262; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} 2263; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 2264; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2265; AVX512BW-NEXT: retq 2266; 2267; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 2268; AVX512F-32: # BB#0: 2269; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2270; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} 2271; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 2272; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2273; AVX512F-32-NEXT: retl 2274 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) 2275 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) 2276 %res2 = add <32 x i16> %res, %res1 2277 ret <32 x i16> %res2 2278} 2279 2280declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) 2281 2282define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { 2283; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: 2284; AVX512BW: ## BB#0: 2285; AVX512BW-NEXT: kmovw %edi, %k1 2286; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} 2287; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 2288; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0 2289; AVX512BW-NEXT: retq 2290; 2291; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: 2292; AVX512F-32: # BB#0: 2293; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 2294; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} 2295; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 2296; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0 2297; AVX512F-32-NEXT: retl 2298 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) 2299 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) 2300 %res2 = add <16 x i32> %res, %res1 2301 ret <16 x i32> %res2 2302} 2303 2304declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32) 2305 2306define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { 2307; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: 2308; AVX512BW: ## BB#0: 2309; AVX512BW-NEXT: kmovd %edi, %k1 2310; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} 2311; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} 2312; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 2313; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2314; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2315; AVX512BW-NEXT: retq 2316; 2317; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: 2318; AVX512F-32: # BB#0: 2319; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2320; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} 2321; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} 2322; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 2323; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2324; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2325; AVX512F-32-NEXT: retl 2326 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) 2327 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4) 2328 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1) 2329 %res3 = add <32 x i16> %res, %res1 2330 %res4 = add <32 x i16> %res3, %res2 2331 ret <32 x i16> %res4 2332} 2333 2334declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) 2335 2336define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ 2337; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512: 2338; AVX512BW: ## BB#0: 2339; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 2340; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 2341; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0 2342; AVX512BW-NEXT: retq 2343; 2344; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512: 2345; AVX512F-32: # BB#0: 2346; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 2347; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 2348; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 2349; AVX512F-32-NEXT: retl 2350 %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) 2351 %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) 2352 %res2 = add <8 x i64> %res, %res1 2353 ret <8 x i64> %res2 2354} 2355 2356declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32) 2357 2358define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) { 2359; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd: 2360; AVX512BW: ## BB#0: 2361; AVX512BW-NEXT: kmovd %edi, %k0 2362; AVX512BW-NEXT: kmovd %esi, %k1 2363; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0 2364; AVX512BW-NEXT: kmovd %k0, %eax 2365; AVX512BW-NEXT: retq 2366; 2367; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd: 2368; AVX512F-32: # BB#0: 2369; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2370; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2371; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0 2372; AVX512F-32-NEXT: kmovd %k0, %eax 2373; AVX512F-32-NEXT: retl 2374 %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1) 2375 ret i32 %res 2376} 2377 2378declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64) 2379 2380define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) { 2381; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd: 2382; AVX512BW: ## BB#0: 2383; AVX512BW-NEXT: kmovq %rdi, %k0 2384; AVX512BW-NEXT: kmovq %rsi, %k1 2385; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0 2386; AVX512BW-NEXT: kmovq %k0, %rax 2387; AVX512BW-NEXT: retq 2388; 2389; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd: 2390; AVX512F-32: # BB#0: 2391; AVX512F-32-NEXT: subl $12, %esp 2392; AVX512F-32-NEXT: .Ltmp4: 2393; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 2394; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2395; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2396; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0 2397; AVX512F-32-NEXT: kmovq %k0, (%esp) 2398; AVX512F-32-NEXT: movl (%esp), %eax 2399; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 2400; AVX512F-32-NEXT: addl $12, %esp 2401; AVX512F-32-NEXT: retl 2402 %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1) 2403 ret i64 %res 2404} 2405 2406declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) 2407 2408define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { 2409; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512: 2410; AVX512BW: ## BB#0: 2411; AVX512BW-NEXT: vpmovb2m %zmm0, %k0 2412; AVX512BW-NEXT: kmovq %k0, %rax 2413; AVX512BW-NEXT: retq 2414; 2415; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512: 2416; AVX512F-32: # BB#0: 2417; AVX512F-32-NEXT: subl $12, %esp 2418; AVX512F-32-NEXT: .Ltmp5: 2419; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 2420; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 2421; AVX512F-32-NEXT: kmovq %k0, (%esp) 2422; AVX512F-32-NEXT: movl (%esp), %eax 2423; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 2424; AVX512F-32-NEXT: addl $12, %esp 2425; AVX512F-32-NEXT: retl 2426 %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) 2427 ret i64 %res 2428} 2429 2430declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) 2431 2432define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { 2433; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512: 2434; AVX512BW: ## BB#0: 2435; AVX512BW-NEXT: vpmovw2m %zmm0, %k0 2436; AVX512BW-NEXT: kmovd %k0, %eax 2437; AVX512BW-NEXT: retq 2438; 2439; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512: 2440; AVX512F-32: # BB#0: 2441; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0 2442; AVX512F-32-NEXT: kmovd %k0, %eax 2443; AVX512F-32-NEXT: retl 2444 %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0) 2445 ret i32 %res 2446} 2447 2448declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64) 2449 2450define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) { 2451; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2b_512: 2452; AVX512BW: ## BB#0: 2453; AVX512BW-NEXT: kmovq %rdi, %k0 2454; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2455; AVX512BW-NEXT: retq 2456; 2457; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2b_512: 2458; AVX512F-32: # BB#0: 2459; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2460; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2461; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0 2462; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 2463; AVX512F-32-NEXT: retl 2464 %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0) 2465 ret <64 x i8> %res 2466} 2467 2468declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32) 2469 2470define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) { 2471; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2w_512: 2472; AVX512BW: ## BB#0: 2473; AVX512BW-NEXT: kmovd %edi, %k0 2474; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2475; AVX512BW-NEXT: retq 2476; 2477; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2w_512: 2478; AVX512F-32: # BB#0: 2479; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2480; AVX512F-32-NEXT: vpmovm2w %k0, %zmm0 2481; AVX512F-32-NEXT: retl 2482 %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0) 2483 ret <32 x i16> %res 2484} 2485 2486declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 2487 2488define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2489; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_w_512: 2490; AVX512BW: ## BB#0: 2491; AVX512BW-NEXT: kmovd %edi, %k1 2492; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} 2493; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z} 2494; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 2495; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2496; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 2497; AVX512BW-NEXT: retq 2498; 2499; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_w_512: 2500; AVX512F-32: # BB#0: 2501; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2502; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} 2503; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z} 2504; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 2505; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2506; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 2507; AVX512F-32-NEXT: retl 2508 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 2509 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 2510 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 2511 %res3 = add <32 x i16> %res, %res1 2512 %res4 = add <32 x i16> %res3, %res2 2513 ret <32 x i16> %res4 2514} 2515 2516declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32) 2517 2518define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 2519; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_wi_512: 2520; AVX512BW: ## BB#0: 2521; AVX512BW-NEXT: kmovd %esi, %k1 2522; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} 2523; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z} 2524; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0 2525; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2526; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0 2527; AVX512BW-NEXT: retq 2528; 2529; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_wi_512: 2530; AVX512F-32: # BB#0: 2531; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2532; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} 2533; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z} 2534; AVX512F-32-NEXT: vpsrlw $3, %zmm0, %zmm0 2535; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2536; AVX512F-32-NEXT: vpaddw %zmm2, %zmm0, %zmm0 2537; AVX512F-32-NEXT: retl 2538 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 2539 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) 2540 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) 2541 %res3 = add <32 x i16> %res, %res1 2542 %res4 = add <32 x i16> %res3, %res2 2543 ret <32 x i16> %res4 2544} 2545 2546declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2547 2548define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2549; AVX512BW-LABEL: test_int_x86_avx512_mask_psrlv32hi: 2550; AVX512BW: ## BB#0: 2551; AVX512BW-NEXT: kmovd %edi, %k1 2552; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} 2553; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z} 2554; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 2555; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2556; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2557; AVX512BW-NEXT: retq 2558; 2559; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi: 2560; AVX512F-32: # BB#0: 2561; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2562; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} 2563; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z} 2564; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 2565; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2566; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2567; AVX512F-32-NEXT: retl 2568 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2569 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 2570 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2571 %res3 = add <32 x i16> %res, %res1 2572 %res4 = add <32 x i16> %res3, %res2 2573 ret <32 x i16> %res4 2574} 2575 2576declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 2577 2578define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2579; AVX512BW-LABEL: test_int_x86_avx512_mask_psra_w_512: 2580; AVX512BW: ## BB#0: 2581; AVX512BW-NEXT: kmovd %edi, %k1 2582; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} 2583; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z} 2584; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0 2585; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2586; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2587; AVX512BW-NEXT: retq 2588; 2589; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_w_512: 2590; AVX512F-32: # BB#0: 2591; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2592; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} 2593; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z} 2594; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0 2595; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2596; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2597; AVX512F-32-NEXT: retl 2598 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 2599 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 2600 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 2601 %res3 = add <32 x i16> %res, %res1 2602 %res4 = add <32 x i16> %res3, %res2 2603 ret <32 x i16> %res4 2604} 2605 2606declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32) 2607 2608define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 2609; AVX512BW-LABEL: test_int_x86_avx512_mask_psra_wi_512: 2610; AVX512BW: ## BB#0: 2611; AVX512BW-NEXT: kmovd %esi, %k1 2612; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} 2613; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z} 2614; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm0 2615; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 2616; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2617; AVX512BW-NEXT: retq 2618; 2619; AVX512F-32-LABEL: test_int_x86_avx512_mask_psra_wi_512: 2620; AVX512F-32: # BB#0: 2621; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2622; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} 2623; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z} 2624; AVX512F-32-NEXT: vpsraw $3, %zmm0, %zmm0 2625; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 2626; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2627; AVX512F-32-NEXT: retl 2628 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 2629 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) 2630 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) 2631 %res3 = add <32 x i16> %res, %res1 2632 %res4 = add <32 x i16> %res3, %res2 2633 ret <32 x i16> %res4 2634} 2635 2636declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2637 2638define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2639; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi: 2640; AVX512BW: ## BB#0: 2641; AVX512BW-NEXT: kmovd %edi, %k1 2642; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} 2643; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z} 2644; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 2645; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2646; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2647; AVX512BW-NEXT: retq 2648; 2649; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi: 2650; AVX512F-32: # BB#0: 2651; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2652; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} 2653; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z} 2654; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0 2655; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2656; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2657; AVX512F-32-NEXT: retl 2658 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2659 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 2660 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2661 %res3 = add <32 x i16> %res, %res1 2662 %res4 = add <32 x i16> %res3, %res2 2663 ret <32 x i16> %res4 2664} 2665 2666define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2667; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi_const: 2668; AVX512BW: ## BB#0: 2669; AVX512BW-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51] 2670; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0 2671; AVX512BW-NEXT: retq 2672; 2673; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const: 2674; AVX512F-32: # BB#0: 2675; AVX512F-32-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51] 2676; AVX512F-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 2677; AVX512F-32-NEXT: retl 2678 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>, 2679 <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>, 2680 <32 x i16> zeroinitializer, i32 -1) 2681 ret <32 x i16> %res 2682} 2683 2684declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 2685 2686define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2687; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_w_512: 2688; AVX512BW: ## BB#0: 2689; AVX512BW-NEXT: kmovd %edi, %k1 2690; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} 2691; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z} 2692; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 2693; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2694; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2695; AVX512BW-NEXT: retq 2696; 2697; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_w_512: 2698; AVX512F-32: # BB#0: 2699; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2700; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} 2701; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z} 2702; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0 2703; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2704; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2705; AVX512F-32-NEXT: retl 2706 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 2707 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 2708 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 2709 %res3 = add <32 x i16> %res, %res1 2710 %res4 = add <32 x i16> %res3, %res2 2711 ret <32 x i16> %res4 2712} 2713 2714declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32) 2715 2716define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 2717; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_wi_512: 2718; AVX512BW: ## BB#0: 2719; AVX512BW-NEXT: kmovd %esi, %k1 2720; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} 2721; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z} 2722; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0 2723; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 2724; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2725; AVX512BW-NEXT: retq 2726; 2727; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_wi_512: 2728; AVX512F-32: # BB#0: 2729; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2730; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} 2731; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z} 2732; AVX512F-32-NEXT: vpsllw $3, %zmm0, %zmm0 2733; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 2734; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2735; AVX512F-32-NEXT: retl 2736 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 2737 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) 2738 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) 2739 %res3 = add <32 x i16> %res, %res1 2740 %res4 = add <32 x i16> %res3, %res2 2741 ret <32 x i16> %res4 2742} 2743 2744declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2745 2746define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2747; AVX512BW-LABEL: test_int_x86_avx512_mask_psllv32hi: 2748; AVX512BW: ## BB#0: 2749; AVX512BW-NEXT: kmovd %edi, %k1 2750; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} 2751; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z} 2752; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 2753; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2754; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2755; AVX512BW-NEXT: retq 2756; 2757; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi: 2758; AVX512F-32: # BB#0: 2759; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2760; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} 2761; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z} 2762; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 2763; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2764; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2765; AVX512F-32-NEXT: retl 2766 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2767 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 2768 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2769 %res3 = add <32 x i16> %res, %res1 2770 %res4 = add <32 x i16> %res3, %res2 2771 ret <32 x i16> %res4 2772} 2773 2774declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32) 2775 2776define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) { 2777; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: 2778; AVX512BW: ## BB#0: 2779; AVX512BW-NEXT: kmovd %edi, %k1 2780; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 2781; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 2782; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 2783; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 2784; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2785; AVX512BW-NEXT: retq 2786; 2787; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: 2788; AVX512F-32: # BB#0: 2789; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2790; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 2791; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm2 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 2792; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 2793; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 2794; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2795; AVX512F-32-NEXT: retl 2796 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) 2797 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) 2798 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) 2799 %res3 = add <32 x i16> %res, %res1 2800 %res4 = add <32 x i16> %res3, %res2 2801 ret <32 x i16> %res4 2802} 2803 2804declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32) 2805 2806define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) { 2807; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512: 2808; AVX512BW: ## BB#0: 2809; AVX512BW-NEXT: kmovd %edi, %k1 2810; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} 2811; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z} 2812; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 2813; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 2814; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2815; AVX512BW-NEXT: retq 2816; 2817; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512: 2818; AVX512F-32: # BB#0: 2819; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2820; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} 2821; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z} 2822; AVX512F-32-NEXT: vpmovsxbw %ymm0, %zmm0 2823; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 2824; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2825; AVX512F-32-NEXT: retl 2826 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) 2827 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) 2828 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) 2829 %res3 = add <32 x i16> %res, %res1 2830 %res4 = add <32 x i16> %res3, %res2 2831 ret <32 x i16> %res4 2832} 2833 2834declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2835 2836define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2837; AVX512BW-LABEL: test_int_x86_avx512_mask_permvar_hi_512: 2838; AVX512BW: ## BB#0: 2839; AVX512BW-NEXT: kmovd %edi, %k1 2840; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} 2841; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z} 2842; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0 2843; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2844; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2845; AVX512BW-NEXT: retq 2846; 2847; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512: 2848; AVX512F-32: # BB#0: 2849; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2850; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} 2851; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3 {%k1} {z} 2852; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0 2853; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2854; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2855; AVX512F-32-NEXT: retl 2856 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2857 %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 2858 %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2859 %res3 = add <32 x i16> %res, %res1 2860 %res4 = add <32 x i16> %res3, %res2 2861 ret <32 x i16> %res4 2862} 2863 2864declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64) 2865 2866define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2867; AVX512BW-LABEL: test_int_x86_avx512_ptestm_b_512: 2868; AVX512BW: ## BB#0: 2869; AVX512BW-NEXT: kmovq %rdi, %k1 2870; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1} 2871; AVX512BW-NEXT: kmovq %k0, %rcx 2872; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 2873; AVX512BW-NEXT: kmovq %k0, %rax 2874; AVX512BW-NEXT: addq %rcx, %rax 2875; AVX512BW-NEXT: retq 2876; 2877; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512: 2878; AVX512F-32: # BB#0: 2879; AVX512F-32-NEXT: subl $20, %esp 2880; AVX512F-32-NEXT: .Ltmp6: 2881; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 2882; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2883; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2884; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 2885; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1} 2886; AVX512F-32-NEXT: kmovq %k0, (%esp) 2887; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 2888; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 2889; AVX512F-32-NEXT: movl (%esp), %eax 2890; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 2891; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 2892; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 2893; AVX512F-32-NEXT: addl $20, %esp 2894; AVX512F-32-NEXT: retl 2895 %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2896 %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) 2897 %res2 = add i64 %res, %res1 2898 ret i64 %res2 2899} 2900 2901declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32) 2902 2903define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { 2904; AVX512BW-LABEL: test_int_x86_avx512_ptestm_w_512: 2905; AVX512BW: ## BB#0: 2906; AVX512BW-NEXT: kmovd %edi, %k1 2907; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1} 2908; AVX512BW-NEXT: kmovd %k0, %ecx 2909; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 2910; AVX512BW-NEXT: kmovd %k0, %eax 2911; AVX512BW-NEXT: addl %ecx, %eax 2912; AVX512BW-NEXT: retq 2913; 2914; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512: 2915; AVX512F-32: # BB#0: 2916; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2917; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1} 2918; AVX512F-32-NEXT: kmovd %k0, %ecx 2919; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 2920; AVX512F-32-NEXT: kmovd %k0, %eax 2921; AVX512F-32-NEXT: addl %ecx, %eax 2922; AVX512F-32-NEXT: retl 2923 %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) 2924 %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) 2925 %res2 = add i32 %res, %res1 2926 ret i32 %res2 2927} 2928 2929declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2) 2930 2931define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2932; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_b_512: 2933; AVX512BW: ## BB#0: 2934; AVX512BW-NEXT: kmovq %rdi, %k1 2935; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1} 2936; AVX512BW-NEXT: kmovq %k0, %rcx 2937; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 2938; AVX512BW-NEXT: kmovq %k0, %rax 2939; AVX512BW-NEXT: addq %rcx, %rax 2940; AVX512BW-NEXT: retq 2941; 2942; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512: 2943; AVX512F-32: # BB#0: 2944; AVX512F-32-NEXT: subl $20, %esp 2945; AVX512F-32-NEXT: .Ltmp7: 2946; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 2947; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2948; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2949; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 2950; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1} 2951; AVX512F-32-NEXT: kmovq %k0, (%esp) 2952; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 2953; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) 2954; AVX512F-32-NEXT: movl (%esp), %eax 2955; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 2956; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax 2957; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx 2958; AVX512F-32-NEXT: addl $20, %esp 2959; AVX512F-32-NEXT: retl 2960 %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2961 %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) 2962 %res2 = add i64 %res, %res1 2963 ret i64 %res2 2964} 2965 2966declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2) 2967 2968define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { 2969; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_w_512: 2970; AVX512BW: ## BB#0: 2971; AVX512BW-NEXT: kmovd %edi, %k1 2972; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1} 2973; AVX512BW-NEXT: kmovd %k0, %ecx 2974; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 2975; AVX512BW-NEXT: kmovd %k0, %eax 2976; AVX512BW-NEXT: addl %ecx, %eax 2977; AVX512BW-NEXT: retq 2978; 2979; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512: 2980; AVX512F-32: # BB#0: 2981; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2982; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1} 2983; AVX512F-32-NEXT: kmovd %k0, %ecx 2984; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 2985; AVX512F-32-NEXT: kmovd %k0, %eax 2986; AVX512F-32-NEXT: addl %ecx, %eax 2987; AVX512F-32-NEXT: retl 2988 %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) 2989 %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) 2990 %res2 = add i32 %res, %res1 2991 ret i32 %res2 2992} 2993 2994declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64) 2995 2996define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) { 2997; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: 2998; AVX512BW: ## BB#0: 2999; AVX512BW-NEXT: kmovq %rsi, %k1 3000; AVX512BW-NEXT: vpbroadcastb %dil, %zmm0 {%k1} 3001; AVX512BW-NEXT: vpbroadcastb %dil, %zmm1 {%k1} {z} 3002; AVX512BW-NEXT: vpbroadcastb %dil, %zmm2 3003; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 3004; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 3005; AVX512BW-NEXT: retq 3006; 3007; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: 3008; AVX512F-32: # BB#0: 3009; AVX512F-32-NEXT: movb {{[0-9]+}}(%esp), %al 3010; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 3011; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 3012; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 3013; AVX512F-32-NEXT: vpbroadcastb %al, %zmm1 {%k1} {z} 3014; AVX512F-32-NEXT: vpbroadcastb %al, %zmm0 {%k1} 3015; AVX512F-32-NEXT: vpbroadcastb %al, %zmm2 3016; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 3017; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0 3018; AVX512F-32-NEXT: retl 3019 %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1) 3020 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask) 3021 %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask) 3022 %res3 = add <64 x i8> %res, %res1 3023 %res4 = add <64 x i8> %res2, %res3 3024 ret <64 x i8> %res4 3025} 3026 3027declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32) 3028 3029define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) { 3030; AVX512BW-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: 3031; AVX512BW: ## BB#0: 3032; AVX512BW-NEXT: kmovd %esi, %k1 3033; AVX512BW-NEXT: vpbroadcastw %di, %zmm0 {%k1} 3034; AVX512BW-NEXT: vpbroadcastw %di, %zmm1 {%k1} {z} 3035; AVX512BW-NEXT: vpbroadcastw %di, %zmm2 3036; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 3037; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 3038; AVX512BW-NEXT: retq 3039; 3040; AVX512F-32-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: 3041; AVX512F-32: # BB#0: 3042; AVX512F-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 3043; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 3044; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm0 {%k1} 3045; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm1 {%k1} {z} 3046; AVX512F-32-NEXT: vpbroadcastw %ax, %zmm2 3047; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 3048; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 3049; AVX512F-32-NEXT: retl 3050 %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1) 3051 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask) 3052 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask) 3053 %res3 = add <32 x i16> %res, %res1 3054 %res4 = add <32 x i16> %res2, %res3 3055 ret <32 x i16> %res4 3056} 3057