; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32

; Codegen tests for 512-bit AVX-512BW mask intrinsics (byte and word element
; compares, blends and packs). Every function is compiled twice by the two
; llc invocations above: once for a 64-bit Darwin triple and once for a 32-bit
; Linux triple, each with its own set of expected-assembly lines.
; The expected-assembly lines are generated by the script named in the first
; line of this file; regenerate them with that script instead of editing them
; by hand.

; Byte equality compare with an all-ones mask (i64 -1). Expected code is an
; unmasked vpcmpeqb into %k0. The 64-bit target returns the mask with kmovq to
; %rax; the 32-bit target is expected to store it to the stack with kmovq and
; reload the two halves into %eax and %edx.
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp0:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

; Same byte equality compare, but with the caller-supplied i64 %mask. Expected
; code applies it as the {%k1} write-mask on vpcmpeqb. On the 32-bit target the
; 64-bit mask is expected to be built from two kmovd stack loads joined with
; kunpckdq.
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp1:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)

; Word equality compare with an all-ones mask (i32 -1). Both targets are
; expected to emit an unmasked vpcmpeqw into %k0 and return it with kmovd to
; %eax; no stack traffic is expected because the result is only 32 bits wide.
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

; Word equality compare under the caller-supplied i32 %mask. The mask is
; expected to reach %k1 through a single kmovd (from %edi on the 64-bit target,
; from the stack on the 32-bit target) and to be applied as {%k1}.
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)

; Byte greater-than compare with an all-ones mask (i64 -1). Mirrors
; test_pcmpeq_b with vpcmpgtb as the expected instruction, including the
; stack round-trip of the 64-bit result on the 32-bit target.
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp2:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

; Byte greater-than compare under the caller-supplied i64 %mask. Mirrors
; test_mask_pcmpeq_b with vpcmpgtb {%k1} as the expected instruction.
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp3:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)

; Word greater-than compare with an all-ones mask (i32 -1). Expected code on
; both targets is an unmasked vpcmpgtw into %k0 followed by kmovd to %eax.
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpgt_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

; Word greater-than compare under the caller-supplied i32 %mask, expected to
; be applied as the {%k1} write-mask on vpcmpgtw.
define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)

; Calls the mask.cmp.b.512 intrinsic once for each predicate immediate 0..7
; with an all-ones mask and returns the sum of the eight i64 results. The
; expected assembly prints the predicates, in order, as the eq, lt, le, unord,
; neq, nlt, nle and ord forms of the byte compare. On the 32-bit target each
; 64-bit mask is expected to be stored to a stack slot and accumulated with an
; addl/adcl pair into %eax:%edx.
define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512BW-LABEL: test_cmp_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rdx
; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_cmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp4:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: addl (%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
  %ret1 = add i64 %res0, %res1
  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
  %ret2 = add i64 %ret1, %res2
  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
  %ret3 = add i64 %ret2, %res3
  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
  %ret4 = add i64 %ret3, %res4
  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
  %ret5 = add i64 %ret4, %res5
  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
  %ret6 = add i64 %ret5, %res6
  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
  %ret7 = add i64 %ret6, %res7
  ret i64 %ret7
}

; Same eight-predicate sweep as test_cmp_b_512, but every call passes the
; caller-supplied i64 %mask. The expected assembly loads the mask into %k1
; once and applies {%k1} to each of the eight compares.
define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512BW-LABEL: test_mask_cmp_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rdx
; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_cmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp5:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
  %ret1 = add i64 %res0, %res1
  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
  %ret2 = add i64 %ret1, %res2
  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
  %ret3 = add i64 %ret2, %res3
  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
  %ret4 = add i64 %ret3, %res4
  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
  %ret5 = add i64 %ret4, %res5
  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
  %ret6 = add i64 %ret5, %res6
  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
  %ret7 = add i64 %ret6, %res7
  ret i64 %ret7
}

declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone

; Eight-predicate sweep of the mask.ucmp.b.512 intrinsic with an all-ones
; mask; the eight i64 results are summed and returned. The expected mnemonics
; are the "ub"-suffixed counterparts of those in test_cmp_b_512, in the same
; predicate order.
define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512BW-LABEL: test_ucmp_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rdx
; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_ucmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp6:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: addl (%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
  %ret1 = add i64 %res0, %res1
  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
  %ret2 = add i64 %ret1, %res2
  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
  %ret3 = add i64 %ret2, %res3
  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
  %ret4 = add i64 %ret3, %res4
  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
  %ret5 = add i64 %ret4, %res5
  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
  %ret6 = add i64 %ret5, %res6
  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
  %ret7 = add i64 %ret6, %res7
  ret i64 %ret7
}

; Masked variant of test_ucmp_b_512: every mask.ucmp.b.512 call passes the
; caller-supplied i64 %mask, and each expected compare carries {%k1}.
; NOTE(review): the name carries an extra "x86_avx512" segment that the
; sibling masked tests (e.g. test_mask_cmp_b_512) do not have; left as is
; because the label lines are keyed on it.
define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512BW-LABEL: test_mask_x86_avx512_ucmp_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rdx
; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp7:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
  %ret1 = add i64 %res0, %res1
  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
  %ret2 = add i64 %ret1, %res2
  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
  %ret3 = add i64 %ret2, %res3
  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
  %ret4 = add i64 %ret3, %res4
  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
  %ret5 = add i64 %ret4, %res5
  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
  %ret6 = add i64 %ret5, %res6
  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
  %ret7 = add i64 %ret6, %res7
  ret i64 %ret7
}

declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone

; Eight-predicate sweep of the mask.cmp.w.512 intrinsic (predicate immediates
; 0..7, all-ones i32 mask); the eight i32 results are summed and returned.
; Because each result is 32 bits wide, both targets are expected to use the
; same kmovd/addl register sequence with no stack slots.
define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512BW-LABEL: test_cmp_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %edx
; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_cmp_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %ecx, %edx
; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
; AVX512F-32-NEXT: retl
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
  %ret1 = add i32 %res0, %res1
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
  %ret2 = add i32 %ret1, %res2
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
  %ret3 = add i32 %ret2, %res3
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
  %ret4 = add i32 %ret3, %res4
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
  %ret5 = add i32 %ret4, %res5
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
  %ret6 = add i32 %ret5, %res6
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
  %ret7 = add i32 %ret6, %res7
  ret i32 %ret7
}

; Masked variant of test_cmp_w_512: every mask.cmp.w.512 call passes the
; caller-supplied i32 %mask, expected to be loaded into %k1 once with kmovd
; and applied as {%k1} to each compare.
define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
; AVX512BW-LABEL: test_mask_cmp_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %edx
; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_cmp_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %ecx, %edx
; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
; AVX512F-32-NEXT: retl
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
  %ret1 = add i32 %res0, %res1
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
  %ret2 = add i32 %ret1, %res2
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
  %ret3 = add i32 %ret2, %res3
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
  %ret4 = add i32 %ret3, %res4
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
  %ret5 = add i32 %ret4, %res5
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
  %ret6 = add i32 %ret5, %res6
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
  %ret7 = add i32 %ret6, %res7
  ret i32 %ret7
}

declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone

; Eight-predicate sweep of the mask.ucmp.w.512 intrinsic with an all-ones i32
; mask; the eight i32 results are summed and returned. Expected mnemonics are
; the "uw"-suffixed forms, in the same predicate order as test_cmp_w_512.
define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512BW-LABEL: test_ucmp_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %edx
; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_ucmp_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %ecx, %edx
; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
; AVX512F-32-NEXT: retl
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
  %ret1 = add i32 %res0, %res1
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
  %ret2 = add i32 %ret1, %res2
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
  %ret3 = add i32 %ret2, %res3
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
  %ret4 = add i32 %ret3, %res4
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
  %ret5 = add i32 %ret4, %res5
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
  %ret6 = add i32 %ret5, %res6
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
  %ret7 = add i32 %ret6, %res7
  ret i32 %ret7
}

; Masked variant of test_ucmp_w_512: every mask.ucmp.w.512 call passes the
; caller-supplied i32 %mask, and each expected compare carries {%k1}.
define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
; AVX512BW-LABEL: test_mask_ucmp_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %edx
; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_ucmp_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %ecx, %edx
; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
; AVX512F-32-NEXT: retl
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
  %ret1 = add i32 %res0, %res1
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
  %ret2 = add i32 %ret1, %res2
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
  %ret3 = add i32 %ret2, %res3
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
  %ret4 = add i32 %ret3, %res4
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
  %ret5 = add i32 %ret4, %res5
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
  %ret6 = add i32 %ret5, %res6
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
  %ret7 = add i32 %ret6, %res7
  ret i32 %ret7
}

declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone

; Declared here, ahead of its use in test_x86_mask_blend_b_512 further down.
declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly

; Word blend of %a1 and %a2 under the i32 %mask via mask.blend.w.512. The
; expected code moves the mask into %k1 and emits a single vpblendmw with
; {%k1}, writing %zmm0.
define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
; AVX512BW-LABEL: test_x86_mask_blend_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_x86_mask_blend_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
; AVX512F-32-NEXT: retl
  %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly

; Byte blend of %a1 and %a2 via mask.blend.b.512. Here the i64 mask is the
; first parameter and is named %a0 (not %mask as in the word variant). The
; expected code is a single vpblendmb with {%k1}; the 32-bit target is
; expected to assemble the 64-bit mask from two kmovd loads and kunpckdq.
define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
; AVX512BW-LABEL: test_x86_mask_blend_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_x86_mask_blend_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
; AVX512F-32-NEXT: retl
  %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
  ret <64 x i8> %res
}

; mask.packssdw.512 with a zeroinitializer pass-through operand and an
; all-ones mask (i32 -1). Both targets are expected to emit a plain, unmasked
; vpackssdw into %zmm0.
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; AVX512BW-LABEL: test_mask_packs_epi32_rr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}

; mask.packssdw.512 with an explicit %passThru vector and i32 %mask. The
; expected code writes the masked vpackssdw result into %zmm2 (the register
; holding %passThru in the expected assembly) and then copies it to %zmm0
; with vmovaps.
define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1}
{z} 899; AVX512BW-NEXT: retq 900; 901; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512: 902; AVX512F-32: # BB#0: 903; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 904; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} 905; AVX512F-32-NEXT: retl 906 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 907 ret <32 x i16> %res 908} 909 910define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 911; AVX512BW-LABEL: test_mask_packs_epi32_rm_512: 912; AVX512BW: ## BB#0: 913; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 914; AVX512BW-NEXT: retq 915; 916; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512: 917; AVX512F-32: # BB#0: 918; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 919; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 920; AVX512F-32-NEXT: retl 921 %b = load <16 x i32>, <16 x i32>* %ptr_b 922 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 923 ret <32 x i16> %res 924} 925 926define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 927; AVX512BW-LABEL: test_mask_packs_epi32_rmk_512: 928; AVX512BW: ## BB#0: 929; AVX512BW-NEXT: kmovd %esi, %k1 930; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} 931; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 932; AVX512BW-NEXT: retq 933; 934; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512: 935; AVX512F-32: # BB#0: 936; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 937; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 938; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} 939; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 940; AVX512F-32-NEXT: retl 941 %b = load <16 x i32>, <16 x i32>* %ptr_b 942 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 943 ret <32 x i16> %res 944} 945 946define <32 x i16> 
@test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 947; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512: 948; AVX512BW: ## BB#0: 949; AVX512BW-NEXT: kmovd %esi, %k1 950; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} 951; AVX512BW-NEXT: retq 952; 953; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512: 954; AVX512F-32: # BB#0: 955; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 956; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 957; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} 958; AVX512F-32-NEXT: retl 959 %b = load <16 x i32>, <16 x i32>* %ptr_b 960 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 961 ret <32 x i16> %res 962} 963 964define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 965; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512: 966; AVX512BW: ## BB#0: 967; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 968; AVX512BW-NEXT: retq 969; 970; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512: 971; AVX512F-32: # BB#0: 972; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 973; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 974; AVX512F-32-NEXT: retl 975 %q = load i32, i32* %ptr_b 976 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 977 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 978 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 979 ret <32 x i16> %res 980} 981 982define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 983; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512: 984; AVX512BW: ## BB#0: 985; AVX512BW-NEXT: kmovd %esi, %k1 986; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} 987; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 988; AVX512BW-NEXT: retq 989; 990; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512: 991; 
AVX512F-32: # BB#0: 992; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 993; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 994; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} 995; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 996; AVX512F-32-NEXT: retl 997 %q = load i32, i32* %ptr_b 998 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 999 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1000 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1001 ret <32 x i16> %res 1002} 1003 1004define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 1005; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512: 1006; AVX512BW: ## BB#0: 1007; AVX512BW-NEXT: kmovd %esi, %k1 1008; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 1009; AVX512BW-NEXT: retq 1010; 1011; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512: 1012; AVX512F-32: # BB#0: 1013; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1014; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1015; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} 1016; AVX512F-32-NEXT: retl 1017 %q = load i32, i32* %ptr_b 1018 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1019 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1020 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1021 ret <32 x i16> %res 1022} 1023 1024declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1025 1026define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1027; AVX512BW-LABEL: test_mask_packs_epi16_rr_512: 1028; AVX512BW: ## BB#0: 1029; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 1030; AVX512BW-NEXT: retq 1031; 1032; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512: 1033; AVX512F-32: # BB#0: 1034; 
AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 1035; AVX512F-32-NEXT: retl 1036 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1037 ret <64 x i8> %res 1038} 1039 1040define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1041; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512: 1042; AVX512BW: ## BB#0: 1043; AVX512BW-NEXT: kmovq %rdi, %k1 1044; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} 1045; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1046; AVX512BW-NEXT: retq 1047; 1048; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512: 1049; AVX512F-32: # BB#0: 1050; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1051; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1052; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1053; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} 1054; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1055; AVX512F-32-NEXT: retl 1056 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1057 ret <64 x i8> %res 1058} 1059 1060define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1061; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512: 1062; AVX512BW: ## BB#0: 1063; AVX512BW-NEXT: kmovq %rdi, %k1 1064; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1065; AVX512BW-NEXT: retq 1066; 1067; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512: 1068; AVX512F-32: # BB#0: 1069; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1070; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1071; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1072; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1073; AVX512F-32-NEXT: retl 1074 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1075 ret <64 x i8> %res 1076} 1077 1078define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 
1079; AVX512BW-LABEL: test_mask_packs_epi16_rm_512: 1080; AVX512BW: ## BB#0: 1081; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 1082; AVX512BW-NEXT: retq 1083; 1084; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512: 1085; AVX512F-32: # BB#0: 1086; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1087; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 1088; AVX512F-32-NEXT: retl 1089 %b = load <32 x i16>, <32 x i16>* %ptr_b 1090 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1091 ret <64 x i8> %res 1092} 1093 1094define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1095; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512: 1096; AVX512BW: ## BB#0: 1097; AVX512BW-NEXT: kmovq %rsi, %k1 1098; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} 1099; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1100; AVX512BW-NEXT: retq 1101; 1102; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512: 1103; AVX512F-32: # BB#0: 1104; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1105; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1106; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1107; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1108; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} 1109; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1110; AVX512F-32-NEXT: retl 1111 %b = load <32 x i16>, <32 x i16>* %ptr_b 1112 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1113 ret <64 x i8> %res 1114} 1115 1116define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 1117; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512: 1118; AVX512BW: ## BB#0: 1119; AVX512BW-NEXT: kmovq %rsi, %k1 1120; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} 1121; AVX512BW-NEXT: retq 1122; 1123; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512: 1124; AVX512F-32: # BB#0: 1125; AVX512F-32-NEXT: movl 
{{[0-9]+}}(%esp), %eax 1126; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1127; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1128; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1129; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} 1130; AVX512F-32-NEXT: retl 1131 %b = load <32 x i16>, <32 x i16>* %ptr_b 1132 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1133 ret <64 x i8> %res 1134} 1135 1136declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 1137 1138 1139define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 1140; AVX512BW-LABEL: test_mask_packus_epi32_rr_512: 1141; AVX512BW: ## BB#0: 1142; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 1143; AVX512BW-NEXT: retq 1144; 1145; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512: 1146; AVX512F-32: # BB#0: 1147; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 1148; AVX512F-32-NEXT: retl 1149 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1150 ret <32 x i16> %res 1151} 1152 1153define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 1154; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512: 1155; AVX512BW: ## BB#0: 1156; AVX512BW-NEXT: kmovd %edi, %k1 1157; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} 1158; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1159; AVX512BW-NEXT: retq 1160; 1161; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512: 1162; AVX512F-32: # BB#0: 1163; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1164; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} 1165; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1166; AVX512F-32-NEXT: retl 1167 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1168 ret <32 x i16> %res 1169} 1170 1171define <32 x i16> 
@test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 1172; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512: 1173; AVX512BW: ## BB#0: 1174; AVX512BW-NEXT: kmovd %edi, %k1 1175; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} 1176; AVX512BW-NEXT: retq 1177; 1178; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512: 1179; AVX512F-32: # BB#0: 1180; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1181; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} 1182; AVX512F-32-NEXT: retl 1183 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1184 ret <32 x i16> %res 1185} 1186 1187define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 1188; AVX512BW-LABEL: test_mask_packus_epi32_rm_512: 1189; AVX512BW: ## BB#0: 1190; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 1191; AVX512BW-NEXT: retq 1192; 1193; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512: 1194; AVX512F-32: # BB#0: 1195; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1196; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 1197; AVX512F-32-NEXT: retl 1198 %b = load <16 x i32>, <16 x i32>* %ptr_b 1199 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1200 ret <32 x i16> %res 1201} 1202 1203define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1204; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512: 1205; AVX512BW: ## BB#0: 1206; AVX512BW-NEXT: kmovd %esi, %k1 1207; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} 1208; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1209; AVX512BW-NEXT: retq 1210; 1211; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512: 1212; AVX512F-32: # BB#0: 1213; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1214; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1215; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} 1216; AVX512F-32-NEXT: 
vmovaps %zmm1, %zmm0 1217; AVX512F-32-NEXT: retl 1218 %b = load <16 x i32>, <16 x i32>* %ptr_b 1219 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1220 ret <32 x i16> %res 1221} 1222 1223define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 1224; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512: 1225; AVX512BW: ## BB#0: 1226; AVX512BW-NEXT: kmovd %esi, %k1 1227; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} 1228; AVX512BW-NEXT: retq 1229; 1230; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512: 1231; AVX512F-32: # BB#0: 1232; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1233; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1234; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} 1235; AVX512F-32-NEXT: retl 1236 %b = load <16 x i32>, <16 x i32>* %ptr_b 1237 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1238 ret <32 x i16> %res 1239} 1240 1241define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 1242; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512: 1243; AVX512BW: ## BB#0: 1244; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 1245; AVX512BW-NEXT: retq 1246; 1247; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512: 1248; AVX512F-32: # BB#0: 1249; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1250; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 1251; AVX512F-32-NEXT: retl 1252 %q = load i32, i32* %ptr_b 1253 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1254 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1255 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1256 ret <32 x i16> %res 1257} 1258 1259define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 
%mask) { 1260; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512: 1261; AVX512BW: ## BB#0: 1262; AVX512BW-NEXT: kmovd %esi, %k1 1263; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} 1264; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1265; AVX512BW-NEXT: retq 1266; 1267; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512: 1268; AVX512F-32: # BB#0: 1269; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1270; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1271; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} 1272; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1273; AVX512F-32-NEXT: retl 1274 %q = load i32, i32* %ptr_b 1275 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1276 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1277 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1278 ret <32 x i16> %res 1279} 1280 1281define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 1282; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512: 1283; AVX512BW: ## BB#0: 1284; AVX512BW-NEXT: kmovd %esi, %k1 1285; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 1286; AVX512BW-NEXT: retq 1287; 1288; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512: 1289; AVX512F-32: # BB#0: 1290; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1291; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1292; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} 1293; AVX512F-32-NEXT: retl 1294 %q = load i32, i32* %ptr_b 1295 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1296 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1297 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1298 ret <32 x i16> %res 1299} 1300 1301declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1302 
1303define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1304; AVX512BW-LABEL: test_mask_packus_epi16_rr_512: 1305; AVX512BW: ## BB#0: 1306; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1307; AVX512BW-NEXT: retq 1308; 1309; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512: 1310; AVX512F-32: # BB#0: 1311; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1312; AVX512F-32-NEXT: retl 1313 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1314 ret <64 x i8> %res 1315} 1316 1317define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1318; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512: 1319; AVX512BW: ## BB#0: 1320; AVX512BW-NEXT: kmovq %rdi, %k1 1321; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} 1322; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1323; AVX512BW-NEXT: retq 1324; 1325; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512: 1326; AVX512F-32: # BB#0: 1327; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1328; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1329; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1330; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} 1331; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1332; AVX512F-32-NEXT: retl 1333 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1334 ret <64 x i8> %res 1335} 1336 1337define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1338; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512: 1339; AVX512BW: ## BB#0: 1340; AVX512BW-NEXT: kmovq %rdi, %k1 1341; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1342; AVX512BW-NEXT: retq 1343; 1344; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512: 1345; AVX512F-32: # BB#0: 1346; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1347; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1348; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1349; 
AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1350; AVX512F-32-NEXT: retl 1351 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1352 ret <64 x i8> %res 1353} 1354 1355define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1356; AVX512BW-LABEL: test_mask_packus_epi16_rm_512: 1357; AVX512BW: ## BB#0: 1358; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 1359; AVX512BW-NEXT: retq 1360; 1361; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512: 1362; AVX512F-32: # BB#0: 1363; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1364; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 1365; AVX512F-32-NEXT: retl 1366 %b = load <32 x i16>, <32 x i16>* %ptr_b 1367 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1368 ret <64 x i8> %res 1369} 1370 1371define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1372; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512: 1373; AVX512BW: ## BB#0: 1374; AVX512BW-NEXT: kmovq %rsi, %k1 1375; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} 1376; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1377; AVX512BW-NEXT: retq 1378; 1379; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512: 1380; AVX512F-32: # BB#0: 1381; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1382; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1383; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1384; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1385; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} 1386; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1387; AVX512F-32-NEXT: retl 1388 %b = load <32 x i16>, <32 x i16>* %ptr_b 1389 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1390 ret <64 x i8> %res 1391} 1392 1393define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, 
i64 %mask) { 1394; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512: 1395; AVX512BW: ## BB#0: 1396; AVX512BW-NEXT: kmovq %rsi, %k1 1397; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} 1398; AVX512BW-NEXT: retq 1399; 1400; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512: 1401; AVX512F-32: # BB#0: 1402; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1403; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1404; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1405; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1406; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} 1407; AVX512F-32-NEXT: retl 1408 %b = load <32 x i16>, <32 x i16>* %ptr_b 1409 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1410 ret <64 x i8> %res 1411} 1412 1413declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 1414 1415define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1416; AVX512BW-LABEL: test_mask_adds_epi16_rr_512: 1417; AVX512BW: ## BB#0: 1418; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 1419; AVX512BW-NEXT: retq 1420; 1421; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512: 1422; AVX512F-32: # BB#0: 1423; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 1424; AVX512F-32-NEXT: retl 1425 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1426 ret <32 x i16> %res 1427} 1428 1429define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1430; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512: 1431; AVX512BW: ## BB#0: 1432; AVX512BW-NEXT: kmovd %edi, %k1 1433; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} 1434; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1435; AVX512BW-NEXT: retq 1436; 1437; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512: 1438; AVX512F-32: # BB#0: 1439; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1440; AVX512F-32-NEXT: vpaddsw 
%zmm1, %zmm0, %zmm2 {%k1} 1441; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1442; AVX512F-32-NEXT: retl 1443 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1444 ret <32 x i16> %res 1445} 1446 1447define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1448; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512: 1449; AVX512BW: ## BB#0: 1450; AVX512BW-NEXT: kmovd %edi, %k1 1451; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1452; AVX512BW-NEXT: retq 1453; 1454; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512: 1455; AVX512F-32: # BB#0: 1456; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1457; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1458; AVX512F-32-NEXT: retl 1459 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1460 ret <32 x i16> %res 1461} 1462 1463define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1464; AVX512BW-LABEL: test_mask_adds_epi16_rm_512: 1465; AVX512BW: ## BB#0: 1466; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 1467; AVX512BW-NEXT: retq 1468; 1469; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512: 1470; AVX512F-32: # BB#0: 1471; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1472; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 1473; AVX512F-32-NEXT: retl 1474 %b = load <32 x i16>, <32 x i16>* %ptr_b 1475 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1476 ret <32 x i16> %res 1477} 1478 1479define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1480; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512: 1481; AVX512BW: ## BB#0: 1482; AVX512BW-NEXT: kmovd %esi, %k1 1483; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} 1484; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1485; AVX512BW-NEXT: retq 1486; 1487; 
AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512: 1488; AVX512F-32: # BB#0: 1489; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1490; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1491; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} 1492; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1493; AVX512F-32-NEXT: retl 1494 %b = load <32 x i16>, <32 x i16>* %ptr_b 1495 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1496 ret <32 x i16> %res 1497} 1498 1499define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1500; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512: 1501; AVX512BW: ## BB#0: 1502; AVX512BW-NEXT: kmovd %esi, %k1 1503; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} 1504; AVX512BW-NEXT: retq 1505; 1506; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512: 1507; AVX512F-32: # BB#0: 1508; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1509; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1510; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} 1511; AVX512F-32-NEXT: retl 1512 %b = load <32 x i16>, <32 x i16>* %ptr_b 1513 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1514 ret <32 x i16> %res 1515} 1516 1517declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1518 1519define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1520; AVX512BW-LABEL: test_mask_subs_epi16_rr_512: 1521; AVX512BW: ## BB#0: 1522; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 1523; AVX512BW-NEXT: retq 1524; 1525; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512: 1526; AVX512F-32: # BB#0: 1527; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 1528; AVX512F-32-NEXT: retl 1529 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1530 ret <32 x i16> %res 1531} 1532 1533define <32 x i16> 
@test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1534; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512: 1535; AVX512BW: ## BB#0: 1536; AVX512BW-NEXT: kmovd %edi, %k1 1537; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} 1538; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1539; AVX512BW-NEXT: retq 1540; 1541; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512: 1542; AVX512F-32: # BB#0: 1543; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1544; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} 1545; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1546; AVX512F-32-NEXT: retl 1547 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1548 ret <32 x i16> %res 1549} 1550 1551define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1552; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512: 1553; AVX512BW: ## BB#0: 1554; AVX512BW-NEXT: kmovd %edi, %k1 1555; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1556; AVX512BW-NEXT: retq 1557; 1558; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512: 1559; AVX512F-32: # BB#0: 1560; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1561; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1562; AVX512F-32-NEXT: retl 1563 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1564 ret <32 x i16> %res 1565} 1566 1567define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1568; AVX512BW-LABEL: test_mask_subs_epi16_rm_512: 1569; AVX512BW: ## BB#0: 1570; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 1571; AVX512BW-NEXT: retq 1572; 1573; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512: 1574; AVX512F-32: # BB#0: 1575; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1576; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 1577; AVX512F-32-NEXT: retl 1578 %b = load <32 x i16>, <32 x i16>* %ptr_b 1579 %res = call <32 x i16> 
@llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1580 ret <32 x i16> %res 1581} 1582 1583define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1584; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512: 1585; AVX512BW: ## BB#0: 1586; AVX512BW-NEXT: kmovd %esi, %k1 1587; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} 1588; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1589; AVX512BW-NEXT: retq 1590; 1591; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512: 1592; AVX512F-32: # BB#0: 1593; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1594; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1595; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} 1596; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1597; AVX512F-32-NEXT: retl 1598 %b = load <32 x i16>, <32 x i16>* %ptr_b 1599 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1600 ret <32 x i16> %res 1601} 1602 1603define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1604; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512: 1605; AVX512BW: ## BB#0: 1606; AVX512BW-NEXT: kmovd %esi, %k1 1607; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} 1608; AVX512BW-NEXT: retq 1609; 1610; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512: 1611; AVX512F-32: # BB#0: 1612; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1613; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1614; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} 1615; AVX512F-32-NEXT: retl 1616 %b = load <32 x i16>, <32 x i16>* %ptr_b 1617 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1618 ret <32 x i16> %res 1619} 1620 1621declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1622 1623define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) 
{ 1624; AVX512BW-LABEL: test_mask_adds_epu16_rr_512: 1625; AVX512BW: ## BB#0: 1626; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 1627; AVX512BW-NEXT: retq 1628; 1629; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512: 1630; AVX512F-32: # BB#0: 1631; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 1632; AVX512F-32-NEXT: retl 1633 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1634 ret <32 x i16> %res 1635} 1636 1637define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1638; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512: 1639; AVX512BW: ## BB#0: 1640; AVX512BW-NEXT: kmovd %edi, %k1 1641; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} 1642; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1643; AVX512BW-NEXT: retq 1644; 1645; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512: 1646; AVX512F-32: # BB#0: 1647; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1648; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} 1649; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1650; AVX512F-32-NEXT: retl 1651 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1652 ret <32 x i16> %res 1653} 1654 1655define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1656; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512: 1657; AVX512BW: ## BB#0: 1658; AVX512BW-NEXT: kmovd %edi, %k1 1659; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} 1660; AVX512BW-NEXT: retq 1661; 1662; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512: 1663; AVX512F-32: # BB#0: 1664; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1665; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} 1666; AVX512F-32-NEXT: retl 1667 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1668 ret <32 x i16> %res 1669} 1670 1671define <32 x i16> 
@test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1672; AVX512BW-LABEL: test_mask_adds_epu16_rm_512: 1673; AVX512BW: ## BB#0: 1674; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 1675; AVX512BW-NEXT: retq 1676; 1677; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512: 1678; AVX512F-32: # BB#0: 1679; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1680; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 1681; AVX512F-32-NEXT: retl 1682 %b = load <32 x i16>, <32 x i16>* %ptr_b 1683 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1684 ret <32 x i16> %res 1685} 1686 1687define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1688; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512: 1689; AVX512BW: ## BB#0: 1690; AVX512BW-NEXT: kmovd %esi, %k1 1691; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} 1692; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1693; AVX512BW-NEXT: retq 1694; 1695; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512: 1696; AVX512F-32: # BB#0: 1697; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1698; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1699; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} 1700; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1701; AVX512F-32-NEXT: retl 1702 %b = load <32 x i16>, <32 x i16>* %ptr_b 1703 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1704 ret <32 x i16> %res 1705} 1706 1707define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1708; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512: 1709; AVX512BW: ## BB#0: 1710; AVX512BW-NEXT: kmovd %esi, %k1 1711; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} 1712; AVX512BW-NEXT: retq 1713; 1714; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512: 1715; AVX512F-32: # BB#0: 1716; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1717; AVX512F-32-NEXT: 
kmovd {{[0-9]+}}(%esp), %k1 1718; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} 1719; AVX512F-32-NEXT: retl 1720 %b = load <32 x i16>, <32 x i16>* %ptr_b 1721 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1722 ret <32 x i16> %res 1723} 1724 1725declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1726 1727define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1728; AVX512BW-LABEL: test_mask_subs_epu16_rr_512: 1729; AVX512BW: ## BB#0: 1730; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 1731; AVX512BW-NEXT: retq 1732; 1733; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512: 1734; AVX512F-32: # BB#0: 1735; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 1736; AVX512F-32-NEXT: retl 1737 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1738 ret <32 x i16> %res 1739} 1740 1741define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1742; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512: 1743; AVX512BW: ## BB#0: 1744; AVX512BW-NEXT: kmovd %edi, %k1 1745; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} 1746; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1747; AVX512BW-NEXT: retq 1748; 1749; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512: 1750; AVX512F-32: # BB#0: 1751; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1752; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} 1753; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1754; AVX512F-32-NEXT: retl 1755 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1756 ret <32 x i16> %res 1757} 1758 1759define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1760; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512: 1761; AVX512BW: ## BB#0: 1762; AVX512BW-NEXT: kmovd %edi, %k1 
1763; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} 1764; AVX512BW-NEXT: retq 1765; 1766; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512: 1767; AVX512F-32: # BB#0: 1768; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1769; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} 1770; AVX512F-32-NEXT: retl 1771 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1772 ret <32 x i16> %res 1773} 1774 1775define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1776; AVX512BW-LABEL: test_mask_subs_epu16_rm_512: 1777; AVX512BW: ## BB#0: 1778; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 1779; AVX512BW-NEXT: retq 1780; 1781; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512: 1782; AVX512F-32: # BB#0: 1783; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1784; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 1785; AVX512F-32-NEXT: retl 1786 %b = load <32 x i16>, <32 x i16>* %ptr_b 1787 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1788 ret <32 x i16> %res 1789} 1790 1791define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1792; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512: 1793; AVX512BW: ## BB#0: 1794; AVX512BW-NEXT: kmovd %esi, %k1 1795; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} 1796; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1797; AVX512BW-NEXT: retq 1798; 1799; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512: 1800; AVX512F-32: # BB#0: 1801; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1802; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1803; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} 1804; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1805; AVX512F-32-NEXT: retl 1806 %b = load <32 x i16>, <32 x i16>* %ptr_b 1807 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 
%mask) 1808 ret <32 x i16> %res 1809} 1810 1811define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1812; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512: 1813; AVX512BW: ## BB#0: 1814; AVX512BW-NEXT: kmovd %esi, %k1 1815; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} 1816; AVX512BW-NEXT: retq 1817; 1818; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512: 1819; AVX512F-32: # BB#0: 1820; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1821; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1822; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} 1823; AVX512F-32-NEXT: retl 1824 %b = load <32 x i16>, <32 x i16>* %ptr_b 1825 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1826 ret <32 x i16> %res 1827} 1828 1829declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1830 1831declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1832 1833define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1834; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_b_512: 1835; AVX512BW: ## BB#0: 1836; AVX512BW-NEXT: kmovq %rdi, %k1 1837; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} 1838; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 1839; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1840; AVX512BW-NEXT: retq 1841; 1842; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512: 1843; AVX512F-32: # BB#0: 1844; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1845; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1846; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1847; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} 1848; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 1849; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1850; AVX512F-32-NEXT: retl 1851 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 
1852 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1853 %res2 = add <64 x i8> %res, %res1 1854 ret <64 x i8> %res2 1855} 1856 1857declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1858 1859define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1860; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_w_512: 1861; AVX512BW: ## BB#0: 1862; AVX512BW-NEXT: kmovd %edi, %k1 1863; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} 1864; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 1865; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1866; AVX512BW-NEXT: retq 1867; 1868; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512: 1869; AVX512F-32: # BB#0: 1870; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1871; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} 1872; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 1873; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1874; AVX512F-32-NEXT: retl 1875 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1876 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1877 %res2 = add <32 x i16> %res, %res1 1878 ret <32 x i16> %res2 1879} 1880 1881declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1882 1883define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1884; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_b_512: 1885; AVX512BW: ## BB#0: 1886; AVX512BW-NEXT: kmovq %rdi, %k1 1887; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} 1888; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0 1889; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1890; AVX512BW-NEXT: retq 1891; 1892; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512: 1893; AVX512F-32: # BB#0: 1894; AVX512F-32-NEXT: kmovd 
{{[0-9]+}}(%esp), %k0 1895; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1896; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1897; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} 1898; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm0 1899; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1900; AVX512F-32-NEXT: retl 1901 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1902 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1903 %res2 = add <64 x i8> %res, %res1 1904 ret <64 x i8> %res2 1905} 1906 1907declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1908 1909define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1910; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: 1911; AVX512BW: ## BB#0: 1912; AVX512BW-NEXT: kmovd %edi, %k1 1913; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} 1914; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 1915; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1916; AVX512BW-NEXT: retq 1917; 1918; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: 1919; AVX512F-32: # BB#0: 1920; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1921; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} 1922; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 1923; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1924; AVX512F-32-NEXT: retl 1925 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 1926 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1927 %res2 = add <32 x i16> %res, %res1 1928 ret <32 x i16> %res2 1929} 1930 1931declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1932 1933define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1934; 
AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_b_512: 1935; AVX512BW: ## BB#0: 1936; AVX512BW-NEXT: kmovq %rdi, %k1 1937; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} 1938; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0 1939; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1940; AVX512BW-NEXT: retq 1941; 1942; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512: 1943; AVX512F-32: # BB#0: 1944; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1945; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1946; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1947; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} 1948; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm0 1949; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1950; AVX512F-32-NEXT: retl 1951 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1952 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1953 %res2 = add <64 x i8> %res, %res1 1954 ret <64 x i8> %res2 1955} 1956 1957declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1958 1959define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1960; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_w_512: 1961; AVX512BW: ## BB#0: 1962; AVX512BW-NEXT: kmovd %edi, %k1 1963; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} 1964; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0 1965; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1966; AVX512BW-NEXT: retq 1967; 1968; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512: 1969; AVX512F-32: # BB#0: 1970; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1971; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} 1972; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm0 1973; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 1974; AVX512F-32-NEXT: retl 1975 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 
1976 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 1977 %res2 = add <32 x i16> %res, %res1 1978 ret <32 x i16> %res2 1979} 1980 1981declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1982 1983define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1984; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_b_512: 1985; AVX512BW: ## BB#0: 1986; AVX512BW-NEXT: kmovq %rdi, %k1 1987; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} 1988; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0 1989; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 1990; AVX512BW-NEXT: retq 1991; 1992; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512: 1993; AVX512F-32: # BB#0: 1994; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1995; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1996; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1997; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} 1998; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm0 1999; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 2000; AVX512F-32-NEXT: retl 2001 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 2002 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 2003 %res2 = add <64 x i8> %res, %res1 2004 ret <64 x i8> %res2 2005} 2006 2007declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2008 2009define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2010; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_w_512: 2011; AVX512BW: ## BB#0: 2012; AVX512BW-NEXT: kmovd %edi, %k1 2013; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} 2014; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0 2015; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2016; AVX512BW-NEXT: retq 2017; 2018; AVX512F-32-LABEL: 
test_int_x86_avx512_mask_pminu_w_512: 2019; AVX512F-32: # BB#0: 2020; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2021; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} 2022; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm0 2023; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2024; AVX512F-32-NEXT: retl 2025 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2026 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2027 %res2 = add <32 x i16> %res, %res1 2028 ret <32 x i16> %res2 2029} 2030 2031declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2032 2033define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2034; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: 2035; AVX512BW: ## BB#0: 2036; AVX512BW-NEXT: kmovd %edi, %k1 2037; AVX512BW-NEXT: vmovaps %zmm1, %zmm3 2038; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} 2039; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 2040; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 2041; AVX512BW-NEXT: retq 2042; 2043; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: 2044; AVX512F-32: # BB#0: 2045; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2046; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 2047; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} 2048; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 2049; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 2050; AVX512F-32-NEXT: retl 2051 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2052 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2053 %res2 = add <32 x i16> %res, %res1 2054 ret <32 x i16> %res2 2055} 2056 2057declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, 
<32 x i16>, i32) 2058 2059define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2060; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: 2061; AVX512BW: ## BB#0: 2062; AVX512BW-NEXT: kmovd %edi, %k1 2063; AVX512BW-NEXT: vmovaps %zmm1, %zmm3 2064; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z} 2065; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 2066; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 2067; AVX512BW-NEXT: retq 2068; 2069; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: 2070; AVX512F-32: # BB#0: 2071; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2072; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 2073; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z} 2074; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 2075; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 2076; AVX512F-32-NEXT: retl 2077 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2078 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2079 %res2 = add <32 x i16> %res, %res1 2080 ret <32 x i16> %res2 2081} 2082 2083declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2084 2085define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2086; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 2087; AVX512BW: ## BB#0: 2088; AVX512BW-NEXT: kmovd %edi, %k1 2089; AVX512BW-NEXT: vmovaps %zmm1, %zmm3 2090; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1} 2091; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 2092; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0 2093; AVX512BW-NEXT: retq 2094; 2095; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 2096; AVX512F-32: # BB#0: 2097; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2098; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3 
2099; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1} 2100; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 2101; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0 2102; AVX512F-32-NEXT: retl 2103 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2104 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2105 %res2 = add <32 x i16> %res, %res1 2106 ret <32 x i16> %res2 2107} 2108 2109declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 2110 2111define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 2112; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_b_512: 2113; AVX512BW: ## BB#0: 2114; AVX512BW-NEXT: kmovq %rdi, %k1 2115; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} 2116; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm0 2117; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 2118; AVX512BW-NEXT: retq 2119; 2120; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512: 2121; AVX512F-32: # BB#0: 2122; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2123; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2124; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 2125; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} 2126; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm0 2127; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 2128; AVX512F-32-NEXT: retl 2129 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 2130 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 2131 %res2 = add <64 x i8> %res, %res1 2132 ret <64 x i8> %res2 2133} 2134 2135declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2136 2137define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2138; AVX512BW-LABEL: 
test_int_x86_avx512_mask_pavg_w_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vpavgw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vpavgw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}

declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Byte shuffle. The intrinsic is called once with the i64 mask %x3 and once
; with an all-ones mask (-1); the two results are added and returned.
define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovq %rdi, %k1
; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vpshufb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)

; Word absolute value. One call uses the i32 mask %x2, the other an all-ones
; mask (-1); the two results are added and returned.
define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpabsw %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vpabsw %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpabsw %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT:    vpabsw %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)

; Byte absolute value. One call uses the i64 mask %x2, the other an all-ones
; mask (-1); the two results are added and returned.
define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovq %rdi, %k1
; AVX512BW-NEXT:    vpabsb %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vpabsb %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT:    vpabsb %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT:    vpabsb %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; pmulhu.w intrinsic. One call uses the i32 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; pmulh.w intrinsic. One call uses the i32 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; pmul.hr.sw intrinsic. One call uses the i32 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x
i8>, i32)

; Word-to-byte truncation. Three calls are made: all-ones mask (-1) with %x1,
; mask %x2 with %x1, and mask %x2 with a zeroinitializer vector; the three
; results are added and returned.
define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm1 {%k1}
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z}
; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm1 {%k1}
; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z}
; AVX512F-32-NEXT:    vpmovwb %zmm0, %ymm0
; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX512F-32-NEXT:    retl
  %unmasked = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %merge = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %zero = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %partial = add <32 x i8> %unmasked, %merge
  %total = add <32 x i8> %partial, %zero
  ret <32 x i8> %total
}

declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)

; Word-to-byte truncation taking a pointer argument. Called first with an
; all-ones mask (-1), then with mask %x2, both on the same %ptr.
define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpmovwb %zmm0, (%rdi)
; AVX512BW-NEXT:    vpmovwb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    vpmovwb %zmm0, (%eax)
; AVX512F-32-NEXT:    vpmovwb %zmm0, (%eax) {%k1}
; AVX512F-32-NEXT:    retl
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)

; pmovs.wb intrinsic. Three calls are made: all-ones mask (-1) with %x1,
; mask %x2 with %x1, and mask %x2 with a zeroinitializer vector; the three
; results are added and returned.
define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm1 {%k1}
; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z}
; AVX512BW-NEXT:    vpmovswb %zmm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm1 {%k1}
; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z}
; AVX512F-32-NEXT:    vpmovswb %zmm0, %ymm0
; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX512F-32-NEXT:    retl
  %unmasked = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %merge = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %zero = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %partial = add <32 x i8> %unmasked, %merge
  %total = add <32 x i8> %partial, %zero
  ret <32 x i8> %total
}

declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)

; pmovs.wb.mem intrinsic. Called first with an all-ones mask (-1), then with
; mask %x2, both on the same %ptr.
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi)
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpmovswb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT:    vpmovswb %zmm0, (%ecx)
; AVX512F-32-NEXT:    kmovd %eax, %k1
; AVX512F-32-NEXT:    vpmovswb %zmm0, (%ecx) {%k1}
; AVX512F-32-NEXT:    retl
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)

; pmovus.wb intrinsic. Three calls are made: all-ones mask (-1) with %x1,
; mask %x2 with %x1, and mask %x2 with a zeroinitializer vector; the three
; results are added and returned.
define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1}
; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z}
; AVX512BW-NEXT:    vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1}
; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z}
; AVX512F-32-NEXT:    vpmovuswb %zmm0, %ymm0
; AVX512F-32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512F-32-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
; AVX512F-32-NEXT:    retl
  %unmasked = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %merge = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %zero = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %partial = add <32 x i8> %unmasked, %merge
  %total = add <32 x i8> %partial, %zero
  ret <32 x i8> %total
}

declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)

; pmovus.wb.mem intrinsic. Called first with an all-ones mask (-1), then with
; mask %x2, both on the same %ptr.
define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi)
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpmovuswb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%ecx)
; AVX512F-32-NEXT:    kmovd %eax, %k1
; AVX512F-32-NEXT:    vpmovuswb %zmm0, (%ecx) {%k1}
; AVX512F-32-NEXT:    retl
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}

declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)

; pmaddubs.w intrinsic. One call uses the i32 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)

; pmaddw.d intrinsic. One call uses the i16 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovw %edi, %k1
; AVX512BW-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
  %unmasked = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
  %sum = add <16 x i32> %masked, %unmasked
  ret <16 x i32> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; punpckhb.w intrinsic. One call uses the i64 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovq %rdi, %k1
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
; AVX512BW-NEXT:    vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT:    vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
; AVX512F-32-NEXT:    vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; punpcklb.w intrinsic. One call uses the i64 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovq %rdi, %k1
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
; AVX512BW-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT:    vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
; AVX512F-32-NEXT:    vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; punpckhw.d intrinsic. One call uses the i32 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
; AVX512BW-NEXT:    vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
; AVX512F-32-NEXT:    vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; punpcklw.d intrinsic. One call uses the i32 mask %x3, the other an all-ones
; mask (-1); the two results are added and returned.
define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
; AVX512BW-NEXT:    vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
; AVX512F-32-NEXT:    vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)

; palignr intrinsic with immediate 2. Three calls are made: mask %x4 with %x3,
; mask %x4 with a zeroinitializer vector, and an all-ones mask (-1) with %x3;
; the three results are added and returned.
define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_palignr_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovq %rdi, %k1
; AVX512BW-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512BW-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddb %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_palignr_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm3
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %merge = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
  %zero = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
  %partial = add <64 x i8> %merge, %zero
  %total = add <64 x i8> %partial, %unmasked
  ret <64 x i8> %total
}

declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)

; dbpsadbw intrinsic with immediate 2. Three calls are made: mask %x4 with %x3,
; mask %x4 with a zeroinitializer vector, and an all-ones mask (-1) with %x3;
; the three results are added and returned.
define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512BW-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddw %zmm3, %zmm2, %zmm1
; AVX512F-32-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %merge = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
  %zero = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
  %partial = add <32 x i16> %merge, %zero
  %total = add <32 x i16> %partial, %unmasked
  ret <32 x i16> %total
}

declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)

; psll.dq intrinsic, called with immediates 8 and 4 on the same input; the two
; results are added and returned.
define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_dq_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpslldq $8, %zmm0, %zmm1
; AVX512BW-NEXT:    vpslldq $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_dq_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpslldq $8, %zmm0, %zmm1
; AVX512F-32-NEXT:    vpslldq $4, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %by8 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
  %by4 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
  %sum = add <8 x i64> %by8, %by4
  ret <8 x i64> %sum
}

declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)

; psrl.dq intrinsic, called with immediates 8 and 4 on the same input; the two
; results are added and returned.
define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsrldq $8, %zmm0, %zmm1
; AVX512BW-NEXT:    vpsrldq $4, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpsrldq $8, %zmm0, %zmm1
; AVX512F-32-NEXT:    vpsrldq $4, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %by8 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
  %by4 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
  %sum = add <8 x i64> %by8, %by4
  ret <8 x i64> %sum
}
declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)

; psad.bw intrinsic, called on (%x0, %x1) and on (%x0, %x2); the two results
; are added and returned.
define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1
; AVX512BW-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1
; AVX512F-32-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %sad01 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
  %sad02 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
  %sum = add <8 x i64> %sad01, %sad02
  ret <8 x i64> %sum
}

declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)

; kunpck.wd intrinsic applied to the two i32 arguments; the result is returned
; directly.
define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovd %edi, %k0
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    kunpckwd %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    kunpckwd %k1, %k0, %k0
; AVX512F-32-NEXT:    kmovd %k0, %eax
; AVX512F-32-NEXT:    retl
  %packed = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
  ret i32 %packed
}

declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)

; kunpck.dq intrinsic applied to the two i64 arguments; the result is returned
; directly.
define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    kmovq %rdi, %k0
; AVX512BW-NEXT:    kmovq %rsi, %k1
; AVX512BW-NEXT:    kunpckdq %k1, %k0, %k0
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
; AVX512F-32:       # BB#0:
; AVX512F-32-NEXT:    subl $12, %esp
; AVX512F-32-NEXT:  .Ltmp8:
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k0
; AVX512F-32-NEXT:    kmovq %k0, (%esp)
; AVX512F-32-NEXT:    movl (%esp), %eax
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT:    addl $12, %esp
; AVX512F-32-NEXT:    retl
  %packed = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
  ret i64 %packed
}