1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32) 6 7define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) { 8; X86-LABEL: test_int_x86_avx512_kunpck_wd: 9; X86: # %bb.0: 10; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04] 11; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 12; X86-NEXT: kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1] 13; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 14; X86-NEXT: retl # encoding: [0xc3] 15; 16; X64-LABEL: test_int_x86_avx512_kunpck_wd: 17; X64: # %bb.0: 18; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7] 19; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 20; X64-NEXT: kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1] 21; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 22; X64-NEXT: retq # encoding: [0xc3] 23 %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1) 24 ret i32 %res 25} 26 27declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64) 28 29define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) { 30; X86-LABEL: test_int_x86_avx512_kunpck_qd: 31; X86: # %bb.0: 32; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] 33; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c] 34; X86-NEXT: retl # encoding: [0xc3] 35; 36; X64-LABEL: test_int_x86_avx512_kunpck_qd: 37; X64: # %bb.0: 38; X64-NEXT: kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7] 39; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 40; X64-NEXT: kunpckdq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4b,0xc1] 41; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 42; X64-NEXT: retq # encoding: [0xc3] 43 %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1) 44 ret i64 %res 45} 46 47declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64) 48 49 define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) { 50; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: 51; X86: # %bb.0: 52; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] 53; X86-NEXT: vpbroadcastb %eax, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xc8] 54; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 55; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc0] 56; X86-NEXT: vpbroadcastb %eax, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd0] 57; X86-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2] 58; X86-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] 59; X86-NEXT: retl # encoding: [0xc3] 60; 61; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: 62; X64: # %bb.0: 63; X64-NEXT: vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf] 64; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 65; X64-NEXT: vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7] 66; X64-NEXT: vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7] 67; X64-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2] 68; X64-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] 69; X64-NEXT: retq # encoding: [0xc3] 70 %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1) 71 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask) 72 %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask) 73 %res3 = add <64 x i8> %res, %res1 74 %res4 = add <64 x i8> %res2, %res3 75 ret <64 x i8> %res4 76 } 77 78declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32) 79 define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) { 80; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: 81; X86: # %bb.0: 82; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] 83; X86-NEXT: vpbroadcastw %eax, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xc8] 84; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 85; X86-NEXT: vpbroadcastw %eax, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc0] 86; X86-NEXT: vpbroadcastw %eax, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd0] 87; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 88; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 89; X86-NEXT: retl # encoding: [0xc3] 90; 91; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: 92; X64: # %bb.0: 93; X64-NEXT: vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf] 94; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 95; X64-NEXT: vpbroadcastw %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7] 96; X64-NEXT: vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7] 97; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 98; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 99; X64-NEXT: retq # encoding: [0xc3] 100 %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1) 101 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask) 102 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask) 103 %res3 = add <32 x i16> %res, %res1 104 %res4 = add <32 x i16> %res2, %res3 105 ret <32 x i16> %res4 106 } 107 108declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64) 109 110define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) { 111; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512: 112; X86: # %bb.0: 113; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 114; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 115; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] 116; X86-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01] 117; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00] 118; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 119; X86-NEXT: retl # encoding: [0xc3] 120; 121; X64-LABEL: test_int_x86_avx512_mask_storeu_b_512: 122; X64: # %bb.0: 123; X64-NEXT: kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca] 124; X64-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x07] 125; X64-NEXT: vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06] 126; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 127; X64-NEXT: retq # encoding: [0xc3] 128 call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2) 129 call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1) 130 ret void 131} 132 133declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32) 134 135define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) { 136; X86-LABEL: test_int_x86_avx512_mask_storeu_w_512: 137; X86: # %bb.0: 138; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 139; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 140; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] 141; X86-NEXT: vmovdqu16 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x01] 142; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00] 143; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 144; X86-NEXT: retl # encoding: [0xc3] 145; 146; X64-LABEL: test_int_x86_avx512_mask_storeu_w_512: 147; X64: # %bb.0: 148; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 149; X64-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x07] 150; X64-NEXT: vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06] 151; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 152; X64-NEXT: retq # encoding: [0xc3] 153 call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2) 154 call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1) 155 ret void 156} 157 158declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32) 159 160define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) { 161; X86-LABEL: test_int_x86_avx512_mask_loadu_w_512: 162; X86: # %bb.0: 163; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 164; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 165; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01] 166; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] 167; X86-NEXT: vmovdqu16 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x00] 168; X86-NEXT: vmovdqu16 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x09] 169; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] 170; X86-NEXT: retl # encoding: [0xc3] 171; 172; X64-LABEL: test_int_x86_avx512_mask_loadu_w_512: 173; X64: # %bb.0: 174; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07] 175; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] 176; X64-NEXT: vmovdqu16 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x06] 177; X64-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x0f] 178; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] 179; X64-NEXT: retq # encoding: [0xc3] 180 %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1) 181 %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask) 182 %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask) 183 %res2 = add <32 x i16> %res, %res1 184 ret <32 x i16> %res2 185} 186 187declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64) 188 189define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) { 190; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512: 191; X86: # %bb.0: 192; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 193; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 194; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01] 195; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] 196; X86-NEXT: vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00] 197; X86-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09] 198; X86-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] 199; X86-NEXT: retl # encoding: [0xc3] 200; 201; X64-LABEL: test_int_x86_avx512_mask_loadu_b_512: 202; X64: # %bb.0: 203; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07] 204; X64-NEXT: kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca] 205; X64-NEXT: vmovdqu8 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x06] 206; X64-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x0f] 207; X64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] 208; X64-NEXT: retq # encoding: [0xc3] 209 %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1) 210 %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask) 211 %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask) 212 %res2 = add <64 x i8> %res, %res1 213 ret <64 x i8> %res2 214} 215 216declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32) 217 218define <8 x i64>@test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) { 219; CHECK-LABEL: test_int_x86_avx512_psll_dq_512: 220; CHECK: # %bb.0: 221; CHECK-NEXT: vpslldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x08] 222; CHECK-NEXT: # zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55] 223; CHECK-NEXT: vpslldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xf8,0x04] 224; CHECK-NEXT: # zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59] 225; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] 226; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 227 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8) 228 %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4) 229 %res2 = add <8 x i64> %res, %res1 230 ret <8 x i64> %res2 231} 232 233define <8 x i64>@test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) { 234; X86-LABEL: test_int_x86_avx512_psll_load_dq_512: 235; X86: # %bb.0: 236; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 237; X86-NEXT: vpslldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x38,0x04] 238; X86-NEXT: # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59] 239; X86-NEXT: retl # encoding: [0xc3] 240; 241; X64-LABEL: test_int_x86_avx512_psll_load_dq_512: 242; X64: # %bb.0: 243; X64-NEXT: vpslldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x3f,0x04] 244; X64-NEXT: # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59] 245; X64-NEXT: retq # encoding: [0xc3] 246 %x0 = load <8 x i64>, <8 x i64> *%p0 247 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4) 248 ret <8 x i64> %res 249} 250 251declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32) 252 253define <8 x i64>@test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) { 254; CHECK-LABEL: test_int_x86_avx512_psrl_dq_512: 255; CHECK: # %bb.0: 256; CHECK-NEXT: vpsrldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x08] 257; CHECK-NEXT: # zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero 258; CHECK-NEXT: vpsrldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xd8,0x04] 259; CHECK-NEXT: # zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero 260; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] 261; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 262 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8) 263 %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) 264 %res2 = add <8 x i64> %res, %res1 265 ret <8 x i64> %res2 266} 267 268define <8 x i64>@test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) { 269; X86-LABEL: test_int_x86_avx512_psrl_load_dq_512: 270; X86: # %bb.0: 271; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 272; X86-NEXT: vpsrldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x18,0x04] 273; X86-NEXT: # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero 274; X86-NEXT: retl # encoding: [0xc3] 275; 276; X64-LABEL: test_int_x86_avx512_psrl_load_dq_512: 277; X64: # %bb.0: 278; X64-NEXT: vpsrldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x1f,0x04] 279; X64-NEXT: # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero 280; X64-NEXT: retq # encoding: [0xc3] 281 %x0 = load <8 x i64>, <8 x i64> *%p0 282 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) 283 ret <8 x i64> %res 284} 285 286declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64) 287 288define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) { 289; X86-LABEL: test_int_x86_avx512_mask_palignr_512: 290; X86: # %bb.0: 291; X86-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xd9,0x02] 292; X86-NEXT: # zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49] 293; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 294; X86-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02] 295; X86-NEXT: # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49] 296; X86-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02] 297; X86-NEXT: # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49] 298; X86-NEXT: vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3] 299; X86-NEXT: vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0] 300; X86-NEXT: retl # encoding: [0xc3] 301; 302; X64-LABEL: test_int_x86_avx512_mask_palignr_512: 303; X64: # %bb.0: 304; X64-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xd9,0x02] 305; X64-NEXT: # zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49] 306; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 307; X64-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02] 308; X64-NEXT: # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49] 309; X64-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02] 310; X64-NEXT: # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49] 311; X64-NEXT: vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3] 312; X64-NEXT: vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0] 313; X64-NEXT: retq # encoding: [0xc3] 314 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4) 315 %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4) 316 %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1) 317 %res3 = add <64 x i8> %res, %res1 318 %res4 = add <64 x i8> %res3, %res2 319 ret <64 x i8> %res4 320} 321 322declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32) 323 324define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 325; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_512: 326; X86: # %bb.0: 327; X86-NEXT: vpshufhw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xd0,0x03] 328; X86-NEXT: # zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28] 329; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 330; X86-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03] 331; X86-NEXT: # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28] 332; X86-NEXT: vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03] 333; X86-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28] 334; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 335; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 336; X86-NEXT: retl # encoding: [0xc3] 337; 338; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_512: 339; X64: # %bb.0: 340; X64-NEXT: vpshufhw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xd0,0x03] 341; X64-NEXT: # zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28] 342; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 343; X64-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03] 344; X64-NEXT: # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28] 345; X64-NEXT: vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03] 346; X64-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28] 347; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 348; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 349; X64-NEXT: retq # encoding: [0xc3] 350 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 351 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) 352 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) 353 %res3 = add <32 x i16> %res, %res1 354 %res4 = add <32 x i16> %res3, %res2 355 ret <32 x i16> %res4 356} 357 358declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32) 359 360define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 361; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_512: 362; X86: # %bb.0: 363; X86-NEXT: vpshuflw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xd0,0x03] 364; X86-NEXT: # zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31] 365; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 366; X86-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03] 367; X86-NEXT: # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31] 368; X86-NEXT: vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03] 369; X86-NEXT: # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31] 370; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 371; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 372; X86-NEXT: retl # encoding: [0xc3] 373; 374; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_512: 375; X64: # %bb.0: 376; X64-NEXT: vpshuflw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xd0,0x03] 377; X64-NEXT: # zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31] 378; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 379; X64-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03] 380; X64-NEXT: # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31] 381; X64-NEXT: vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03] 382; X64-NEXT: # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31] 383; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 384; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 385; X64-NEXT: retq # encoding: [0xc3] 386 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 387 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) 388 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) 389 %res3 = add <32 x i16> %res, %res1 390 %res4 = add <32 x i16> %res3, %res2 391 ret <32 x i16> %res4 392} 393 394define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { 395; X86-LABEL: test_pcmpeq_b: 396; X86: # %bb.0: 397; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 398; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 399; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 400; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 401; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 402; X86-NEXT: retl # encoding: [0xc3] 403; 404; X64-LABEL: test_pcmpeq_b: 405; X64: # %bb.0: 406; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 407; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 408; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 409; X64-NEXT: retq # encoding: [0xc3] 410 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) 411 ret i64 %res 412} 413 414define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { 415; X86-LABEL: test_mask_pcmpeq_b: 416; X86: # %bb.0: 417; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 418; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 419; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 420; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 421; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04] 422; X86-NEXT: andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08] 423; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 424; X86-NEXT: retl # encoding: [0xc3] 425; 426; X64-LABEL: test_mask_pcmpeq_b: 427; X64: # %bb.0: 428; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 429; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 430; X64-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] 431; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 432; X64-NEXT: retq # encoding: [0xc3] 433 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) 434 ret i64 %res 435} 436 437declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64) 438 439define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) { 440; CHECK-LABEL: test_pcmpeq_w: 441; CHECK: # %bb.0: 442; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 443; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 444; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 445; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 446 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) 447 ret i32 %res 448} 449 450define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 451; X86-LABEL: test_mask_pcmpeq_w: 452; X86: # %bb.0: 453; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 454; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 455; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04] 456; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 457; X86-NEXT: retl # encoding: [0xc3] 458; 459; X64-LABEL: test_mask_pcmpeq_w: 460; X64: # %bb.0: 461; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 462; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 463; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 464; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 465; X64-NEXT: retq # encoding: [0xc3] 466 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) 467 ret i32 %res 468} 469 470declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32) 471 472define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) { 473; X86-LABEL: test_pcmpgt_b: 474; X86: # %bb.0: 475; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] 476; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 477; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 478; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 479; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 480; X86-NEXT: retl # encoding: [0xc3] 481; 482; X64-LABEL: test_pcmpgt_b: 483; X64: # %bb.0: 484; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] 485; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 486; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 487; X64-NEXT: retq # encoding: [0xc3] 488 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) 489 ret i64 %res 490} 491 492define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { 493; X86-LABEL: test_mask_pcmpgt_b: 494; X86: # %bb.0: 495; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] 496; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 497; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 498; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 499; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04] 500; X86-NEXT: andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08] 501; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 502; X86-NEXT: retl # encoding: [0xc3] 503; 504; X64-LABEL: test_mask_pcmpgt_b: 505; X64: # %bb.0: 506; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] 507; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 508; X64-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8] 509; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 510; X64-NEXT: retq # encoding: [0xc3] 511 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) 512 ret i64 %res 513} 514 515declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64) 516 517define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) { 518; CHECK-LABEL: test_pcmpgt_w: 519; CHECK: # %bb.0: 520; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] 521; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 522; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 523; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 524 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) 525 ret i32 %res 526} 527 528define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 529; X86-LABEL: test_mask_pcmpgt_w: 530; X86: # %bb.0: 531; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] 532; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 533; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04] 534; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 535; X86-NEXT: retl # encoding: [0xc3] 536; 537; X64-LABEL: test_mask_pcmpgt_w: 538; X64: # %bb.0: 539; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] 540; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 541; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8] 542; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 543; X64-NEXT: retq # encoding: [0xc3] 544 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) 545 ret i32 %res 546} 547 548declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32) 549 550declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 551 552define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 553; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: 554; X86: # %bb.0: 555; X86-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xd9] 556; X86-NEXT: # zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 557; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 558; X86-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1] 559; X86-NEXT: # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 560; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 561; X86-NEXT: retl # encoding: [0xc3] 562; 563; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: 564; X64: # %bb.0: 565; X64-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xd9] 566; X64-NEXT: # zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 567; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 568; X64-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1] 569; X64-NEXT: # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 570; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 571; X64-NEXT: retq # encoding: [0xc3] 572 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 573 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 574 %res2 = add <64 x i8> %res, %res1 575 ret <64 x i8> %res2 576} 577 578declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 579 580define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 581; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: 582; X86: # %bb.0: 583; X86-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xd9] 584; X86-NEXT: # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 585; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 586; X86-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1] 587; X86-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 588; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 589; X86-NEXT: retl # encoding: [0xc3] 590; 591; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: 592; X64: # %bb.0: 593; X64-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xd9] 594; X64-NEXT: # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 595; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 596; X64-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1] 597; X64-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 598; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 599; X64-NEXT: retq # encoding: [0xc3] 600 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 601 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 602 %res2 = add <64 x i8> %res, %res1 603 ret <64 x i8> %res2 604} 605 606declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 607 608define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 609; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: 610; X86: # %bb.0: 611; X86-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xd9] 612; X86-NEXT: # zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 613; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 614; X86-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1] 615; X86-NEXT: # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 616; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 617; X86-NEXT: retl # encoding: [0xc3] 618; 619; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: 620; X64: # %bb.0: 621; X64-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xd9] 622; X64-NEXT: # zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 623; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 624; X64-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1] 625; X64-NEXT: # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 626; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 627; X64-NEXT: retq # encoding: [0xc3] 628 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 629 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 630 %res2 = add <32 x i16> %res, %res1 631 ret <32 x i16> %res2 632} 633 634declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 635 636define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 637; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: 638; X86: # %bb.0: 639; X86-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xd9] 640; X86-NEXT: # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 641; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 642; X86-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1] 643; X86-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 644; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 645; X86-NEXT: retl # encoding: [0xc3] 646; 647; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: 648; X64: # %bb.0: 649; X64-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xd9] 650; X64-NEXT: # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 651; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 652; X64-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1] 653; X64-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 654; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 655; X64-NEXT: retq # encoding: [0xc3] 656 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 657 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 658 %res2 = add <32 x i16> %res, %res1 659 ret <32 x i16> %res2 660} 661 662declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 663 664define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 665; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_512: 666; X86: # %bb.0: 667; X86-NEXT: vpmaxsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xd9] 668; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 669; X86-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1] 670; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 671; X86-NEXT: retl # encoding: [0xc3] 672; 673; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_512: 674; X64: # %bb.0: 675; X64-NEXT: vpmaxsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xd9] 676; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 677; X64-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1] 678; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 679; X64-NEXT: retq # encoding: [0xc3] 680 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 681 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 682 %res2 = add <64 x i8> %res, %res1 683 ret <64 x i8> %res2 684} 685 686declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 687 688define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 689; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_512: 690; X86: # %bb.0: 691; X86-NEXT: vpmaxsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xd9] 692; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 693; X86-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1] 694; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 695; X86-NEXT: retl # encoding: [0xc3] 696; 697; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_512: 698; X64: # %bb.0: 699; X64-NEXT: vpmaxsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xd9] 700; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 701; X64-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1] 702; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 703; X64-NEXT: retq # encoding: [0xc3] 704 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 705 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 706 %res2 = add <32 x i16> %res, %res1 707 ret <32 x i16> %res2 708} 709 710declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 711 712define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 713; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_512: 714; X86: # %bb.0: 715; X86-NEXT: vpmaxub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xd9] 716; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 717; X86-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1] 718; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 719; X86-NEXT: retl # encoding: [0xc3] 720; 721; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_512: 722; X64: # %bb.0: 723; X64-NEXT: vpmaxub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xd9] 724; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 725; X64-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1] 726; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 727; X64-NEXT: retq # encoding: [0xc3] 728 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 729 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 730 %res2 = add <64 x i8> %res, %res1 731 ret <64 x i8> %res2 732} 733 734declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 735 736define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 737; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: 738; X86: # %bb.0: 739; X86-NEXT: vpmaxuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xd9] 740; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 741; X86-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1] 742; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 743; X86-NEXT: retl # encoding: [0xc3] 744; 745; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_512: 746; X64: # %bb.0: 747; X64-NEXT: vpmaxuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xd9] 748; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 749; X64-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1] 750; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 751; X64-NEXT: retq # encoding: [0xc3] 752 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 753 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 754 %res2 = add <32 x i16> %res, %res1 755 ret <32 x i16> %res2 756} 757 758declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 759 760define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 761; X86-LABEL: test_int_x86_avx512_mask_pmins_b_512: 762; X86: # %bb.0: 763; X86-NEXT: vpminsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xd9] 764; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 765; X86-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1] 766; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 767; X86-NEXT: retl # encoding: [0xc3] 768; 769; X64-LABEL: test_int_x86_avx512_mask_pmins_b_512: 770; X64: # %bb.0: 771; X64-NEXT: vpminsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xd9] 772; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 773; X64-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1] 774; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 775; X64-NEXT: retq # encoding: [0xc3] 776 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 777 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 778 %res2 = add <64 x i8> %res, %res1 779 ret <64 x i8> %res2 780} 781 782declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 783 784define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 785; X86-LABEL: test_int_x86_avx512_mask_pmins_w_512: 786; X86: # %bb.0: 787; X86-NEXT: vpminsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xd9] 788; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 789; X86-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1] 790; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 791; X86-NEXT: retl # encoding: [0xc3] 792; 793; X64-LABEL: test_int_x86_avx512_mask_pmins_w_512: 794; X64: # %bb.0: 795; X64-NEXT: vpminsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xd9] 796; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 797; X64-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1] 798; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 799; X64-NEXT: retq # encoding: [0xc3] 800 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 801 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 802 %res2 = add <32 x i16> %res, %res1 803 ret <32 x i16> %res2 804} 805 806declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 807 808define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 809; X86-LABEL: test_int_x86_avx512_mask_pminu_b_512: 810; X86: # %bb.0: 811; X86-NEXT: vpminub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xd9] 812; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 813; X86-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1] 814; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 815; X86-NEXT: retl # encoding: [0xc3] 816; 817; X64-LABEL: test_int_x86_avx512_mask_pminu_b_512: 818; X64: # %bb.0: 819; X64-NEXT: vpminub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xd9] 820; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 821; X64-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1] 822; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 823; X64-NEXT: retq # encoding: [0xc3] 824 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 825 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 826 %res2 = add <64 x i8> %res, %res1 827 ret <64 x i8> %res2 828} 829 830declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 831 832define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 833; X86-LABEL: test_int_x86_avx512_mask_pminu_w_512: 834; X86: # %bb.0: 835; X86-NEXT: vpminuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xd9] 836; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 837; X86-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1] 838; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 839; X86-NEXT: retl # encoding: [0xc3] 840; 841; X64-LABEL: test_int_x86_avx512_mask_pminu_w_512: 842; X64: # %bb.0: 843; X64-NEXT: vpminuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xd9] 844; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 845; X64-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1] 846; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 847; X64-NEXT: retq # encoding: [0xc3] 848 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 849 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 850 %res2 = add <32 x i16> %res, %res1 851 ret <32 x i16> %res2 852} 853 854declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32) 855 856define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) { 857; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: 858; X86: # %bb.0: 859; X86-NEXT: vpmovzxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xd0] 860; X86-NEXT: # zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 861; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 862; X86-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8] 863; X86-NEXT: # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 864; X86-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0] 865; X86-NEXT: # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 866; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 867; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 868; X86-NEXT: retl # encoding: [0xc3] 869; 870; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: 871; X64: # %bb.0: 872; X64-NEXT: vpmovzxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xd0] 873; X64-NEXT: # zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 874; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 875; X64-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8] 876; X64-NEXT: # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 877; X64-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0] 878; X64-NEXT: # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 879; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 880; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 881; X64-NEXT: retq # encoding: [0xc3] 882 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) 883 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) 884 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) 885 %res3 = add <32 x i16> %res, %res1 886 %res4 = add <32 x i16> %res3, %res2 887 ret <32 x i16> %res4 888} 889 890declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32) 891 892define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) { 893; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512: 894; X86: # %bb.0: 895; X86-NEXT: vpmovsxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xd0] 896; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 897; X86-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8] 898; X86-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0] 899; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 900; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 901; X86-NEXT: retl # encoding: [0xc3] 902; 903; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512: 904; X64: # %bb.0: 905; X64-NEXT: vpmovsxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xd0] 906; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 907; X64-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8] 908; X64-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0] 909; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 910; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 911; X64-NEXT: retq # encoding: [0xc3] 912 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) 913 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2) 914 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1) 915 %res3 = add <32 x i16> %res, %res1 916 %res4 = add <32 x i16> %res3, %res2 917 ret <32 x i16> %res4 918} 919 920declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 921 922define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 923; X86-LABEL: test_int_x86_avx512_mask_psrl_w_512: 924; X86: # %bb.0: 925; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xd9] 926; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 927; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1] 928; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1] 929; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] 930; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 931; X86-NEXT: retl # encoding: [0xc3] 932; 933; X64-LABEL: test_int_x86_avx512_mask_psrl_w_512: 934; X64: # %bb.0: 935; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xd9] 936; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 937; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1] 938; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1] 939; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] 940; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 941; X64-NEXT: retq # encoding: [0xc3] 942 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 943 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 944 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 945 %res3 = add <32 x i16> %res, %res1 946 %res4 = add <32 x i16> %res3, %res2 947 ret <32 x i16> %res4 948} 949 950declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32) 951 952define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 953; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_512: 954; X86: # %bb.0: 955; X86-NEXT: vpsrlw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x03] 956; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 957; X86-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03] 958; X86-NEXT: vpsrlw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x03] 959; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 960; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 961; X86-NEXT: retl # encoding: [0xc3] 962; 963; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512: 964; X64: # %bb.0: 965; X64-NEXT: vpsrlw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x03] 966; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 967; X64-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03] 968; X64-NEXT: vpsrlw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x03] 969; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 970; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 971; X64-NEXT: retq # encoding: [0xc3] 972 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 973 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) 974 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) 975 %res3 = add <32 x i16> %res, %res1 976 %res4 = add <32 x i16> %res3, %res2 977 ret <32 x i16> %res4 978} 979 980declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 981 982define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 983; X86-LABEL: test_int_x86_avx512_mask_psra_w_512: 984; X86: # %bb.0: 985; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xd9] 986; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 987; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1] 988; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1] 989; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] 990; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 991; X86-NEXT: retl # encoding: [0xc3] 992; 993; X64-LABEL: test_int_x86_avx512_mask_psra_w_512: 994; X64: # %bb.0: 995; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xd9] 996; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 997; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1] 998; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1] 999; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] 1000; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1001; X64-NEXT: retq # encoding: [0xc3] 1002 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 1003 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 1004 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 1005 %res3 = add <32 x i16> %res, %res1 1006 %res4 = add <32 x i16> %res3, %res2 1007 ret <32 x i16> %res4 1008} 1009 1010declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32) 1011 1012define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 1013; X86-LABEL: test_int_x86_avx512_mask_psra_wi_512: 1014; X86: # %bb.0: 1015; X86-NEXT: vpsraw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x03] 1016; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1017; X86-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03] 1018; X86-NEXT: vpsraw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x03] 1019; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 1020; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1021; X86-NEXT: retl # encoding: [0xc3] 1022; 1023; X64-LABEL: test_int_x86_avx512_mask_psra_wi_512: 1024; X64: # %bb.0: 1025; X64-NEXT: vpsraw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x03] 1026; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1027; X64-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03] 1028; X64-NEXT: vpsraw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x03] 1029; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 1030; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1031; X64-NEXT: retq # encoding: [0xc3] 1032 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 1033 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) 1034 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) 1035 %res3 = add <32 x i16> %res, %res1 1036 %res4 = add <32 x i16> %res3, %res2 1037 ret <32 x i16> %res4 1038} 1039 1040declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) 1041 1042define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) { 1043; X86-LABEL: test_int_x86_avx512_mask_psll_w_512: 1044; X86: # %bb.0: 1045; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xd9] 1046; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1047; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1] 1048; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1] 1049; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] 1050; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1051; X86-NEXT: retl # encoding: [0xc3] 1052; 1053; X64-LABEL: test_int_x86_avx512_mask_psll_w_512: 1054; X64: # %bb.0: 1055; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xd9] 1056; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1057; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1] 1058; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1] 1059; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] 1060; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 1061; X64-NEXT: retq # encoding: [0xc3] 1062 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) 1063 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 1064 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1) 1065 %res3 = add <32 x i16> %res, %res1 1066 %res4 = add <32 x i16> %res3, %res2 1067 ret <32 x i16> %res4 1068} 1069 1070declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32) 1071 1072define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) { 1073; X86-LABEL: test_int_x86_avx512_mask_psll_wi_512: 1074; X86: # %bb.0: 1075; X86-NEXT: vpsllw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x03] 1076; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1077; X86-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03] 1078; X86-NEXT: vpsllw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x03] 1079; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 1080; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1081; X86-NEXT: retl # encoding: [0xc3] 1082; 1083; X64-LABEL: test_int_x86_avx512_mask_psll_wi_512: 1084; X64: # %bb.0: 1085; X64-NEXT: vpsllw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x03] 1086; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1087; X64-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03] 1088; X64-NEXT: vpsllw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x03] 1089; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] 1090; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] 1091; X64-NEXT: retq # encoding: [0xc3] 1092 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) 1093 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3) 1094 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1) 1095 %res3 = add <32 x i16> %res, %res1 1096 %res4 = add <32 x i16> %res3, %res2 1097 ret <32 x i16> %res4 1098} 1099 1100declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 1101 1102define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 1103; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_512: 1104; X86: # %bb.0: 1105; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xd9] 1106; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1107; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1] 1108; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 1109; X86-NEXT: retl # encoding: [0xc3] 1110; 1111; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_512: 1112; X64: # %bb.0: 1113; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xd9] 1114; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1115; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1] 1116; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 1117; X64-NEXT: retq # encoding: [0xc3] 1118 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 1119 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 1120 %res2 = add <64 x i8> %res, %res1 1121 ret <64 x i8> %res2 1122} 1123 1124 1125declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64) 1126 1127define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) { 1128; X86-LABEL: test_int_x86_avx512_cvtmask2b_512: 1129; X86: # %bb.0: 1130; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf8,0x90,0x44,0x24,0x04] 1131; X86-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0] 1132; X86-NEXT: retl # encoding: [0xc3] 1133; 1134; X64-LABEL: test_int_x86_avx512_cvtmask2b_512: 1135; X64: # %bb.0: 1136; X64-NEXT: kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7] 1137; X64-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0] 1138; X64-NEXT: retq # encoding: [0xc3] 1139 %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0) 1140 ret <64 x i8> %res 1141} 1142 1143declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32) 1144 1145define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) { 1146; X86-LABEL: test_int_x86_avx512_cvtmask2w_512: 1147; X86: # %bb.0: 1148; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04] 1149; X86-NEXT: vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0] 1150; X86-NEXT: retl # encoding: [0xc3] 1151; 1152; X64-LABEL: test_int_x86_avx512_cvtmask2w_512: 1153; X64: # %bb.0: 1154; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7] 1155; X64-NEXT: vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0] 1156; X64-NEXT: retq # encoding: [0xc3] 1157 %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0) 1158 ret <32 x i16> %res 1159} 1160define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 1161; CHECK-LABEL: test_mask_packs_epi32_rr_512: 1162; CHECK: # %bb.0: 1163; CHECK-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1] 1164; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1165 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1166 ret <32 x i16> %res 1167} 1168 1169define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 1170; X86-LABEL: test_mask_packs_epi32_rrk_512: 1171; X86: # %bb.0: 1172; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1173; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1] 1174; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1175; X86-NEXT: retl # encoding: [0xc3] 1176; 1177; X64-LABEL: test_mask_packs_epi32_rrk_512: 1178; X64: # %bb.0: 1179; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1180; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1] 1181; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1182; X64-NEXT: retq # encoding: [0xc3] 1183 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1184 ret <32 x i16> %res 1185} 1186 1187define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 1188; X86-LABEL: test_mask_packs_epi32_rrkz_512: 1189; X86: # %bb.0: 1190; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1191; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1] 1192; X86-NEXT: retl # encoding: [0xc3] 1193; 1194; X64-LABEL: test_mask_packs_epi32_rrkz_512: 1195; X64: # %bb.0: 1196; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1197; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1] 1198; X64-NEXT: retq # encoding: [0xc3] 1199 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1200 ret <32 x i16> %res 1201} 1202 1203define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 1204; X86-LABEL: test_mask_packs_epi32_rm_512: 1205; X86: # %bb.0: 1206; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1207; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00] 1208; X86-NEXT: retl # encoding: [0xc3] 1209; 1210; X64-LABEL: test_mask_packs_epi32_rm_512: 1211; X64: # %bb.0: 1212; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07] 1213; X64-NEXT: retq # encoding: [0xc3] 1214 %b = load <16 x i32>, <16 x i32>* %ptr_b 1215 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1216 ret <32 x i16> %res 1217} 1218 1219define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1220; X86-LABEL: test_mask_packs_epi32_rmk_512: 1221; X86: # %bb.0: 1222; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1223; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1224; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08] 1225; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1226; X86-NEXT: retl # encoding: [0xc3] 1227; 1228; X64-LABEL: test_mask_packs_epi32_rmk_512: 1229; X64: # %bb.0: 1230; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1231; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f] 1232; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1233; X64-NEXT: retq # encoding: [0xc3] 1234 %b = load <16 x i32>, <16 x i32>* %ptr_b 1235 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1236 ret <32 x i16> %res 1237} 1238 1239define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 1240; X86-LABEL: test_mask_packs_epi32_rmkz_512: 1241; X86: # %bb.0: 1242; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1243; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1244; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00] 1245; X86-NEXT: retl # encoding: [0xc3] 1246; 1247; X64-LABEL: test_mask_packs_epi32_rmkz_512: 1248; X64: # %bb.0: 1249; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1250; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07] 1251; X64-NEXT: retq # encoding: [0xc3] 1252 %b = load <16 x i32>, <16 x i32>* %ptr_b 1253 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1254 ret <32 x i16> %res 1255} 1256 1257define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 1258; X86-LABEL: test_mask_packs_epi32_rmb_512: 1259; X86: # %bb.0: 1260; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1261; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00] 1262; X86-NEXT: retl # encoding: [0xc3] 1263; 1264; X64-LABEL: test_mask_packs_epi32_rmb_512: 1265; X64: # %bb.0: 1266; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07] 1267; X64-NEXT: retq # encoding: [0xc3] 1268 %q = load i32, i32* %ptr_b 1269 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1270 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1271 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1272 ret <32 x i16> %res 1273} 1274 1275define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1276; X86-LABEL: test_mask_packs_epi32_rmbk_512: 1277; X86: # %bb.0: 1278; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1279; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1280; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08] 1281; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1282; X86-NEXT: retl # encoding: [0xc3] 1283; 1284; X64-LABEL: test_mask_packs_epi32_rmbk_512: 1285; X64: # %bb.0: 1286; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1287; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f] 1288; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1289; X64-NEXT: retq # encoding: [0xc3] 1290 %q = load i32, i32* %ptr_b 1291 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1292 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1293 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1294 ret <32 x i16> %res 1295} 1296 1297define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 1298; X86-LABEL: test_mask_packs_epi32_rmbkz_512: 1299; X86: # %bb.0: 1300; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1301; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1302; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00] 1303; X86-NEXT: retl # encoding: [0xc3] 1304; 1305; X64-LABEL: test_mask_packs_epi32_rmbkz_512: 1306; X64: # %bb.0: 1307; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1308; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07] 1309; X64-NEXT: retq # encoding: [0xc3] 1310 %q = load i32, i32* %ptr_b 1311 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1312 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1313 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1314 ret <32 x i16> %res 1315} 1316 1317declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1318 1319define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1320; CHECK-LABEL: test_mask_packs_epi16_rr_512: 1321; CHECK: # %bb.0: 1322; CHECK-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1] 1323; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1324 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1325 ret <64 x i8> %res 1326} 1327 1328define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1329; X86-LABEL: test_mask_packs_epi16_rrk_512: 1330; X86: # %bb.0: 1331; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1332; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1] 1333; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1334; X86-NEXT: retl # encoding: [0xc3] 1335; 1336; X64-LABEL: test_mask_packs_epi16_rrk_512: 1337; X64: # %bb.0: 1338; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1339; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1] 1340; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1341; X64-NEXT: retq # encoding: [0xc3] 1342 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1343 ret <64 x i8> %res 1344} 1345 1346define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1347; X86-LABEL: test_mask_packs_epi16_rrkz_512: 1348; X86: # %bb.0: 1349; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1350; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1] 1351; X86-NEXT: retl # encoding: [0xc3] 1352; 1353; X64-LABEL: test_mask_packs_epi16_rrkz_512: 1354; X64: # %bb.0: 1355; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1356; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1] 1357; X64-NEXT: retq # encoding: [0xc3] 1358 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1359 ret <64 x i8> %res 1360} 1361 1362define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1363; X86-LABEL: test_mask_packs_epi16_rm_512: 1364; X86: # %bb.0: 1365; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1366; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00] 1367; X86-NEXT: retl # encoding: [0xc3] 1368; 1369; X64-LABEL: test_mask_packs_epi16_rm_512: 1370; X64: # %bb.0: 1371; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07] 1372; X64-NEXT: retq # encoding: [0xc3] 1373 %b = load <32 x i16>, <32 x i16>* %ptr_b 1374 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1375 ret <64 x i8> %res 1376} 1377 1378define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1379; X86-LABEL: test_mask_packs_epi16_rmk_512: 1380; X86: # %bb.0: 1381; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1382; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 1383; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08] 1384; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1385; X86-NEXT: retl # encoding: [0xc3] 1386; 1387; X64-LABEL: test_mask_packs_epi16_rmk_512: 1388; X64: # %bb.0: 1389; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 1390; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f] 1391; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1392; X64-NEXT: retq # encoding: [0xc3] 1393 %b = load <32 x i16>, <32 x i16>* %ptr_b 1394 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1395 ret <64 x i8> %res 1396} 1397 1398define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 1399; X86-LABEL: test_mask_packs_epi16_rmkz_512: 1400; X86: # %bb.0: 1401; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1402; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 1403; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00] 1404; X86-NEXT: retl # encoding: [0xc3] 1405; 1406; X64-LABEL: test_mask_packs_epi16_rmkz_512: 1407; X64: # %bb.0: 1408; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 1409; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07] 1410; X64-NEXT: retq # encoding: [0xc3] 1411 %b = load <32 x i16>, <32 x i16>* %ptr_b 1412 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1413 ret <64 x i8> %res 1414} 1415 1416declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 1417 1418 1419define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 1420; CHECK-LABEL: test_mask_packus_epi32_rr_512: 1421; CHECK: # %bb.0: 1422; CHECK-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1] 1423; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1424 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1425 ret <32 x i16> %res 1426} 1427 1428define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 1429; X86-LABEL: test_mask_packus_epi32_rrk_512: 1430; X86: # %bb.0: 1431; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1432; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1] 1433; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1434; X86-NEXT: retl # encoding: [0xc3] 1435; 1436; X64-LABEL: test_mask_packus_epi32_rrk_512: 1437; X64: # %bb.0: 1438; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1439; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1] 1440; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1441; X64-NEXT: retq # encoding: [0xc3] 1442 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1443 ret <32 x i16> %res 1444} 1445 1446define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 1447; X86-LABEL: test_mask_packus_epi32_rrkz_512: 1448; X86: # %bb.0: 1449; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1450; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1] 1451; X86-NEXT: retl # encoding: [0xc3] 1452; 1453; X64-LABEL: test_mask_packus_epi32_rrkz_512: 1454; X64: # %bb.0: 1455; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1456; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1] 1457; X64-NEXT: retq # encoding: [0xc3] 1458 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1459 ret <32 x i16> %res 1460} 1461 1462define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 1463; X86-LABEL: test_mask_packus_epi32_rm_512: 1464; X86: # %bb.0: 1465; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1466; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00] 1467; X86-NEXT: retl # encoding: [0xc3] 1468; 1469; X64-LABEL: test_mask_packus_epi32_rm_512: 1470; X64: # %bb.0: 1471; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07] 1472; X64-NEXT: retq # encoding: [0xc3] 1473 %b = load <16 x i32>, <16 x i32>* %ptr_b 1474 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1475 ret <32 x i16> %res 1476} 1477 1478define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1479; X86-LABEL: test_mask_packus_epi32_rmk_512: 1480; X86: # %bb.0: 1481; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1482; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1483; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08] 1484; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1485; X86-NEXT: retl # encoding: [0xc3] 1486; 1487; X64-LABEL: test_mask_packus_epi32_rmk_512: 1488; X64: # %bb.0: 1489; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1490; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f] 1491; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1492; X64-NEXT: retq # encoding: [0xc3] 1493 %b = load <16 x i32>, <16 x i32>* %ptr_b 1494 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1495 ret <32 x i16> %res 1496} 1497 1498define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 1499; X86-LABEL: test_mask_packus_epi32_rmkz_512: 1500; X86: # %bb.0: 1501; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1502; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1503; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00] 1504; X86-NEXT: retl # encoding: [0xc3] 1505; 1506; X64-LABEL: test_mask_packus_epi32_rmkz_512: 1507; X64: # %bb.0: 1508; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1509; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07] 1510; X64-NEXT: retq # encoding: [0xc3] 1511 %b = load <16 x i32>, <16 x i32>* %ptr_b 1512 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1513 ret <32 x i16> %res 1514} 1515 1516define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 1517; X86-LABEL: test_mask_packus_epi32_rmb_512: 1518; X86: # %bb.0: 1519; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1520; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00] 1521; X86-NEXT: retl # encoding: [0xc3] 1522; 1523; X64-LABEL: test_mask_packus_epi32_rmb_512: 1524; X64: # %bb.0: 1525; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07] 1526; X64-NEXT: retq # encoding: [0xc3] 1527 %q = load i32, i32* %ptr_b 1528 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1529 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1530 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1531 ret <32 x i16> %res 1532} 1533 1534define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1535; X86-LABEL: test_mask_packus_epi32_rmbk_512: 1536; X86: # %bb.0: 1537; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1538; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1539; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08] 1540; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1541; X86-NEXT: retl # encoding: [0xc3] 1542; 1543; X64-LABEL: test_mask_packus_epi32_rmbk_512: 1544; X64: # %bb.0: 1545; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1546; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f] 1547; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1548; X64-NEXT: retq # encoding: [0xc3] 1549 %q = load i32, i32* %ptr_b 1550 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1551 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1552 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1553 ret <32 x i16> %res 1554} 1555 1556define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 1557; X86-LABEL: test_mask_packus_epi32_rmbkz_512: 1558; X86: # %bb.0: 1559; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1560; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1561; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00] 1562; X86-NEXT: retl # encoding: [0xc3] 1563; 1564; X64-LABEL: test_mask_packus_epi32_rmbkz_512: 1565; X64: # %bb.0: 1566; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1567; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07] 1568; X64-NEXT: retq # encoding: [0xc3] 1569 %q = load i32, i32* %ptr_b 1570 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1571 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1572 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1573 ret <32 x i16> %res 1574} 1575 1576declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1577 1578define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1579; CHECK-LABEL: test_mask_packus_epi16_rr_512: 1580; CHECK: # %bb.0: 1581; CHECK-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1] 1582; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1583 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1584 ret <64 x i8> %res 1585} 1586 1587define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1588; X86-LABEL: test_mask_packus_epi16_rrk_512: 1589; X86: # %bb.0: 1590; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1591; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1] 1592; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1593; X86-NEXT: retl # encoding: [0xc3] 1594; 1595; X64-LABEL: test_mask_packus_epi16_rrk_512: 1596; X64: # %bb.0: 1597; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1598; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1] 1599; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2] 1600; X64-NEXT: retq # encoding: [0xc3] 1601 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1602 ret <64 x i8> %res 1603} 1604 1605define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1606; X86-LABEL: test_mask_packus_epi16_rrkz_512: 1607; X86: # %bb.0: 1608; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 1609; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1] 1610; X86-NEXT: retl # encoding: [0xc3] 1611; 1612; X64-LABEL: test_mask_packus_epi16_rrkz_512: 1613; X64: # %bb.0: 1614; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1615; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1] 1616; X64-NEXT: retq # encoding: [0xc3] 1617 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1618 ret <64 x i8> %res 1619} 1620 1621define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1622; X86-LABEL: test_mask_packus_epi16_rm_512: 1623; X86: # %bb.0: 1624; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1625; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00] 1626; X86-NEXT: retl # encoding: [0xc3] 1627; 1628; X64-LABEL: test_mask_packus_epi16_rm_512: 1629; X64: # %bb.0: 1630; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07] 1631; X64-NEXT: retq # encoding: [0xc3] 1632 %b = load <32 x i16>, <32 x i16>* %ptr_b 1633 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1634 ret <64 x i8> %res 1635} 1636 1637define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1638; X86-LABEL: test_mask_packus_epi16_rmk_512: 1639; X86: # %bb.0: 1640; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1641; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 1642; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08] 1643; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1644; X86-NEXT: retl # encoding: [0xc3] 1645; 1646; X64-LABEL: test_mask_packus_epi16_rmk_512: 1647; X64: # %bb.0: 1648; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 1649; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f] 1650; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] 1651; X64-NEXT: retq # encoding: [0xc3] 1652 %b = load <32 x i16>, <32 x i16>* %ptr_b 1653 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1654 ret <64 x i8> %res 1655} 1656 1657define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 1658; X86-LABEL: test_mask_packus_epi16_rmkz_512: 1659; X86: # %bb.0: 1660; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1661; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] 1662; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00] 1663; X86-NEXT: retl # encoding: [0xc3] 1664; 1665; X64-LABEL: test_mask_packus_epi16_rmkz_512: 1666; X64: # %bb.0: 1667; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] 1668; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07] 1669; X64-NEXT: retq # encoding: [0xc3] 1670 %b = load <32 x i16>, <32 x i16>* %ptr_b 1671 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1672 ret <64 x i8> %res 1673} 1674 1675declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 1676 1677define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { 1678; X86-LABEL: test_cmp_b_512: 1679; X86: # %bb.0: 1680; X86-NEXT: pushl %edi # encoding: [0x57] 1681; X86-NEXT: .cfi_def_cfa_offset 8 1682; X86-NEXT: pushl %esi # encoding: [0x56] 1683; X86-NEXT: .cfi_def_cfa_offset 12 1684; X86-NEXT: .cfi_offset %esi, -12 1685; X86-NEXT: .cfi_offset %edi, -8 1686; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 1687; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1688; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 1689; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 1690; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0] 1691; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1692; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1693; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 1694; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] 1695; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] 1696; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02] 1697; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1698; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 1699; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 1700; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1] 1701; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0] 1702; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] 1703; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1704; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1705; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 1706; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] 1707; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] 1708; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05] 1709; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1710; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 1711; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 1712; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7] 1713; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1] 1714; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] 1715; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1716; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1717; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1718; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 1719; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] 1720; X86-NEXT: addl $-1, %eax # encoding: [0x83,0xc0,0xff] 1721; X86-NEXT: adcl $-1, %edx # encoding: [0x83,0xd2,0xff] 1722; X86-NEXT: popl %esi # encoding: [0x5e] 1723; X86-NEXT: .cfi_def_cfa_offset 8 1724; X86-NEXT: popl %edi # encoding: [0x5f] 1725; X86-NEXT: .cfi_def_cfa_offset 4 1726; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1727; X86-NEXT: retl # encoding: [0xc3] 1728; 1729; X64-LABEL: test_cmp_b_512: 1730; X64: # %bb.0: 1731; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 1732; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1733; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0] 1734; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1735; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 1736; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02] 1737; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1738; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 1739; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] 1740; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1741; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 1742; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05] 1743; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1744; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 1745; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1] 1746; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1747; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff] 1748; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1749; X64-NEXT: retq # encoding: [0xc3] 1750 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) 1751 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) 1752 %ret1 = add i64 %res0, %res1 1753 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) 1754 %ret2 = add i64 %ret1, %res2 1755 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) 1756 %ret3 = add i64 %ret2, %res3 1757 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) 1758 %ret4 = add i64 %ret3, %res4 1759 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) 1760 %ret5 = add i64 %ret4, %res5 1761 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) 1762 %ret6 = add i64 %ret5, %res6 1763 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) 1764 %ret7 = add i64 %ret6, %res7 1765 ret i64 %ret7 1766} 1767 1768define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { 1769; X86-LABEL: test_mask_cmp_b_512: 1770; X86: # %bb.0: 1771; X86-NEXT: pushl %ebp # encoding: [0x55] 1772; X86-NEXT: .cfi_def_cfa_offset 8 1773; X86-NEXT: pushl %ebx # encoding: [0x53] 1774; X86-NEXT: .cfi_def_cfa_offset 12 1775; X86-NEXT: pushl %edi # encoding: [0x57] 1776; X86-NEXT: .cfi_def_cfa_offset 16 1777; X86-NEXT: pushl %esi # encoding: [0x56] 1778; X86-NEXT: .cfi_def_cfa_offset 20 1779; X86-NEXT: .cfi_offset %esi, -20 1780; X86-NEXT: .cfi_offset %edi, -16 1781; X86-NEXT: .cfi_offset %ebx, -12 1782; X86-NEXT: .cfi_offset %ebp, -8 1783; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14] 1784; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18] 1785; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1] 1786; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6] 1787; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1788; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1789; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1790; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 1791; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1792; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] 1793; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xd0] 1794; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1795; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1796; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] 1797; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1798; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] 1799; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] 1800; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] 1801; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x02] 1802; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1803; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1804; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 1805; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1806; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] 1807; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda] 1808; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8] 1809; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04] 1810; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1811; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1812; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] 1813; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1814; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] 1815; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] 1816; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] 1817; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x05] 1818; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1819; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 1820; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb] 1821; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 1822; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 1823; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9] 1824; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd] 1825; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xd1] 1826; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 1827; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9] 1828; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1829; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0] 1830; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1831; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 1832; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea] 1833; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 1834; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18] 1835; X86-NEXT: popl %esi # encoding: [0x5e] 1836; X86-NEXT: .cfi_def_cfa_offset 16 1837; X86-NEXT: popl %edi # encoding: [0x5f] 1838; X86-NEXT: .cfi_def_cfa_offset 12 1839; X86-NEXT: popl %ebx # encoding: [0x5b] 1840; X86-NEXT: .cfi_def_cfa_offset 8 1841; X86-NEXT: popl %ebp # encoding: [0x5d] 1842; X86-NEXT: .cfi_def_cfa_offset 4 1843; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1844; X86-NEXT: retl # encoding: [0xc3] 1845; 1846; X64-LABEL: test_mask_cmp_b_512: 1847; X64: # %bb.0: 1848; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 1849; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] 1850; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1851; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0] 1852; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1853; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 1854; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02] 1855; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1856; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 1857; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] 1858; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1859; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 1860; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05] 1861; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] 1862; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] 1863; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1] 1864; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1865; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] 1866; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] 1867; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1868; X64-NEXT: retq # encoding: [0xc3] 1869 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) 1870 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) 1871 %ret1 = add i64 %res0, %res1 1872 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) 1873 %ret2 = add i64 %ret1, %res2 1874 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) 1875 %ret3 = add i64 %ret2, %res3 1876 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) 1877 %ret4 = add i64 %ret3, %res4 1878 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) 1879 %ret5 = add i64 %ret4, %res5 1880 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) 1881 %ret6 = add i64 %ret5, %res6 1882 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) 1883 %ret7 = add i64 %ret6, %res7 1884 ret i64 %ret7 1885} 1886 1887declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone 1888 1889define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { 1890; X86-LABEL: test_ucmp_b_512: 1891; X86: # %bb.0: 1892; X86-NEXT: pushl %edi # encoding: [0x57] 1893; X86-NEXT: .cfi_def_cfa_offset 8 1894; X86-NEXT: pushl %esi # encoding: [0x56] 1895; X86-NEXT: .cfi_def_cfa_offset 12 1896; X86-NEXT: .cfi_offset %esi, -12 1897; X86-NEXT: .cfi_offset %edi, -8 1898; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 1899; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1900; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 1901; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 1902; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01] 1903; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1904; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1905; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 1906; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] 1907; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] 1908; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02] 1909; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1910; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1] 1911; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 1912; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1] 1913; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0] 1914; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] 1915; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1916; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1917; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 1918; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce] 1919; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2] 1920; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05] 1921; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1922; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 1923; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8] 1924; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7] 1925; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1] 1926; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06] 1927; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 1928; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 1929; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 1930; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 1931; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] 1932; X86-NEXT: addl $-1, %eax # encoding: [0x83,0xc0,0xff] 1933; X86-NEXT: adcl $-1, %edx # encoding: [0x83,0xd2,0xff] 1934; X86-NEXT: popl %esi # encoding: [0x5e] 1935; X86-NEXT: .cfi_def_cfa_offset 8 1936; X86-NEXT: popl %edi # encoding: [0x5f] 1937; X86-NEXT: .cfi_def_cfa_offset 4 1938; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1939; X86-NEXT: retl # encoding: [0xc3] 1940; 1941; X64-LABEL: test_ucmp_b_512: 1942; X64: # %bb.0: 1943; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1] 1944; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1945; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01] 1946; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1947; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 1948; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02] 1949; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1950; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 1951; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04] 1952; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1953; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 1954; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05] 1955; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 1956; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 1957; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06] 1958; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 1959; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff] 1960; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1961; X64-NEXT: retq # encoding: [0xc3] 1962 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) 1963 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) 1964 %ret1 = add i64 %res0, %res1 1965 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) 1966 %ret2 = add i64 %ret1, %res2 1967 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) 1968 %ret3 = add i64 %ret2, %res3 1969 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) 1970 %ret4 = add i64 %ret3, %res4 1971 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) 1972 %ret5 = add i64 %ret4, %res5 1973 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) 1974 %ret6 = add i64 %ret5, %res6 1975 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) 1976 %ret7 = add i64 %ret6, %res7 1977 ret i64 %ret7 1978} 1979 1980define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { 1981; X86-LABEL: test_mask_x86_avx512_ucmp_b_512: 1982; X86: # %bb.0: 1983; X86-NEXT: pushl %ebp # encoding: [0x55] 1984; X86-NEXT: .cfi_def_cfa_offset 8 1985; X86-NEXT: pushl %ebx # encoding: [0x53] 1986; X86-NEXT: .cfi_def_cfa_offset 12 1987; X86-NEXT: pushl %edi # encoding: [0x57] 1988; X86-NEXT: .cfi_def_cfa_offset 16 1989; X86-NEXT: pushl %esi # encoding: [0x56] 1990; X86-NEXT: .cfi_def_cfa_offset 20 1991; X86-NEXT: .cfi_offset %esi, -20 1992; X86-NEXT: .cfi_offset %edi, -16 1993; X86-NEXT: .cfi_offset %ebx, -12 1994; X86-NEXT: .cfi_offset %ebp, -8 1995; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14] 1996; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18] 1997; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1] 1998; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6] 1999; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2000; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2001; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2002; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 2003; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2004; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] 2005; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x01] 2006; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2007; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2008; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] 2009; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2010; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] 2011; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] 2012; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] 2013; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x02] 2014; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2015; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2016; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3] 2017; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2018; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2] 2019; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda] 2020; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8] 2021; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04] 2022; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2023; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2024; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb] 2025; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2026; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda] 2027; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3] 2028; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7] 2029; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x05] 2030; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2031; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9] 2032; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb] 2033; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0] 2034; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca] 2035; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9] 2036; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd] 2037; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x06] 2038; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20] 2039; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9] 2040; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 2041; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0] 2042; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2043; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2044; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea] 2045; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2046; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18] 2047; X86-NEXT: popl %esi # encoding: [0x5e] 2048; X86-NEXT: .cfi_def_cfa_offset 16 2049; X86-NEXT: popl %edi # encoding: [0x5f] 2050; X86-NEXT: .cfi_def_cfa_offset 12 2051; X86-NEXT: popl %ebx # encoding: [0x5b] 2052; X86-NEXT: .cfi_def_cfa_offset 8 2053; X86-NEXT: popl %ebp # encoding: [0x5d] 2054; X86-NEXT: .cfi_def_cfa_offset 4 2055; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2056; X86-NEXT: retl # encoding: [0xc3] 2057; 2058; X64-LABEL: test_mask_x86_avx512_ucmp_b_512: 2059; X64: # %bb.0: 2060; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 2061; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1] 2062; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2063; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01] 2064; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2065; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 2066; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02] 2067; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2068; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8] 2069; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04] 2070; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8] 2071; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1] 2072; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05] 2073; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0] 2074; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca] 2075; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06] 2076; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2077; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0] 2078; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] 2079; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2080; X64-NEXT: retq # encoding: [0xc3] 2081 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) 2082 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) 2083 %ret1 = add i64 %res0, %res1 2084 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) 2085 %ret2 = add i64 %ret1, %res2 2086 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) 2087 %ret3 = add i64 %ret2, %res3 2088 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) 2089 %ret4 = add i64 %ret3, %res4 2090 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) 2091 %ret5 = add i64 %ret4, %res5 2092 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) 2093 %ret6 = add i64 %ret5, %res6 2094 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) 2095 %ret7 = add i64 %ret6, %res7 2096 ret i64 %ret7 2097} 2098 2099declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone 2100 2101define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { 2102; X86-LABEL: test_cmp_w_512: 2103; X86: # %bb.0: 2104; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 2105; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2106; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0] 2107; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2108; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2109; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02] 2110; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2111; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2112; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] 2113; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2114; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2115; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05] 2116; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2117; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2118; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] 2119; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2120; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff] 2121; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2122; X86-NEXT: retl # encoding: [0xc3] 2123; 2124; X64-LABEL: test_cmp_w_512: 2125; X64: # %bb.0: 2126; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 2127; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2128; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0] 2129; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2130; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2131; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02] 2132; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2133; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2134; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] 2135; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2136; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2137; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05] 2138; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2139; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2140; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1] 2141; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2142; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff] 2143; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2144; X64-NEXT: retq # encoding: [0xc3] 2145 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) 2146 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) 2147 %ret1 = add i32 %res0, %res1 2148 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) 2149 %ret2 = add i32 %ret1, %res2 2150 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) 2151 %ret3 = add i32 %ret2, %res3 2152 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) 2153 %ret4 = add i32 %ret3, %res4 2154 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) 2155 %ret5 = add i32 %ret4, %res5 2156 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) 2157 %ret6 = add i32 %ret5, %res6 2158 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) 2159 %ret7 = add i32 %ret6, %res7 2160 ret i32 %ret7 2161} 2162 2163define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { 2164; X86-LABEL: test_mask_cmp_w_512: 2165; X86: # %bb.0: 2166; X86-NEXT: pushl %esi # encoding: [0x56] 2167; X86-NEXT: .cfi_def_cfa_offset 8 2168; X86-NEXT: .cfi_offset %esi, -8 2169; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2170; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 2171; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] 2172; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2173; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] 2174; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2175; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] 2176; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] 2177; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2178; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] 2179; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] 2180; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2181; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] 2182; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05] 2183; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2184; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6] 2185; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] 2186; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2187; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2188; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2189; X86-NEXT: popl %esi # encoding: [0x5e] 2190; X86-NEXT: .cfi_def_cfa_offset 4 2191; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2192; X86-NEXT: retl # encoding: [0xc3] 2193; 2194; X64-LABEL: test_mask_cmp_w_512: 2195; X64: # %bb.0: 2196; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2197; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] 2198; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2199; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0] 2200; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2201; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2202; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02] 2203; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2204; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2205; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] 2206; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2207; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2208; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05] 2209; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2210; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] 2211; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1] 2212; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2213; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] 2214; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2215; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2216; X64-NEXT: retq # encoding: [0xc3] 2217 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) 2218 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) 2219 %ret1 = add i32 %res0, %res1 2220 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) 2221 %ret2 = add i32 %ret1, %res2 2222 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) 2223 %ret3 = add i32 %ret2, %res3 2224 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) 2225 %ret4 = add i32 %ret3, %res4 2226 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) 2227 %ret5 = add i32 %ret4, %res5 2228 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) 2229 %ret6 = add i32 %ret5, %res6 2230 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) 2231 %ret7 = add i32 %ret6, %res7 2232 ret i32 %ret7 2233} 2234 2235declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone 2236 2237define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { 2238; X86-LABEL: test_ucmp_w_512: 2239; X86: # %bb.0: 2240; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 2241; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2242; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01] 2243; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2244; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2245; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02] 2246; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2247; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2248; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] 2249; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2250; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2251; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05] 2252; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2253; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2254; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06] 2255; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2256; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff] 2257; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2258; X86-NEXT: retl # encoding: [0xc3] 2259; 2260; X64-LABEL: test_ucmp_w_512: 2261; X64: # %bb.0: 2262; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1] 2263; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2264; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01] 2265; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2266; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2267; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02] 2268; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2269; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2270; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04] 2271; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2272; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2273; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05] 2274; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2275; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2276; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06] 2277; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2278; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff] 2279; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2280; X64-NEXT: retq # encoding: [0xc3] 2281 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) 2282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) 2283 %ret1 = add i32 %res0, %res1 2284 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) 2285 %ret2 = add i32 %ret1, %res2 2286 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) 2287 %ret3 = add i32 %ret2, %res3 2288 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) 2289 %ret4 = add i32 %ret3, %res4 2290 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) 2291 %ret5 = add i32 %ret4, %res5 2292 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) 2293 %ret6 = add i32 %ret5, %res6 2294 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) 2295 %ret7 = add i32 %ret6, %res7 2296 ret i32 %ret7 2297} 2298 2299define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { 2300; X86-LABEL: test_mask_ucmp_w_512: 2301; X86: # %bb.0: 2302; X86-NEXT: pushl %esi # encoding: [0x56] 2303; X86-NEXT: .cfi_def_cfa_offset 8 2304; X86-NEXT: .cfi_offset %esi, -8 2305; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2306; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 2307; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] 2308; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2309; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] 2310; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2311; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] 2312; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] 2313; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2314; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] 2315; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] 2316; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2317; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2] 2318; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05] 2319; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2320; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6] 2321; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] 2322; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2323; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2324; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2325; X86-NEXT: popl %esi # encoding: [0x5e] 2326; X86-NEXT: .cfi_def_cfa_offset 4 2327; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2328; X86-NEXT: retl # encoding: [0xc3] 2329; 2330; X64-LABEL: test_mask_ucmp_w_512: 2331; X64: # %bb.0: 2332; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2333; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1] 2334; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2335; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01] 2336; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2337; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2338; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02] 2339; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2340; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2341; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04] 2342; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2343; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1] 2344; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05] 2345; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0] 2346; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca] 2347; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06] 2348; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2349; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0] 2350; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2351; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2352; X64-NEXT: retq # encoding: [0xc3] 2353 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) 2354 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) 2355 %ret1 = add i32 %res0, %res1 2356 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) 2357 %ret2 = add i32 %ret1, %res2 2358 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) 2359 %ret3 = add i32 %ret2, %res3 2360 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) 2361 %ret4 = add i32 %ret3, %res4 2362 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) 2363 %ret5 = add i32 %ret4, %res5 2364 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) 2365 %ret6 = add i32 %ret5, %res6 2366 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) 2367 %ret7 = add i32 %ret6, %res7 2368 ret i32 %ret7 2369} 2370 2371declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone 2372 2373 2374declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 2375 2376define <64 x i8>@mm512_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 2377; X86-LABEL: mm512_avg_epu8: 2378; X86: # %bb.0: 2379; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xd9] 2380; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 2381; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1] 2382; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 2383; X86-NEXT: retl # encoding: [0xc3] 2384; 2385; X64-LABEL: mm512_avg_epu8: 2386; X64: # %bb.0: 2387; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xd9] 2388; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 2389; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1] 2390; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3] 2391; X64-NEXT: retq # encoding: [0xc3] 2392 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 2393 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 2394 %res2 = add <64 x i8> %res, %res1 2395 ret <64 x i8> %res2 2396} 2397 2398 2399declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2400 2401define <32 x i16>@mm512_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2402; X86-LABEL: mm512_avg_epu16: 2403; X86: # %bb.0: 2404; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xd9] 2405; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2406; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1] 2407; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2408; X86-NEXT: retl # encoding: [0xc3] 2409; 2410; X64-LABEL: mm512_avg_epu16: 2411; X64: # %bb.0: 2412; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xd9] 2413; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2414; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1] 2415; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2416; X64-NEXT: retq # encoding: [0xc3] 2417 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2418 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2419 %res2 = add <32 x i16> %res, %res1 2420 ret <32 x i16> %res2 2421} 2422 2423declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) 2424 2425define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { 2426; X86-LABEL: test_int_x86_avx512_mask_pabs_w_512: 2427; X86: # %bb.0: 2428; X86-NEXT: vpabsw %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xd0] 2429; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2430; X86-NEXT: vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8] 2431; X86-NEXT: vpaddw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc2] 2432; X86-NEXT: retl # encoding: [0xc3] 2433; 2434; X64-LABEL: test_int_x86_avx512_mask_pabs_w_512: 2435; X64: # %bb.0: 2436; X64-NEXT: vpabsw %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xd0] 2437; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2438; X64-NEXT: vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8] 2439; X64-NEXT: vpaddw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc2] 2440; X64-NEXT: retq # encoding: [0xc3] 2441 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) 2442 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) 2443 %res2 = add <32 x i16> %res, %res1 2444 ret <32 x i16> %res2 2445} 2446 2447declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) 2448 2449define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2450; X86-LABEL: test_int_x86_avx512_mask_pabs_b_512: 2451; X86: # %bb.0: 2452; X86-NEXT: vpabsb %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xd0] 2453; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] 2454; X86-NEXT: vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8] 2455; X86-NEXT: vpaddb %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc2] 2456; X86-NEXT: retl # encoding: [0xc3] 2457; 2458; X64-LABEL: test_int_x86_avx512_mask_pabs_b_512: 2459; X64: # %bb.0: 2460; X64-NEXT: vpabsb %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xd0] 2461; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] 2462; X64-NEXT: vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8] 2463; X64-NEXT: vpaddb %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc2] 2464; X64-NEXT: retq # encoding: [0xc3] 2465 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2466 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) 2467 %res2 = add <64 x i8> %res, %res1 2468 ret <64 x i8> %res2 2469} 2470 2471declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64) 2472 2473define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2474; X86-LABEL: test_int_x86_avx512_ptestm_b_512: 2475; X86: # %bb.0: 2476; X86-NEXT: pushl %esi # encoding: [0x56] 2477; X86-NEXT: .cfi_def_cfa_offset 8 2478; X86-NEXT: .cfi_offset %esi, -8 2479; X86-NEXT: vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1] 2480; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2481; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 2482; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2483; X86-NEXT: andl %ecx, %edx # encoding: [0x21,0xca] 2484; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2485; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2486; X86-NEXT: andl %esi, %eax # encoding: [0x21,0xf0] 2487; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2488; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] 2489; X86-NEXT: popl %esi # encoding: [0x5e] 2490; X86-NEXT: .cfi_def_cfa_offset 4 2491; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2492; X86-NEXT: retl # encoding: [0xc3] 2493; 2494; X64-LABEL: test_int_x86_avx512_ptestm_b_512: 2495; X64: # %bb.0: 2496; X64-NEXT: vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1] 2497; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2498; X64-NEXT: andq %rax, %rdi # encoding: [0x48,0x21,0xc7] 2499; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] 2500; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2501; X64-NEXT: retq # encoding: [0xc3] 2502 %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2503 %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) 2504 %res2 = add i64 %res, %res1 2505 ret i64 %res2 2506} 2507 2508declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32) 2509 2510define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { 2511; X86-LABEL: test_int_x86_avx512_ptestm_w_512: 2512; X86: # %bb.0: 2513; X86-NEXT: vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1] 2514; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2515; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2516; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8] 2517; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2518; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2519; X86-NEXT: retl # encoding: [0xc3] 2520; 2521; X64-LABEL: test_int_x86_avx512_ptestm_w_512: 2522; X64: # %bb.0: 2523; X64-NEXT: vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1] 2524; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2525; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 2526; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2527; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2528; X64-NEXT: retq # encoding: [0xc3] 2529 %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) 2530 %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) 2531 %res2 = add i32 %res, %res1 2532 ret i32 %res2 2533} 2534 2535declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2) 2536 2537define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { 2538; X86-LABEL: test_int_x86_avx512_ptestnm_b_512: 2539; X86: # %bb.0: 2540; X86-NEXT: pushl %esi # encoding: [0x56] 2541; X86-NEXT: .cfi_def_cfa_offset 8 2542; X86-NEXT: .cfi_offset %esi, -8 2543; X86-NEXT: vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1] 2544; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2545; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9] 2546; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2547; X86-NEXT: andl %ecx, %edx # encoding: [0x21,0xca] 2548; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0] 2549; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2550; X86-NEXT: andl %esi, %eax # encoding: [0x21,0xf0] 2551; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0] 2552; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca] 2553; X86-NEXT: popl %esi # encoding: [0x5e] 2554; X86-NEXT: .cfi_def_cfa_offset 4 2555; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2556; X86-NEXT: retl # encoding: [0xc3] 2557; 2558; X64-LABEL: test_int_x86_avx512_ptestnm_b_512: 2559; X64: # %bb.0: 2560; X64-NEXT: vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1] 2561; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2562; X64-NEXT: andq %rax, %rdi # encoding: [0x48,0x21,0xc7] 2563; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8] 2564; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2565; X64-NEXT: retq # encoding: [0xc3] 2566 %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) 2567 %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) 2568 %res2 = add i64 %res, %res1 2569 ret i64 %res2 2570} 2571 2572declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2) 2573 2574define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { 2575; X86-LABEL: test_int_x86_avx512_ptestnm_w_512: 2576; X86: # %bb.0: 2577; X86-NEXT: vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1] 2578; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] 2579; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2580; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8] 2581; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8] 2582; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2583; X86-NEXT: retl # encoding: [0xc3] 2584; 2585; X64-LABEL: test_int_x86_avx512_ptestnm_w_512: 2586; X64: # %bb.0: 2587; X64-NEXT: vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1] 2588; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2589; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7] 2590; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8] 2591; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2592; X64-NEXT: retq # encoding: [0xc3] 2593 %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) 2594 %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) 2595 %res2 = add i32 %res, %res1 2596 ret i32 %res2 2597} 2598 2599declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>) 2600 2601define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) { 2602; X86-LABEL: test_int_x86_avx512_cvtb2mask_512: 2603; X86: # %bb.0: 2604; X86-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0] 2605; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20] 2606; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2607; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1] 2608; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2609; X86-NEXT: retl # encoding: [0xc3] 2610; 2611; X64-LABEL: test_int_x86_avx512_cvtb2mask_512: 2612; X64: # %bb.0: 2613; X64-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0] 2614; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0] 2615; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2616; X64-NEXT: retq # encoding: [0xc3] 2617 %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0) 2618 ret i64 %res 2619} 2620 2621declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>) 2622 2623define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) { 2624; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_512: 2625; CHECK: # %bb.0: 2626; CHECK-NEXT: vpmovw2m %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x29,0xc0] 2627; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] 2628; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 2629; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2630 %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0) 2631 ret i32 %res 2632} 2633 2634declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2635 2636define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2637; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: 2638; X86: # %bb.0: 2639; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9] 2640; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2641; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1] 2642; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2643; X86-NEXT: retl # encoding: [0xc3] 2644; 2645; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: 2646; X64: # %bb.0: 2647; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9] 2648; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2649; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1] 2650; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2651; X64-NEXT: retq # encoding: [0xc3] 2652 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2653 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2654 %res2 = add <32 x i16> %res, %res1 2655 ret <32 x i16> %res2 2656} 2657 2658declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2659 2660define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2661; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512: 2662; X86: # %bb.0: 2663; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9] 2664; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2665; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1] 2666; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2667; X86-NEXT: retl # encoding: [0xc3] 2668; 2669; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512: 2670; X64: # %bb.0: 2671; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9] 2672; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2673; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1] 2674; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2675; X64-NEXT: retq # encoding: [0xc3] 2676 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2677 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2678 %res2 = add <32 x i16> %res, %res1 2679 ret <32 x i16> %res2 2680} 2681 2682declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2683 2684define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2685; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: 2686; X86: # %bb.0: 2687; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9] 2688; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2689; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1] 2690; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2691; X86-NEXT: retl # encoding: [0xc3] 2692; 2693; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: 2694; X64: # %bb.0: 2695; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9] 2696; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2697; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1] 2698; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2699; X64-NEXT: retq # encoding: [0xc3] 2700 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2701 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2702 %res2 = add <32 x i16> %res, %res1 2703 ret <32 x i16> %res2 2704} 2705 2706declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) 2707 2708define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { 2709; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 2710; X86: # %bb.0: 2711; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9] 2712; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2713; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1] 2714; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2715; X86-NEXT: retl # encoding: [0xc3] 2716; 2717; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 2718; X64: # %bb.0: 2719; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9] 2720; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2721; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1] 2722; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3] 2723; X64-NEXT: retq # encoding: [0xc3] 2724 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) 2725 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) 2726 %res2 = add <32 x i16> %res, %res1 2727 ret <32 x i16> %res2 2728} 2729 2730declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) 2731 2732define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { 2733; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: 2734; X86: # %bb.0: 2735; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9] 2736; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2737; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1] 2738; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3] 2739; X86-NEXT: retl # encoding: [0xc3] 2740; 2741; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: 2742; X64: # %bb.0: 2743; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9] 2744; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2745; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1] 2746; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3] 2747; X64-NEXT: retq # encoding: [0xc3] 2748 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) 2749 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) 2750 %res2 = add <16 x i32> %res, %res1 2751 ret <16 x i32> %res2 2752} 2753 2754declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2755 2756define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2757; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512: 2758; X86: # %bb.0: 2759; X86-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8] 2760; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2761; X86-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0] 2762; X86-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0] 2763; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] 2764; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 2765; X86-NEXT: retl # encoding: [0xc3] 2766; 2767; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512: 2768; X64: # %bb.0: 2769; X64-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8] 2770; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2771; X64-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0] 2772; X64-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0] 2773; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] 2774; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 2775; X64-NEXT: retq # encoding: [0xc3] 2776 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2777 %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) 2778 %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2779 %res3 = add <32 x i16> %res, %res1 2780 %res4 = add <32 x i16> %res3, %res2 2781 ret <32 x i16> %res4 2782} 2783 2784declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2785 2786define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2787; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: 2788; X86: # %bb.0: 2789; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] 2790; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda] 2791; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2792; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca] 2793; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] 2794; X86-NEXT: retl # encoding: [0xc3] 2795; 2796; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: 2797; X64: # %bb.0: 2798; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] 2799; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda] 2800; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2801; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca] 2802; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] 2803; X64-NEXT: retq # encoding: [0xc3] 2804 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2805 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2806 %res2 = add <32 x i16> %res, %res1 2807 ret <32 x i16> %res2 2808} 2809 2810declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2811 2812define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2813; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: 2814; X86: # %bb.0: 2815; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] 2816; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda] 2817; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2818; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca] 2819; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] 2820; X86-NEXT: retl # encoding: [0xc3] 2821; 2822; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: 2823; X64: # %bb.0: 2824; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] 2825; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda] 2826; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2827; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca] 2828; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] 2829; X64-NEXT: retq # encoding: [0xc3] 2830 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2831 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2832 %res2 = add <32 x i16> %res, %res1 2833 ret <32 x i16> %res2 2834} 2835 2836declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2837 2838define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2839; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 2840; X86: # %bb.0: 2841; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 2842; X86-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda] 2843; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2844; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca] 2845; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] 2846; X86-NEXT: retl # encoding: [0xc3] 2847; 2848; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: 2849; X64: # %bb.0: 2850; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] 2851; X64-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda] 2852; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2853; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca] 2854; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3] 2855; X64-NEXT: retq # encoding: [0xc3] 2856 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2857 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2858 %res2 = add <32 x i16> %res, %res1 2859 ret <32 x i16> %res2 2860} 2861 2862declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32) 2863 2864define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { 2865; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: 2866; X86: # %bb.0: 2867; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02] 2868; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2869; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] 2870; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02] 2871; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] 2872; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 2873; X86-NEXT: retl # encoding: [0xc3] 2874; 2875; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: 2876; X64: # %bb.0: 2877; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02] 2878; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2879; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] 2880; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02] 2881; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3] 2882; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] 2883; X64-NEXT: retq # encoding: [0xc3] 2884 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) 2885 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4) 2886 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1) 2887 %res3 = add <32 x i16> %res, %res1 2888 %res4 = add <32 x i16> %res3, %res2 2889 ret <32 x i16> %res4 2890} 2891