1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 6; CHECK-LABEL: test_mask_packs_epi32_rr_128: 7; CHECK: # %bb.0: 8; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1] 9; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 11 ret <8 x i16> %1 12} 13 14define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 15; X86-LABEL: test_mask_packs_epi32_rrk_128: 16; X86: # %bb.0: 17; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 18; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 19; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] 20; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 21; X86-NEXT: retl # encoding: [0xc3] 22; 23; X64-LABEL: test_mask_packs_epi32_rrk_128: 24; X64: # %bb.0: 25; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 26; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] 27; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 28; X64-NEXT: retq # encoding: [0xc3] 29 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 30 %2 = bitcast i8 %mask to <8 x i1> 31 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 32 ret <8 x i16> %3 33} 34 35define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 36; X86-LABEL: test_mask_packs_epi32_rrkz_128: 37; X86: # %bb.0: 38; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 39; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 40; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] 41; X86-NEXT: retl # encoding: [0xc3] 42; 43; X64-LABEL: test_mask_packs_epi32_rrkz_128: 44; X64: # %bb.0: 45; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 46; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] 47; X64-NEXT: retq # encoding: [0xc3] 48 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 49 %2 = bitcast i8 %mask to <8 x i1> 50 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 51 ret <8 x i16> %3 52} 53 54define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 55; X86-LABEL: test_mask_packs_epi32_rm_128: 56; X86: # %bb.0: 57; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 58; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00] 59; X86-NEXT: retl # encoding: [0xc3] 60; 61; X64-LABEL: test_mask_packs_epi32_rm_128: 62; X64: # %bb.0: 63; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07] 64; X64-NEXT: retq # encoding: [0xc3] 65 %b = load <4 x i32>, <4 x i32>* %ptr_b 66 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 67 ret <8 x i16> %1 68} 69 70define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x 
i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 71; X86-LABEL: test_mask_packs_epi32_rmk_128: 72; X86: # %bb.0: 73; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 74; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 75; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 76; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08] 77; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 78; X86-NEXT: retl # encoding: [0xc3] 79; 80; X64-LABEL: test_mask_packs_epi32_rmk_128: 81; X64: # %bb.0: 82; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 83; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] 84; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 85; X64-NEXT: retq # encoding: [0xc3] 86 %b = load <4 x i32>, <4 x i32>* %ptr_b 87 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 88 %2 = bitcast i8 %mask to <8 x i1> 89 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 90 ret <8 x i16> %3 91} 92 93define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 94; X86-LABEL: test_mask_packs_epi32_rmkz_128: 95; X86: # %bb.0: 96; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 97; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 98; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 99; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00] 100; X86-NEXT: retl # encoding: [0xc3] 101; 102; X64-LABEL: test_mask_packs_epi32_rmkz_128: 103; X64: # %bb.0: 104; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 105; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07] 106; X64-NEXT: retq # encoding: [0xc3] 107 %b = load <4 x i32>, <4 x i32>* %ptr_b 108 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 109 %2 = bitcast i8 %mask to <8 x i1> 110 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 111 ret <8 x i16> %3 112} 113 114define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 115; X86-LABEL: test_mask_packs_epi32_rmb_128: 116; X86: # %bb.0: 117; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 118; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00] 119; X86-NEXT: retl # encoding: [0xc3] 120; 121; X64-LABEL: test_mask_packs_epi32_rmb_128: 122; X64: # %bb.0: 123; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07] 124; X64-NEXT: retq # encoding: [0xc3] 125 %q = load i32, i32* %ptr_b 126 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 127 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 128 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 129 ret <8 x i16> %1 130} 131 132define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 133; X86-LABEL: test_mask_packs_epi32_rmbk_128: 134; X86: # %bb.0: 135; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 136; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 137; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 138; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} 
# encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08] 139; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 140; X86-NEXT: retl # encoding: [0xc3] 141; 142; X64-LABEL: test_mask_packs_epi32_rmbk_128: 143; X64: # %bb.0: 144; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 145; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] 146; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 147; X64-NEXT: retq # encoding: [0xc3] 148 %q = load i32, i32* %ptr_b 149 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 150 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 151 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 152 %2 = bitcast i8 %mask to <8 x i1> 153 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 154 ret <8 x i16> %3 155} 156 157define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 158; X86-LABEL: test_mask_packs_epi32_rmbkz_128: 159; X86: # %bb.0: 160; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 161; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 162; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 163; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00] 164; X86-NEXT: retl # encoding: [0xc3] 165; 166; X64-LABEL: test_mask_packs_epi32_rmbkz_128: 167; X64: # %bb.0: 168; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 169; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] 170; X64-NEXT: retq # encoding: [0xc3] 171 %q = load i32, i32* %ptr_b 172 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 173 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 174 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b) 175 %2 = bitcast i8 %mask to <8 x i1> 176 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 177 ret <8 x i16> %3 178} 179 180declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) 181 182define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 183; CHECK-LABEL: test_mask_packs_epi32_rr_256: 184; CHECK: # %bb.0: 185; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1] 186; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 187 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 188 ret <16 x i16> %1 189} 190 191define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 192; X86-LABEL: test_mask_packs_epi32_rrk_256: 193; X86: # %bb.0: 194; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 195; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] 196; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 197; X86-NEXT: retl # encoding: [0xc3] 198; 199; X64-LABEL: test_mask_packs_epi32_rrk_256: 200; X64: # %bb.0: 201; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 202; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] 203; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 204; X64-NEXT: retq # encoding: [0xc3] 205 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x 
i32> %a, <8 x i32> %b) 206 %2 = bitcast i16 %mask to <16 x i1> 207 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 208 ret <16 x i16> %3 209} 210 211define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 212; X86-LABEL: test_mask_packs_epi32_rrkz_256: 213; X86: # %bb.0: 214; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 215; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] 216; X86-NEXT: retl # encoding: [0xc3] 217; 218; X64-LABEL: test_mask_packs_epi32_rrkz_256: 219; X64: # %bb.0: 220; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 221; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] 222; X64-NEXT: retq # encoding: [0xc3] 223 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 224 %2 = bitcast i16 %mask to <16 x i1> 225 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 226 ret <16 x i16> %3 227} 228 229define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 230; X86-LABEL: test_mask_packs_epi32_rm_256: 231; X86: # %bb.0: 232; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 233; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00] 234; X86-NEXT: retl # encoding: [0xc3] 235; 236; X64-LABEL: test_mask_packs_epi32_rm_256: 237; X64: # %bb.0: 238; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07] 239; X64-NEXT: retq # encoding: [0xc3] 240 %b = load <8 x i32>, <8 x i32>* %ptr_b 241 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 242 ret <16 x i16> %1 243} 244 245define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 246; X86-LABEL: test_mask_packs_epi32_rmk_256: 247; X86: # %bb.0: 248; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 249; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 250; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08] 251; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 252; X86-NEXT: retl # encoding: [0xc3] 253; 254; X64-LABEL: test_mask_packs_epi32_rmk_256: 255; X64: # %bb.0: 256; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 257; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] 258; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 259; X64-NEXT: retq # encoding: [0xc3] 260 %b = load <8 x i32>, <8 x i32>* %ptr_b 261 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 262 %2 = bitcast i16 %mask to <16 x i1> 263 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 264 ret <16 x i16> %3 265} 266 267define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 268; X86-LABEL: test_mask_packs_epi32_rmkz_256: 269; X86: # %bb.0: 270; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 271; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 272; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00] 273; X86-NEXT: retl # encoding: [0xc3] 274; 275; X64-LABEL: test_mask_packs_epi32_rmkz_256: 276; X64: # %bb.0: 277; X64-NEXT: kmovd %esi, %k1 # 
encoding: [0xc5,0xfb,0x92,0xce] 278; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] 279; X64-NEXT: retq # encoding: [0xc3] 280 %b = load <8 x i32>, <8 x i32>* %ptr_b 281 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 282 %2 = bitcast i16 %mask to <16 x i1> 283 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 284 ret <16 x i16> %3 285} 286 287define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 288; X86-LABEL: test_mask_packs_epi32_rmb_256: 289; X86: # %bb.0: 290; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 291; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00] 292; X86-NEXT: retl # encoding: [0xc3] 293; 294; X64-LABEL: test_mask_packs_epi32_rmb_256: 295; X64: # %bb.0: 296; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07] 297; X64-NEXT: retq # encoding: [0xc3] 298 %q = load i32, i32* %ptr_b 299 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 300 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 301 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 302 ret <16 x i16> %1 303} 304 305define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 306; X86-LABEL: test_mask_packs_epi32_rmbk_256: 307; X86: # %bb.0: 308; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 309; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 310; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08] 311; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 312; X86-NEXT: retl # encoding: [0xc3] 313; 314; X64-LABEL: test_mask_packs_epi32_rmbk_256: 315; X64: # %bb.0: 316; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 317; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] 318; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 319; X64-NEXT: retq # encoding: [0xc3] 320 %q = load i32, i32* %ptr_b 321 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 322 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 323 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 324 %2 = bitcast i16 %mask to <16 x i1> 325 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 326 ret <16 x i16> %3 327} 328 329define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 330; X86-LABEL: test_mask_packs_epi32_rmbkz_256: 331; X86: # %bb.0: 332; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 333; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 334; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00] 335; X86-NEXT: retl # encoding: [0xc3] 336; 337; X64-LABEL: test_mask_packs_epi32_rmbkz_256: 338; X64: # %bb.0: 339; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 340; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] 341; X64-NEXT: retq # encoding: [0xc3] 342 %q = load i32, i32* %ptr_b 343 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 344 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> 
zeroinitializer 345 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b) 346 %2 = bitcast i16 %mask to <16 x i1> 347 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 348 ret <16 x i16> %3 349} 350 351declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) 352 353define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 354; CHECK-LABEL: test_mask_packs_epi16_rr_128: 355; CHECK: # %bb.0: 356; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] 357; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 358 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 359 ret <16 x i8> %1 360} 361 362define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 363; X86-LABEL: test_mask_packs_epi16_rrk_128: 364; X86: # %bb.0: 365; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 366; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 367; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 368; X86-NEXT: retl # encoding: [0xc3] 369; 370; X64-LABEL: test_mask_packs_epi16_rrk_128: 371; X64: # %bb.0: 372; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 373; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 374; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 375; X64-NEXT: retq # encoding: [0xc3] 376 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 377 %2 = bitcast i16 %mask to <16 x i1> 378 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 379 ret <16 x i8> %3 380} 381 382define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 383; X86-LABEL: test_mask_packs_epi16_rrkz_128: 384; X86: # %bb.0: 385; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 386; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 387; X86-NEXT: retl # encoding: [0xc3] 388; 389; X64-LABEL: test_mask_packs_epi16_rrkz_128: 390; X64: # %bb.0: 391; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 392; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 393; X64-NEXT: retq # encoding: [0xc3] 394 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 395 %2 = bitcast i16 %mask to <16 x i1> 396 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 397 ret <16 x i8> %3 398} 399 400define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 401; X86-LABEL: test_mask_packs_epi16_rm_128: 402; X86: # %bb.0: 403; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 404; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00] 405; X86-NEXT: retl # encoding: [0xc3] 406; 407; X64-LABEL: test_mask_packs_epi16_rm_128: 408; X64: # %bb.0: 409; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07] 410; X64-NEXT: retq # encoding: [0xc3] 411 %b = load <8 x i16>, <8 x i16>* %ptr_b 412 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 413 ret <16 x i8> %1 414} 415 416define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 417; X86-LABEL: 
test_mask_packs_epi16_rmk_128: 418; X86: # %bb.0: 419; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 420; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 421; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08] 422; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 423; X86-NEXT: retl # encoding: [0xc3] 424; 425; X64-LABEL: test_mask_packs_epi16_rmk_128: 426; X64: # %bb.0: 427; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 428; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] 429; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 430; X64-NEXT: retq # encoding: [0xc3] 431 %b = load <8 x i16>, <8 x i16>* %ptr_b 432 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 433 %2 = bitcast i16 %mask to <16 x i1> 434 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 435 ret <16 x i8> %3 436} 437 438define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 439; X86-LABEL: test_mask_packs_epi16_rmkz_128: 440; X86: # %bb.0: 441; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 442; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 443; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00] 444; X86-NEXT: retl # encoding: [0xc3] 445; 446; X64-LABEL: test_mask_packs_epi16_rmkz_128: 447; X64: # %bb.0: 448; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 449; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07] 450; X64-NEXT: retq # encoding: [0xc3] 451 %b = load <8 x i16>, <8 x i16>* %ptr_b 452 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b) 453 %2 = bitcast i16 %mask to <16 x i1> 454 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 455 ret <16 x i8> %3 456} 457 458declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) 459 460define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 461; CHECK-LABEL: test_mask_packs_epi16_rr_256: 462; CHECK: # %bb.0: 463; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1] 464; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 465 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 466 ret <32 x i8> %1 467} 468 469define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 470; X86-LABEL: test_mask_packs_epi16_rrk_256: 471; X86: # %bb.0: 472; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 473; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 474; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 475; X86-NEXT: retl # encoding: [0xc3] 476; 477; X64-LABEL: test_mask_packs_epi16_rrk_256: 478; X64: # %bb.0: 479; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 480; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 481; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 482; X64-NEXT: retq # encoding: [0xc3] 483 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 484 %2 = bitcast i32 %mask to <32 x i1> 485 %3 = 
select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 486 ret <32 x i8> %3 487} 488 489define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 490; X86-LABEL: test_mask_packs_epi16_rrkz_256: 491; X86: # %bb.0: 492; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 493; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 494; X86-NEXT: retl # encoding: [0xc3] 495; 496; X64-LABEL: test_mask_packs_epi16_rrkz_256: 497; X64: # %bb.0: 498; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 499; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 500; X64-NEXT: retq # encoding: [0xc3] 501 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 502 %2 = bitcast i32 %mask to <32 x i1> 503 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 504 ret <32 x i8> %3 505} 506 507define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 508; X86-LABEL: test_mask_packs_epi16_rm_256: 509; X86: # %bb.0: 510; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 511; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00] 512; X86-NEXT: retl # encoding: [0xc3] 513; 514; X64-LABEL: test_mask_packs_epi16_rm_256: 515; X64: # %bb.0: 516; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07] 517; X64-NEXT: retq # encoding: [0xc3] 518 %b = load <16 x i16>, <16 x i16>* %ptr_b 519 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 520 ret <32 x i8> %1 521} 522 523define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 524; X86-LABEL: test_mask_packs_epi16_rmk_256: 525; X86: # %bb.0: 526; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 527; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 528; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08] 529; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 530; X86-NEXT: retl # encoding: [0xc3] 531; 532; X64-LABEL: test_mask_packs_epi16_rmk_256: 533; X64: # %bb.0: 534; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 535; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f] 536; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 537; X64-NEXT: retq # encoding: [0xc3] 538 %b = load <16 x i16>, <16 x i16>* %ptr_b 539 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 540 %2 = bitcast i32 %mask to <32 x i1> 541 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 542 ret <32 x i8> %3 543} 544 545define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 546; X86-LABEL: test_mask_packs_epi16_rmkz_256: 547; X86: # %bb.0: 548; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 549; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 550; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00] 551; X86-NEXT: retl # encoding: [0xc3] 552; 553; X64-LABEL: test_mask_packs_epi16_rmkz_256: 554; X64: # %bb.0: 555; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 556; X64-NEXT: vpacksswb 
(%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07] 557; X64-NEXT: retq # encoding: [0xc3] 558 %b = load <16 x i16>, <16 x i16>* %ptr_b 559 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b) 560 %2 = bitcast i32 %mask to <32 x i1> 561 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 562 ret <32 x i8> %3 563} 564 565declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) 566 567 568define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 569; CHECK-LABEL: test_mask_packus_epi32_rr_128: 570; CHECK: # %bb.0: 571; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1] 572; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 573 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 574 ret <8 x i16> %1 575} 576 577define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 578; X86-LABEL: test_mask_packus_epi32_rrk_128: 579; X86: # %bb.0: 580; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 581; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 582; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] 583; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 584; X86-NEXT: retl # encoding: [0xc3] 585; 586; X64-LABEL: test_mask_packus_epi32_rrk_128: 587; X64: # %bb.0: 588; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 589; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] 590; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 591; X64-NEXT: retq # encoding: [0xc3] 592 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 593 %2 = bitcast i8 %mask to <8 x i1> 594 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 595 ret <8 x i16> %3 596} 597 598define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 599; X86-LABEL: test_mask_packus_epi32_rrkz_128: 600; X86: # %bb.0: 601; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 602; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 603; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 604; X86-NEXT: retl # encoding: [0xc3] 605; 606; X64-LABEL: test_mask_packus_epi32_rrkz_128: 607; X64: # %bb.0: 608; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 609; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 610; X64-NEXT: retq # encoding: [0xc3] 611 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 612 %2 = bitcast i8 %mask to <8 x i1> 613 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 614 ret <8 x i16> %3 615} 616 617define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 618; X86-LABEL: test_mask_packus_epi32_rm_128: 619; X86: # %bb.0: 620; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 621; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00] 622; X86-NEXT: retl # encoding: [0xc3] 623; 624; X64-LABEL: test_mask_packus_epi32_rm_128: 625; X64: # %bb.0: 626; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07] 627; X64-NEXT: retq # encoding: [0xc3] 628 %b = load <4 x 
i32>, <4 x i32>* %ptr_b 629 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 630 ret <8 x i16> %1 631} 632 633define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 634; X86-LABEL: test_mask_packus_epi32_rmk_128: 635; X86: # %bb.0: 636; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 637; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 638; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 639; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08] 640; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 641; X86-NEXT: retl # encoding: [0xc3] 642; 643; X64-LABEL: test_mask_packus_epi32_rmk_128: 644; X64: # %bb.0: 645; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 646; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] 647; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 648; X64-NEXT: retq # encoding: [0xc3] 649 %b = load <4 x i32>, <4 x i32>* %ptr_b 650 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 651 %2 = bitcast i8 %mask to <8 x i1> 652 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 653 ret <8 x i16> %3 654} 655 656define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 657; X86-LABEL: test_mask_packus_epi32_rmkz_128: 658; X86: # %bb.0: 659; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 660; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 661; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 662; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00] 663; X86-NEXT: retl # encoding: [0xc3] 664; 665; X64-LABEL: test_mask_packus_epi32_rmkz_128: 666; X64: # %bb.0: 667; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 668; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07] 669; X64-NEXT: retq # encoding: [0xc3] 670 %b = load <4 x i32>, <4 x i32>* %ptr_b 671 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 672 %2 = bitcast i8 %mask to <8 x i1> 673 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 674 ret <8 x i16> %3 675} 676 677define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 678; X86-LABEL: test_mask_packus_epi32_rmb_128: 679; X86: # %bb.0: 680; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 681; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00] 682; X86-NEXT: retl # encoding: [0xc3] 683; 684; X64-LABEL: test_mask_packus_epi32_rmb_128: 685; X64: # %bb.0: 686; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07] 687; X64-NEXT: retq # encoding: [0xc3] 688 %q = load i32, i32* %ptr_b 689 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 690 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 691 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 692 ret <8 x i16> %1 693} 694 695define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 696; X86-LABEL: test_mask_packus_epi32_rmbk_128: 697; X86: # %bb.0: 698; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] 699; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 700; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 701; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08] 702; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 703; X86-NEXT: retl # encoding: [0xc3] 704; 705; X64-LABEL: test_mask_packus_epi32_rmbk_128: 706; X64: # %bb.0: 707; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 708; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] 709; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 710; X64-NEXT: retq # encoding: [0xc3] 711 %q = load i32, i32* %ptr_b 712 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 713 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 714 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 715 %2 = bitcast i8 %mask to <8 x i1> 716 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru 717 ret <8 x i16> %3 718} 719 720define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 721; X86-LABEL: test_mask_packus_epi32_rmbkz_128: 722; X86: # %bb.0: 723; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 724; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 725; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 726; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00] 727; X86-NEXT: retl # encoding: [0xc3] 728; 729; X64-LABEL: test_mask_packus_epi32_rmbkz_128: 730; X64: # %bb.0: 731; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 732; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07] 733; X64-NEXT: retq # encoding: [0xc3] 734 %q = load i32, i32* %ptr_b 735 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 736 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 737 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b) 738 %2 = bitcast i8 %mask to <8 x i1> 739 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 740 ret <8 x i16> %3 741} 742 743declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) 744 745define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 746; CHECK-LABEL: test_mask_packus_epi32_rr_256: 747; CHECK: # %bb.0: 748; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1] 749; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 750 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 751 ret <16 x i16> %1 752} 753 754define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 755; X86-LABEL: test_mask_packus_epi32_rrk_256: 756; X86: # %bb.0: 757; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 758; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 759; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 760; X86-NEXT: retl # encoding: [0xc3] 761; 762; X64-LABEL: test_mask_packus_epi32_rrk_256: 763; X64: # %bb.0: 764; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 765; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 
{%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 766; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 767; X64-NEXT: retq # encoding: [0xc3] 768 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 769 %2 = bitcast i16 %mask to <16 x i1> 770 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 771 ret <16 x i16> %3 772} 773 774define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 775; X86-LABEL: test_mask_packus_epi32_rrkz_256: 776; X86: # %bb.0: 777; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 778; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 779; X86-NEXT: retl # encoding: [0xc3] 780; 781; X64-LABEL: test_mask_packus_epi32_rrkz_256: 782; X64: # %bb.0: 783; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 784; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 785; X64-NEXT: retq # encoding: [0xc3] 786 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 787 %2 = bitcast i16 %mask to <16 x i1> 788 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 789 ret <16 x i16> %3 790} 791 792define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 793; X86-LABEL: test_mask_packus_epi32_rm_256: 794; X86: # %bb.0: 795; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 796; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00] 797; X86-NEXT: retl # encoding: [0xc3] 798; 799; X64-LABEL: test_mask_packus_epi32_rm_256: 800; X64: # %bb.0: 801; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07] 802; X64-NEXT: retq # encoding: [0xc3] 803 %b = load <8 x i32>, <8 x i32>* %ptr_b 804 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 805 ret <16 x i16> %1 806} 807 808define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 809; X86-LABEL: test_mask_packus_epi32_rmk_256: 810; X86: # %bb.0: 811; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 812; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 813; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08] 814; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 815; X86-NEXT: retl # encoding: [0xc3] 816; 817; X64-LABEL: test_mask_packus_epi32_rmk_256: 818; X64: # %bb.0: 819; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 820; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] 821; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 822; X64-NEXT: retq # encoding: [0xc3] 823 %b = load <8 x i32>, <8 x i32>* %ptr_b 824 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 825 %2 = bitcast i16 %mask to <16 x i1> 826 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 827 ret <16 x i16> %3 828} 829 830define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 831; X86-LABEL: test_mask_packus_epi32_rmkz_256: 832; X86: # %bb.0: 833; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 834; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc5,0xf8,0x90,0x4c,0x24,0x08] 835; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00] 836; X86-NEXT: retl # encoding: [0xc3] 837; 838; X64-LABEL: test_mask_packus_epi32_rmkz_256: 839; X64: # %bb.0: 840; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 841; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07] 842; X64-NEXT: retq # encoding: [0xc3] 843 %b = load <8 x i32>, <8 x i32>* %ptr_b 844 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 845 %2 = bitcast i16 %mask to <16 x i1> 846 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 847 ret <16 x i16> %3 848} 849 850define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 851; X86-LABEL: test_mask_packus_epi32_rmb_256: 852; X86: # %bb.0: 853; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 854; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00] 855; X86-NEXT: retl # encoding: [0xc3] 856; 857; X64-LABEL: test_mask_packus_epi32_rmb_256: 858; X64: # %bb.0: 859; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07] 860; X64-NEXT: retq # encoding: [0xc3] 861 %q = load i32, i32* %ptr_b 862 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 863 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 864 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 865 ret <16 x i16> %1 866} 867 868define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 869; X86-LABEL: test_mask_packus_epi32_rmbk_256: 870; X86: # %bb.0: 871; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 872; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 873; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08] 874; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 875; X86-NEXT: retl # encoding: [0xc3] 876; 877; X64-LABEL: test_mask_packus_epi32_rmbk_256: 878; X64: # %bb.0: 879; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 880; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] 881; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 882; X64-NEXT: retq # encoding: [0xc3] 883 %q = load i32, i32* %ptr_b 884 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 885 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 886 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 887 %2 = bitcast i16 %mask to <16 x i1> 888 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru 889 ret <16 x i16> %3 890} 891 892define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 893; X86-LABEL: test_mask_packus_epi32_rmbkz_256: 894; X86: # %bb.0: 895; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 896; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 897; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00] 898; X86-NEXT: retl # encoding: [0xc3] 899; 900; X64-LABEL: test_mask_packus_epi32_rmbkz_256: 901; X64: # %bb.0: 902; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 903; X64-NEXT: 
vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07] 904; X64-NEXT: retq # encoding: [0xc3] 905 %q = load i32, i32* %ptr_b 906 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 907 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 908 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b) 909 %2 = bitcast i16 %mask to <16 x i1> 910 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 911 ret <16 x i16> %3 912} 913 914declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) 915 916define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 917; CHECK-LABEL: test_mask_packus_epi16_rr_128: 918; CHECK: # %bb.0: 919; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] 920; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 921 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 922 ret <16 x i8> %1 923} 924 925define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 926; X86-LABEL: test_mask_packus_epi16_rrk_128: 927; X86: # %bb.0: 928; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 929; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 930; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 931; X86-NEXT: retl # encoding: [0xc3] 932; 933; X64-LABEL: test_mask_packus_epi16_rrk_128: 934; X64: # %bb.0: 935; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 936; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 937; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 938; X64-NEXT: retq # encoding: [0xc3] 939 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 940 %2 = bitcast i16 %mask to <16 x i1> 941 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 942 ret <16 x i8> %3 943} 944 945define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 946; X86-LABEL: test_mask_packus_epi16_rrkz_128: 947; X86: # %bb.0: 948; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 949; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 950; X86-NEXT: retl # encoding: [0xc3] 951; 952; X64-LABEL: test_mask_packus_epi16_rrkz_128: 953; X64: # %bb.0: 954; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 955; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 956; X64-NEXT: retq # encoding: [0xc3] 957 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 958 %2 = bitcast i16 %mask to <16 x i1> 959 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 960 ret <16 x i8> %3 961} 962 963define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 964; X86-LABEL: test_mask_packus_epi16_rm_128: 965; X86: # %bb.0: 966; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 967; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00] 968; X86-NEXT: retl # encoding: [0xc3] 969; 970; X64-LABEL: test_mask_packus_epi16_rm_128: 971; X64: # %bb.0: 972; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07] 973; X64-NEXT: retq # encoding: 
[0xc3] 974 %b = load <8 x i16>, <8 x i16>* %ptr_b 975 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 976 ret <16 x i8> %1 977} 978 979define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 980; X86-LABEL: test_mask_packus_epi16_rmk_128: 981; X86: # %bb.0: 982; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 983; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 984; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08] 985; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 986; X86-NEXT: retl # encoding: [0xc3] 987; 988; X64-LABEL: test_mask_packus_epi16_rmk_128: 989; X64: # %bb.0: 990; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 991; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] 992; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 993; X64-NEXT: retq # encoding: [0xc3] 994 %b = load <8 x i16>, <8 x i16>* %ptr_b 995 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 996 %2 = bitcast i16 %mask to <16 x i1> 997 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru 998 ret <16 x i8> %3 999} 1000 1001define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 1002; X86-LABEL: test_mask_packus_epi16_rmkz_128: 1003; X86: # %bb.0: 1004; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1005; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1006; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00] 1007; X86-NEXT: retl # encoding: [0xc3] 1008; 1009; X64-LABEL: test_mask_packus_epi16_rmkz_128: 1010; X64: # %bb.0: 1011; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1012; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07] 1013; X64-NEXT: retq # encoding: [0xc3] 1014 %b = load <8 x i16>, <8 x i16>* %ptr_b 1015 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b) 1016 %2 = bitcast i16 %mask to <16 x i1> 1017 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 1018 ret <16 x i8> %3 1019} 1020 1021declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) 1022 1023define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1024; CHECK-LABEL: test_mask_packus_epi16_rr_256: 1025; CHECK: # %bb.0: 1026; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1] 1027; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1028 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1029 ret <32 x i8> %1 1030} 1031 1032define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 1033; X86-LABEL: test_mask_packus_epi16_rrk_256: 1034; X86: # %bb.0: 1035; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1036; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 1037; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1038; X86-NEXT: retl # encoding: [0xc3] 1039; 1040; X64-LABEL: test_mask_packus_epi16_rrk_256: 1041; X64: # %bb.0: 1042; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1043; 
X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 1044; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1045; X64-NEXT: retq # encoding: [0xc3] 1046 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1047 %2 = bitcast i32 %mask to <32 x i1> 1048 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 1049 ret <32 x i8> %3 1050} 1051 1052define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 1053; X86-LABEL: test_mask_packus_epi16_rrkz_256: 1054; X86: # %bb.0: 1055; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 1056; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 1057; X86-NEXT: retl # encoding: [0xc3] 1058; 1059; X64-LABEL: test_mask_packus_epi16_rrkz_256: 1060; X64: # %bb.0: 1061; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1062; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 1063; X64-NEXT: retq # encoding: [0xc3] 1064 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1065 %2 = bitcast i32 %mask to <32 x i1> 1066 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 1067 ret <32 x i8> %3 1068} 1069 1070define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1071; X86-LABEL: test_mask_packus_epi16_rm_256: 1072; X86: # %bb.0: 1073; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1074; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00] 1075; X86-NEXT: retl # encoding: [0xc3] 1076; 1077; X64-LABEL: test_mask_packus_epi16_rm_256: 1078; X64: # %bb.0: 1079; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07] 1080; X64-NEXT: retq # encoding: [0xc3] 1081 %b = load <16 x i16>, <16 x i16>* %ptr_b 1082 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1083 ret <32 x i8> %1 1084} 1085 1086define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 1087; X86-LABEL: test_mask_packus_epi16_rmk_256: 1088; X86: # %bb.0: 1089; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1090; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1091; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08] 1092; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1093; X86-NEXT: retl # encoding: [0xc3] 1094; 1095; X64-LABEL: test_mask_packus_epi16_rmk_256: 1096; X64: # %bb.0: 1097; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1098; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f] 1099; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1100; X64-NEXT: retq # encoding: [0xc3] 1101 %b = load <16 x i16>, <16 x i16>* %ptr_b 1102 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1103 %2 = bitcast i32 %mask to <32 x i1> 1104 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru 1105 ret <32 x i8> %3 1106} 1107 1108define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 1109; X86-LABEL: test_mask_packus_epi16_rmkz_256: 1110; X86: # %bb.0: 1111; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 
encoding: [0x8b,0x44,0x24,0x04] 1112; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 1113; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00] 1114; X86-NEXT: retl # encoding: [0xc3] 1115; 1116; X64-LABEL: test_mask_packus_epi16_rmkz_256: 1117; X64: # %bb.0: 1118; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1119; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07] 1120; X64-NEXT: retq # encoding: [0xc3] 1121 %b = load <16 x i16>, <16 x i16>* %ptr_b 1122 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b) 1123 %2 = bitcast i32 %mask to <32 x i1> 1124 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 1125 ret <32 x i8> %3 1126} 1127 1128declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) 1129 1130define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1131; CHECK-LABEL: test_mask_adds_epi16_rr_128: 1132; CHECK: # %bb.0: 1133; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1] 1134; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1135 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1136 ret <8 x i16> %res 1137} 1138 1139define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1140; X86-LABEL: test_mask_adds_epi16_rrk_128: 1141; X86: # %bb.0: 1142; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1143; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1144; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] 1145; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1146; X86-NEXT: retl # encoding: [0xc3] 1147; 1148; X64-LABEL: test_mask_adds_epi16_rrk_128: 1149; X64: # %bb.0: 1150; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1151; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] 1152; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1153; X64-NEXT: retq # encoding: [0xc3] 1154 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1155 ret <8 x i16> %res 1156} 1157 1158define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1159; X86-LABEL: test_mask_adds_epi16_rrkz_128: 1160; X86: # %bb.0: 1161; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1162; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1163; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] 1164; X86-NEXT: retl # encoding: [0xc3] 1165; 1166; X64-LABEL: test_mask_adds_epi16_rrkz_128: 1167; X64: # %bb.0: 1168; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1169; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] 1170; X64-NEXT: retq # encoding: [0xc3] 1171 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1172 ret <8 x i16> %res 1173} 1174 1175define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1176; X86-LABEL: test_mask_adds_epi16_rm_128: 1177; X86: # %bb.0: 1178; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 
1179; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x00] 1180; X86-NEXT: retl # encoding: [0xc3] 1181; 1182; X64-LABEL: test_mask_adds_epi16_rm_128: 1183; X64: # %bb.0: 1184; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0x07] 1185; X64-NEXT: retq # encoding: [0xc3] 1186 %b = load <8 x i16>, <8 x i16>* %ptr_b 1187 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1188 ret <8 x i16> %res 1189} 1190 1191define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1192; X86-LABEL: test_mask_adds_epi16_rmk_128: 1193; X86: # %bb.0: 1194; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1195; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1196; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1197; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08] 1198; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1199; X86-NEXT: retl # encoding: [0xc3] 1200; 1201; X64-LABEL: test_mask_adds_epi16_rmk_128: 1202; X64: # %bb.0: 1203; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1204; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f] 1205; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1206; X64-NEXT: retq # encoding: [0xc3] 1207 %b = load <8 x i16>, <8 x i16>* %ptr_b 1208 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1209 ret <8 x i16> %res 1210} 1211 1212define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1213; X86-LABEL: test_mask_adds_epi16_rmkz_128: 1214; X86: # %bb.0: 1215; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1216; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1217; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1218; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00] 1219; X86-NEXT: retl # encoding: [0xc3] 1220; 1221; X64-LABEL: test_mask_adds_epi16_rmkz_128: 1222; X64: # %bb.0: 1223; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1224; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07] 1225; X64-NEXT: retq # encoding: [0xc3] 1226 %b = load <8 x i16>, <8 x i16>* %ptr_b 1227 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1228 ret <8 x i16> %res 1229} 1230 1231declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1232 1233define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1234; CHECK-LABEL: test_mask_adds_epi16_rr_256: 1235; CHECK: # %bb.0: 1236; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1] 1237; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1238 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1239 ret <16 x i16> %res 1240} 1241 1242define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 1243; X86-LABEL: test_mask_adds_epi16_rrk_256: 1244; X86: # %bb.0: 1245; 
X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1246; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] 1247; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1248; X86-NEXT: retl # encoding: [0xc3] 1249; 1250; X64-LABEL: test_mask_adds_epi16_rrk_256: 1251; X64: # %bb.0: 1252; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1253; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] 1254; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1255; X64-NEXT: retq # encoding: [0xc3] 1256 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1257 ret <16 x i16> %res 1258} 1259 1260define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1261; X86-LABEL: test_mask_adds_epi16_rrkz_256: 1262; X86: # %bb.0: 1263; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1264; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] 1265; X86-NEXT: retl # encoding: [0xc3] 1266; 1267; X64-LABEL: test_mask_adds_epi16_rrkz_256: 1268; X64: # %bb.0: 1269; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1270; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] 1271; X64-NEXT: retq # encoding: [0xc3] 1272 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1273 ret <16 x i16> %res 1274} 1275 1276define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1277; X86-LABEL: test_mask_adds_epi16_rm_256: 1278; X86: # %bb.0: 1279; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1280; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x00] 1281; X86-NEXT: retl # encoding: [0xc3] 1282; 1283; X64-LABEL: test_mask_adds_epi16_rm_256: 1284; X64: # %bb.0: 1285; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0x07] 1286; X64-NEXT: retq # encoding: [0xc3] 1287 %b = load <16 x i16>, <16 x i16>* %ptr_b 1288 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1289 ret <16 x i16> %res 1290} 1291 1292define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 1293; X86-LABEL: test_mask_adds_epi16_rmk_256: 1294; X86: # %bb.0: 1295; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1296; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1297; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08] 1298; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1299; X86-NEXT: retl # encoding: [0xc3] 1300; 1301; X64-LABEL: test_mask_adds_epi16_rmk_256: 1302; X64: # %bb.0: 1303; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1304; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f] 1305; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1306; X64-NEXT: retq # encoding: [0xc3] 1307 %b = load <16 x i16>, <16 x i16>* %ptr_b 1308 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, 
<16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1309 ret <16 x i16> %res 1310} 1311 1312define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 1313; X86-LABEL: test_mask_adds_epi16_rmkz_256: 1314; X86: # %bb.0: 1315; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1316; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1317; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00] 1318; X86-NEXT: retl # encoding: [0xc3] 1319; 1320; X64-LABEL: test_mask_adds_epi16_rmkz_256: 1321; X64: # %bb.0: 1322; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1323; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07] 1324; X64-NEXT: retq # encoding: [0xc3] 1325 %b = load <16 x i16>, <16 x i16>* %ptr_b 1326 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1327 ret <16 x i16> %res 1328} 1329 1330declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1331 1332define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1333; CHECK-LABEL: test_mask_subs_epi16_rr_128: 1334; CHECK: # %bb.0: 1335; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] 1336; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1337 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1338 ret <8 x i16> %res 1339} 1340 1341define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1342; X86-LABEL: test_mask_subs_epi16_rrk_128: 1343; X86: # %bb.0: 1344; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1345; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1346; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] 1347; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1348; X86-NEXT: retl # encoding: [0xc3] 1349; 1350; X64-LABEL: test_mask_subs_epi16_rrk_128: 1351; X64: # %bb.0: 1352; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1353; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] 1354; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1355; X64-NEXT: retq # encoding: [0xc3] 1356 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1357 ret <8 x i16> %res 1358} 1359 1360define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1361; X86-LABEL: test_mask_subs_epi16_rrkz_128: 1362; X86: # %bb.0: 1363; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1364; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1365; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] 1366; X86-NEXT: retl # encoding: [0xc3] 1367; 1368; X64-LABEL: test_mask_subs_epi16_rrkz_128: 1369; X64: # %bb.0: 1370; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1371; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] 1372; X64-NEXT: retq # encoding: [0xc3] 1373 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1374 
ret <8 x i16> %res 1375} 1376 1377define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1378; X86-LABEL: test_mask_subs_epi16_rm_128: 1379; X86: # %bb.0: 1380; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1381; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x00] 1382; X86-NEXT: retl # encoding: [0xc3] 1383; 1384; X64-LABEL: test_mask_subs_epi16_rm_128: 1385; X64: # %bb.0: 1386; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0x07] 1387; X64-NEXT: retq # encoding: [0xc3] 1388 %b = load <8 x i16>, <8 x i16>* %ptr_b 1389 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1390 ret <8 x i16> %res 1391} 1392 1393define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1394; X86-LABEL: test_mask_subs_epi16_rmk_128: 1395; X86: # %bb.0: 1396; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1397; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1398; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1399; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08] 1400; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1401; X86-NEXT: retl # encoding: [0xc3] 1402; 1403; X64-LABEL: test_mask_subs_epi16_rmk_128: 1404; X64: # %bb.0: 1405; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1406; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f] 1407; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1408; X64-NEXT: retq # encoding: [0xc3] 1409 %b = load <8 x i16>, <8 x i16>* %ptr_b 1410 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1411 ret <8 x i16> %res 1412} 1413 1414define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1415; X86-LABEL: test_mask_subs_epi16_rmkz_128: 1416; X86: # %bb.0: 1417; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1418; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1419; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1420; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00] 1421; X86-NEXT: retl # encoding: [0xc3] 1422; 1423; X64-LABEL: test_mask_subs_epi16_rmkz_128: 1424; X64: # %bb.0: 1425; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1426; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07] 1427; X64-NEXT: retq # encoding: [0xc3] 1428 %b = load <8 x i16>, <8 x i16>* %ptr_b 1429 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1430 ret <8 x i16> %res 1431} 1432 1433declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1434 1435define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1436; CHECK-LABEL: test_mask_subs_epi16_rr_256: 1437; CHECK: # %bb.0: 1438; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1] 1439; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1440 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> 
zeroinitializer, i16 -1) 1441 ret <16 x i16> %res 1442} 1443 1444define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 1445; X86-LABEL: test_mask_subs_epi16_rrk_256: 1446; X86: # %bb.0: 1447; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1448; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] 1449; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1450; X86-NEXT: retl # encoding: [0xc3] 1451; 1452; X64-LABEL: test_mask_subs_epi16_rrk_256: 1453; X64: # %bb.0: 1454; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1455; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] 1456; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1457; X64-NEXT: retq # encoding: [0xc3] 1458 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1459 ret <16 x i16> %res 1460} 1461 1462define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1463; X86-LABEL: test_mask_subs_epi16_rrkz_256: 1464; X86: # %bb.0: 1465; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1466; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] 1467; X86-NEXT: retl # encoding: [0xc3] 1468; 1469; X64-LABEL: test_mask_subs_epi16_rrkz_256: 1470; X64: # %bb.0: 1471; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1472; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] 1473; X64-NEXT: retq # encoding: [0xc3] 1474 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1475 ret <16 x i16> %res 1476} 1477 1478define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1479; X86-LABEL: test_mask_subs_epi16_rm_256: 1480; X86: # %bb.0: 1481; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1482; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x00] 1483; X86-NEXT: retl # encoding: [0xc3] 1484; 1485; X64-LABEL: test_mask_subs_epi16_rm_256: 1486; X64: # %bb.0: 1487; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0x07] 1488; X64-NEXT: retq # encoding: [0xc3] 1489 %b = load <16 x i16>, <16 x i16>* %ptr_b 1490 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1491 ret <16 x i16> %res 1492} 1493 1494define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 1495; X86-LABEL: test_mask_subs_epi16_rmk_256: 1496; X86: # %bb.0: 1497; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1498; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1499; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08] 1500; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1501; X86-NEXT: retl # encoding: [0xc3] 1502; 1503; X64-LABEL: test_mask_subs_epi16_rmk_256: 1504; X64: # %bb.0: 1505; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1506; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f] 1507; 
X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1508; X64-NEXT: retq # encoding: [0xc3] 1509 %b = load <16 x i16>, <16 x i16>* %ptr_b 1510 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1511 ret <16 x i16> %res 1512} 1513 1514define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 1515; X86-LABEL: test_mask_subs_epi16_rmkz_256: 1516; X86: # %bb.0: 1517; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1518; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1519; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00] 1520; X86-NEXT: retl # encoding: [0xc3] 1521; 1522; X64-LABEL: test_mask_subs_epi16_rmkz_256: 1523; X64: # %bb.0: 1524; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1525; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07] 1526; X64-NEXT: retq # encoding: [0xc3] 1527 %b = load <16 x i16>, <16 x i16>* %ptr_b 1528 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1529 ret <16 x i16> %res 1530} 1531 1532declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1533 1534define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1535; CHECK-LABEL: test_mask_adds_epu16_rr_128: 1536; CHECK: # %bb.0: 1537; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1] 1538; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1539 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1540 ret <8 x i16> %res 1541} 1542 1543define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1544; X86-LABEL: test_mask_adds_epu16_rrk_128: 1545; X86: # %bb.0: 1546; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1547; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1548; X86-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1] 1549; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1550; X86-NEXT: retl # encoding: [0xc3] 1551; 1552; X64-LABEL: test_mask_adds_epu16_rrk_128: 1553; X64: # %bb.0: 1554; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1555; X64-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1] 1556; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1557; X64-NEXT: retq # encoding: [0xc3] 1558 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1559 ret <8 x i16> %res 1560} 1561 1562define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1563; X86-LABEL: test_mask_adds_epu16_rrkz_128: 1564; X86: # %bb.0: 1565; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1566; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1567; X86-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1] 1568; X86-NEXT: retl # encoding: [0xc3] 1569; 1570; X64-LABEL: test_mask_adds_epu16_rrkz_128: 1571; X64: # %bb.0: 1572; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1573; 
X64-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1] 1574; X64-NEXT: retq # encoding: [0xc3] 1575 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1576 ret <8 x i16> %res 1577} 1578 1579define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1580; X86-LABEL: test_mask_adds_epu16_rm_128: 1581; X86: # %bb.0: 1582; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1583; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x00] 1584; X86-NEXT: retl # encoding: [0xc3] 1585; 1586; X64-LABEL: test_mask_adds_epu16_rm_128: 1587; X64: # %bb.0: 1588; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0x07] 1589; X64-NEXT: retq # encoding: [0xc3] 1590 %b = load <8 x i16>, <8 x i16>* %ptr_b 1591 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1592 ret <8 x i16> %res 1593} 1594 1595define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1596; X86-LABEL: test_mask_adds_epu16_rmk_128: 1597; X86: # %bb.0: 1598; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1599; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1600; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1601; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x08] 1602; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1603; X86-NEXT: retl # encoding: [0xc3] 1604; 1605; X64-LABEL: test_mask_adds_epu16_rmk_128: 1606; X64: # %bb.0: 1607; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1608; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f] 1609; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1610; X64-NEXT: retq # encoding: [0xc3] 1611 %b = load <8 x i16>, <8 x i16>* %ptr_b 1612 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1613 ret <8 x i16> %res 1614} 1615 1616define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1617; X86-LABEL: test_mask_adds_epu16_rmkz_128: 1618; X86: # %bb.0: 1619; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1620; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1621; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1622; X86-NEXT: vpaddusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x00] 1623; X86-NEXT: retl # encoding: [0xc3] 1624; 1625; X64-LABEL: test_mask_adds_epu16_rmkz_128: 1626; X64: # %bb.0: 1627; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1628; X64-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07] 1629; X64-NEXT: retq # encoding: [0xc3] 1630 %b = load <8 x i16>, <8 x i16>* %ptr_b 1631 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1632 ret <8 x i16> %res 1633} 1634 1635declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1636 1637define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1638; CHECK-LABEL: test_mask_adds_epu16_rr_256: 
1639; CHECK: # %bb.0: 1640; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1] 1641; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1642 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1643 ret <16 x i16> %res 1644} 1645 1646define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 1647; X86-LABEL: test_mask_adds_epu16_rrk_256: 1648; X86: # %bb.0: 1649; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1650; X86-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] 1651; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1652; X86-NEXT: retl # encoding: [0xc3] 1653; 1654; X64-LABEL: test_mask_adds_epu16_rrk_256: 1655; X64: # %bb.0: 1656; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1657; X64-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] 1658; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1659; X64-NEXT: retq # encoding: [0xc3] 1660 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1661 ret <16 x i16> %res 1662} 1663 1664define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1665; X86-LABEL: test_mask_adds_epu16_rrkz_256: 1666; X86: # %bb.0: 1667; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1668; X86-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1] 1669; X86-NEXT: retl # encoding: [0xc3] 1670; 1671; X64-LABEL: test_mask_adds_epu16_rrkz_256: 1672; X64: # %bb.0: 1673; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1674; X64-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1] 1675; X64-NEXT: retq # encoding: [0xc3] 1676 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1677 ret <16 x i16> %res 1678} 1679 1680define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1681; X86-LABEL: test_mask_adds_epu16_rm_256: 1682; X86: # %bb.0: 1683; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1684; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x00] 1685; X86-NEXT: retl # encoding: [0xc3] 1686; 1687; X64-LABEL: test_mask_adds_epu16_rm_256: 1688; X64: # %bb.0: 1689; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0x07] 1690; X64-NEXT: retq # encoding: [0xc3] 1691 %b = load <16 x i16>, <16 x i16>* %ptr_b 1692 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1693 ret <16 x i16> %res 1694} 1695 1696define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 1697; X86-LABEL: test_mask_adds_epu16_rmk_256: 1698; X86: # %bb.0: 1699; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1700; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1701; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x08] 1702; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0xc1] 1703; X86-NEXT: retl # encoding: [0xc3] 1704; 1705; X64-LABEL: test_mask_adds_epu16_rmk_256: 1706; X64: # %bb.0: 1707; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1708; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f] 1709; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1710; X64-NEXT: retq # encoding: [0xc3] 1711 %b = load <16 x i16>, <16 x i16>* %ptr_b 1712 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1713 ret <16 x i16> %res 1714} 1715 1716define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 1717; X86-LABEL: test_mask_adds_epu16_rmkz_256: 1718; X86: # %bb.0: 1719; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1720; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1721; X86-NEXT: vpaddusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x00] 1722; X86-NEXT: retl # encoding: [0xc3] 1723; 1724; X64-LABEL: test_mask_adds_epu16_rmkz_256: 1725; X64: # %bb.0: 1726; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1727; X64-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07] 1728; X64-NEXT: retq # encoding: [0xc3] 1729 %b = load <16 x i16>, <16 x i16>* %ptr_b 1730 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1731 ret <16 x i16> %res 1732} 1733 1734declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1735 1736define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1737; CHECK-LABEL: test_mask_subs_epu16_rr_128: 1738; CHECK: # %bb.0: 1739; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1] 1740; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1741 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1742 ret <8 x i16> %res 1743} 1744 1745define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 1746; X86-LABEL: test_mask_subs_epu16_rrk_128: 1747; X86: # %bb.0: 1748; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1749; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1750; X86-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1] 1751; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1752; X86-NEXT: retl # encoding: [0xc3] 1753; 1754; X64-LABEL: test_mask_subs_epu16_rrk_128: 1755; X64: # %bb.0: 1756; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1757; X64-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1] 1758; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1759; X64-NEXT: retq # encoding: [0xc3] 1760 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1761 ret <8 x i16> %res 1762} 1763 1764define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 1765; X86-LABEL: test_mask_subs_epu16_rrkz_128: 1766; X86: # %bb.0: 1767; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1768; X86-NEXT: kmovd %eax, %k1 # encoding: 
[0xc5,0xfb,0x92,0xc8] 1769; X86-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1] 1770; X86-NEXT: retl # encoding: [0xc3] 1771; 1772; X64-LABEL: test_mask_subs_epu16_rrkz_128: 1773; X64: # %bb.0: 1774; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1775; X64-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1] 1776; X64-NEXT: retq # encoding: [0xc3] 1777 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 1778 ret <8 x i16> %res 1779} 1780 1781define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 1782; X86-LABEL: test_mask_subs_epu16_rm_128: 1783; X86: # %bb.0: 1784; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1785; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x00] 1786; X86-NEXT: retl # encoding: [0xc3] 1787; 1788; X64-LABEL: test_mask_subs_epu16_rm_128: 1789; X64: # %bb.0: 1790; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0x07] 1791; X64-NEXT: retq # encoding: [0xc3] 1792 %b = load <8 x i16>, <8 x i16>* %ptr_b 1793 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 1794 ret <8 x i16> %res 1795} 1796 1797define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 1798; X86-LABEL: test_mask_subs_epu16_rmk_128: 1799; X86: # %bb.0: 1800; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1801; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1802; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1803; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x08] 1804; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1805; X86-NEXT: retl # encoding: [0xc3] 1806; 1807; X64-LABEL: test_mask_subs_epu16_rmk_128: 1808; X64: # %bb.0: 1809; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1810; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f] 1811; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 1812; X64-NEXT: retq # encoding: [0xc3] 1813 %b = load <8 x i16>, <8 x i16>* %ptr_b 1814 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 1815 ret <8 x i16> %res 1816} 1817 1818define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 1819; X86-LABEL: test_mask_subs_epu16_rmkz_128: 1820; X86: # %bb.0: 1821; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 1822; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1823; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1824; X86-NEXT: vpsubusw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x00] 1825; X86-NEXT: retl # encoding: [0xc3] 1826; 1827; X64-LABEL: test_mask_subs_epu16_rmkz_128: 1828; X64: # %bb.0: 1829; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1830; X64-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07] 1831; X64-NEXT: retq # encoding: [0xc3] 1832 %b = load <8 x i16>, <8 x i16>* %ptr_b 1833 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> 
%b, <8 x i16> zeroinitializer, i8 %mask) 1834 ret <8 x i16> %res 1835} 1836 1837declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1838 1839define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { 1840; CHECK-LABEL: test_mask_subs_epu16_rr_256: 1841; CHECK: # %bb.0: 1842; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1] 1843; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1844 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1845 ret <16 x i16> %res 1846} 1847 1848define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 1849; X86-LABEL: test_mask_subs_epu16_rrk_256: 1850; X86: # %bb.0: 1851; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1852; X86-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1] 1853; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1854; X86-NEXT: retl # encoding: [0xc3] 1855; 1856; X64-LABEL: test_mask_subs_epu16_rrk_256: 1857; X64: # %bb.0: 1858; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1859; X64-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1] 1860; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 1861; X64-NEXT: retq # encoding: [0xc3] 1862 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1863 ret <16 x i16> %res 1864} 1865 1866define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 1867; X86-LABEL: test_mask_subs_epu16_rrkz_256: 1868; X86: # %bb.0: 1869; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1870; X86-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1] 1871; X86-NEXT: retl # encoding: [0xc3] 1872; 1873; X64-LABEL: test_mask_subs_epu16_rrkz_256: 1874; X64: # %bb.0: 1875; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1876; X64-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1] 1877; X64-NEXT: retq # encoding: [0xc3] 1878 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1879 ret <16 x i16> %res 1880} 1881 1882define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 1883; X86-LABEL: test_mask_subs_epu16_rm_256: 1884; X86: # %bb.0: 1885; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1886; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x00] 1887; X86-NEXT: retl # encoding: [0xc3] 1888; 1889; X64-LABEL: test_mask_subs_epu16_rm_256: 1890; X64: # %bb.0: 1891; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0x07] 1892; X64-NEXT: retq # encoding: [0xc3] 1893 %b = load <16 x i16>, <16 x i16>* %ptr_b 1894 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 1895 ret <16 x i16> %res 1896} 1897 1898define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 1899; X86-LABEL: test_mask_subs_epu16_rmk_256: 1900; X86: # %bb.0: 1901; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 
# encoding: [0x8b,0x44,0x24,0x04] 1902; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1903; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x08] 1904; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1905; X86-NEXT: retl # encoding: [0xc3] 1906; 1907; X64-LABEL: test_mask_subs_epu16_rmk_256: 1908; X64: # %bb.0: 1909; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1910; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f] 1911; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 1912; X64-NEXT: retq # encoding: [0xc3] 1913 %b = load <16 x i16>, <16 x i16>* %ptr_b 1914 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 1915 ret <16 x i16> %res 1916} 1917 1918define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 1919; X86-LABEL: test_mask_subs_epu16_rmkz_256: 1920; X86: # %bb.0: 1921; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1922; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1923; X86-NEXT: vpsubusw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x00] 1924; X86-NEXT: retl # encoding: [0xc3] 1925; 1926; X64-LABEL: test_mask_subs_epu16_rmkz_256: 1927; X64: # %bb.0: 1928; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1929; X64-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07] 1930; X64-NEXT: retq # encoding: [0xc3] 1931 %b = load <16 x i16>, <16 x i16>* %ptr_b 1932 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 1933 ret <16 x i16> %res 1934} 1935 1936declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1937 1938define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { 1939; CHECK-LABEL: test_mask_adds_epi8_rr_128: 1940; CHECK: # %bb.0: 1941; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1] 1942; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1943 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 1944 ret <16 x i8> %res 1945} 1946 1947define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 1948; X86-LABEL: test_mask_adds_epi8_rrk_128: 1949; X86: # %bb.0: 1950; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1951; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] 1952; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1953; X86-NEXT: retl # encoding: [0xc3] 1954; 1955; X64-LABEL: test_mask_adds_epi8_rrk_128: 1956; X64: # %bb.0: 1957; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1958; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] 1959; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 1960; X64-NEXT: retq # encoding: [0xc3] 1961 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 1962 ret <16 x i8> %res 1963} 1964 1965define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x 
i8> %b, i16 %mask) { 1966; X86-LABEL: test_mask_adds_epi8_rrkz_128: 1967; X86: # %bb.0: 1968; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1969; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] 1970; X86-NEXT: retl # encoding: [0xc3] 1971; 1972; X64-LABEL: test_mask_adds_epi8_rrkz_128: 1973; X64: # %bb.0: 1974; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1975; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] 1976; X64-NEXT: retq # encoding: [0xc3] 1977 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 1978 ret <16 x i8> %res 1979} 1980 1981define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 1982; X86-LABEL: test_mask_adds_epi8_rm_128: 1983; X86: # %bb.0: 1984; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1985; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00] 1986; X86-NEXT: retl # encoding: [0xc3] 1987; 1988; X64-LABEL: test_mask_adds_epi8_rm_128: 1989; X64: # %bb.0: 1990; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07] 1991; X64-NEXT: retq # encoding: [0xc3] 1992 %b = load <16 x i8>, <16 x i8>* %ptr_b 1993 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 1994 ret <16 x i8> %res 1995} 1996 1997define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 1998; X86-LABEL: test_mask_adds_epi8_rmk_128: 1999; X86: # %bb.0: 2000; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2001; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2002; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08] 2003; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2004; X86-NEXT: retl # encoding: [0xc3] 2005; 2006; X64-LABEL: test_mask_adds_epi8_rmk_128: 2007; X64: # %bb.0: 2008; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2009; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] 2010; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2011; X64-NEXT: retq # encoding: [0xc3] 2012 %b = load <16 x i8>, <16 x i8>* %ptr_b 2013 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 2014 ret <16 x i8> %res 2015} 2016 2017define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 2018; X86-LABEL: test_mask_adds_epi8_rmkz_128: 2019; X86: # %bb.0: 2020; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2021; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2022; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00] 2023; X86-NEXT: retl # encoding: [0xc3] 2024; 2025; X64-LABEL: test_mask_adds_epi8_rmkz_128: 2026; X64: # %bb.0: 2027; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2028; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07] 2029; X64-NEXT: retq # encoding: [0xc3] 2030 %b = load <16 x i8>, <16 x i8>* %ptr_b 2031 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, 
<16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 2032 ret <16 x i8> %res 2033} 2034 2035declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2036 2037define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { 2038; CHECK-LABEL: test_mask_adds_epi8_rr_256: 2039; CHECK: # %bb.0: 2040; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1] 2041; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2042 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 2043 ret <32 x i8> %res 2044} 2045 2046define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 2047; X86-LABEL: test_mask_adds_epi8_rrk_256: 2048; X86: # %bb.0: 2049; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2050; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] 2051; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2052; X86-NEXT: retl # encoding: [0xc3] 2053; 2054; X64-LABEL: test_mask_adds_epi8_rrk_256: 2055; X64: # %bb.0: 2056; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2057; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] 2058; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2059; X64-NEXT: retq # encoding: [0xc3] 2060 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 2061 ret <32 x i8> %res 2062} 2063 2064define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 2065; X86-LABEL: test_mask_adds_epi8_rrkz_256: 2066; X86: # %bb.0: 2067; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2068; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] 2069; X86-NEXT: retl # encoding: [0xc3] 2070; 2071; X64-LABEL: test_mask_adds_epi8_rrkz_256: 2072; X64: # %bb.0: 2073; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2074; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] 2075; X64-NEXT: retq # encoding: [0xc3] 2076 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 2077 ret <32 x i8> %res 2078} 2079 2080define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 2081; X86-LABEL: test_mask_adds_epi8_rm_256: 2082; X86: # %bb.0: 2083; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2084; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x00] 2085; X86-NEXT: retl # encoding: [0xc3] 2086; 2087; X64-LABEL: test_mask_adds_epi8_rm_256: 2088; X64: # %bb.0: 2089; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0x07] 2090; X64-NEXT: retq # encoding: [0xc3] 2091 %b = load <32 x i8>, <32 x i8>* %ptr_b 2092 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 2093 ret <32 x i8> %res 2094} 2095 2096define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 2097; X86-LABEL: test_mask_adds_epi8_rmk_256: 2098; X86: # %bb.0: 2099; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2100; 
X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2101; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08] 2102; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2103; X86-NEXT: retl # encoding: [0xc3] 2104; 2105; X64-LABEL: test_mask_adds_epi8_rmk_256: 2106; X64: # %bb.0: 2107; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2108; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f] 2109; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2110; X64-NEXT: retq # encoding: [0xc3] 2111 %b = load <32 x i8>, <32 x i8>* %ptr_b 2112 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 2113 ret <32 x i8> %res 2114} 2115 2116define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 2117; X86-LABEL: test_mask_adds_epi8_rmkz_256: 2118; X86: # %bb.0: 2119; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2120; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2121; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00] 2122; X86-NEXT: retl # encoding: [0xc3] 2123; 2124; X64-LABEL: test_mask_adds_epi8_rmkz_256: 2125; X64: # %bb.0: 2126; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2127; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07] 2128; X64-NEXT: retq # encoding: [0xc3] 2129 %b = load <32 x i8>, <32 x i8>* %ptr_b 2130 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 2131 ret <32 x i8> %res 2132} 2133 2134declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2135 2136define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { 2137; CHECK-LABEL: test_mask_subs_epi8_rr_128: 2138; CHECK: # %bb.0: 2139; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] 2140; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2141 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 2142 ret <16 x i8> %res 2143} 2144 2145define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 2146; X86-LABEL: test_mask_subs_epi8_rrk_128: 2147; X86: # %bb.0: 2148; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2149; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] 2150; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2151; X86-NEXT: retl # encoding: [0xc3] 2152; 2153; X64-LABEL: test_mask_subs_epi8_rrk_128: 2154; X64: # %bb.0: 2155; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2156; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] 2157; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2158; X64-NEXT: retq # encoding: [0xc3] 2159 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 2160 ret <16 x i8> %res 2161} 2162 2163define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 2164; X86-LABEL: 
test_mask_subs_epi8_rrkz_128: 2165; X86: # %bb.0: 2166; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2167; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] 2168; X86-NEXT: retl # encoding: [0xc3] 2169; 2170; X64-LABEL: test_mask_subs_epi8_rrkz_128: 2171; X64: # %bb.0: 2172; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2173; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] 2174; X64-NEXT: retq # encoding: [0xc3] 2175 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 2176 ret <16 x i8> %res 2177} 2178 2179define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 2180; X86-LABEL: test_mask_subs_epi8_rm_128: 2181; X86: # %bb.0: 2182; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2183; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x00] 2184; X86-NEXT: retl # encoding: [0xc3] 2185; 2186; X64-LABEL: test_mask_subs_epi8_rm_128: 2187; X64: # %bb.0: 2188; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0x07] 2189; X64-NEXT: retq # encoding: [0xc3] 2190 %b = load <16 x i8>, <16 x i8>* %ptr_b 2191 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 2192 ret <16 x i8> %res 2193} 2194 2195define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 2196; X86-LABEL: test_mask_subs_epi8_rmk_128: 2197; X86: # %bb.0: 2198; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2199; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2200; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08] 2201; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2202; X86-NEXT: retl # encoding: [0xc3] 2203; 2204; X64-LABEL: test_mask_subs_epi8_rmk_128: 2205; X64: # %bb.0: 2206; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2207; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f] 2208; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2209; X64-NEXT: retq # encoding: [0xc3] 2210 %b = load <16 x i8>, <16 x i8>* %ptr_b 2211 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 2212 ret <16 x i8> %res 2213} 2214 2215define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 2216; X86-LABEL: test_mask_subs_epi8_rmkz_128: 2217; X86: # %bb.0: 2218; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2219; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2220; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00] 2221; X86-NEXT: retl # encoding: [0xc3] 2222; 2223; X64-LABEL: test_mask_subs_epi8_rmkz_128: 2224; X64: # %bb.0: 2225; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2226; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07] 2227; X64-NEXT: retq # encoding: [0xc3] 2228 %b = load <16 x i8>, <16 x i8>* %ptr_b 2229 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> 
zeroinitializer, i16 %mask) 2230 ret <16 x i8> %res 2231} 2232 2233declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2234 2235define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { 2236; CHECK-LABEL: test_mask_subs_epi8_rr_256: 2237; CHECK: # %bb.0: 2238; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1] 2239; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2240 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 2241 ret <32 x i8> %res 2242} 2243 2244define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 2245; X86-LABEL: test_mask_subs_epi8_rrk_256: 2246; X86: # %bb.0: 2247; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2248; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] 2249; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2250; X86-NEXT: retl # encoding: [0xc3] 2251; 2252; X64-LABEL: test_mask_subs_epi8_rrk_256: 2253; X64: # %bb.0: 2254; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2255; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] 2256; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2257; X64-NEXT: retq # encoding: [0xc3] 2258 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 2259 ret <32 x i8> %res 2260} 2261 2262define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 2263; X86-LABEL: test_mask_subs_epi8_rrkz_256: 2264; X86: # %bb.0: 2265; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2266; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] 2267; X86-NEXT: retl # encoding: [0xc3] 2268; 2269; X64-LABEL: test_mask_subs_epi8_rrkz_256: 2270; X64: # %bb.0: 2271; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2272; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] 2273; X64-NEXT: retq # encoding: [0xc3] 2274 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 2275 ret <32 x i8> %res 2276} 2277 2278define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 2279; X86-LABEL: test_mask_subs_epi8_rm_256: 2280; X86: # %bb.0: 2281; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2282; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x00] 2283; X86-NEXT: retl # encoding: [0xc3] 2284; 2285; X64-LABEL: test_mask_subs_epi8_rm_256: 2286; X64: # %bb.0: 2287; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0x07] 2288; X64-NEXT: retq # encoding: [0xc3] 2289 %b = load <32 x i8>, <32 x i8>* %ptr_b 2290 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 2291 ret <32 x i8> %res 2292} 2293 2294define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 2295; X86-LABEL: test_mask_subs_epi8_rmk_256: 2296; X86: # %bb.0: 2297; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2298; X86-NEXT: kmovd 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2299; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08] 2300; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2301; X86-NEXT: retl # encoding: [0xc3] 2302; 2303; X64-LABEL: test_mask_subs_epi8_rmk_256: 2304; X64: # %bb.0: 2305; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2306; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f] 2307; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2308; X64-NEXT: retq # encoding: [0xc3] 2309 %b = load <32 x i8>, <32 x i8>* %ptr_b 2310 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 2311 ret <32 x i8> %res 2312} 2313 2314define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 2315; X86-LABEL: test_mask_subs_epi8_rmkz_256: 2316; X86: # %bb.0: 2317; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2318; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2319; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00] 2320; X86-NEXT: retl # encoding: [0xc3] 2321; 2322; X64-LABEL: test_mask_subs_epi8_rmkz_256: 2323; X64: # %bb.0: 2324; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2325; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07] 2326; X64-NEXT: retq # encoding: [0xc3] 2327 %b = load <32 x i8>, <32 x i8>* %ptr_b 2328 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 2329 ret <32 x i8> %res 2330} 2331 2332declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2333 2334define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { 2335; CHECK-LABEL: test_mask_adds_epu8_rr_128: 2336; CHECK: # %bb.0: 2337; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1] 2338; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2339 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 2340 ret <16 x i8> %res 2341} 2342 2343define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 2344; X86-LABEL: test_mask_adds_epu8_rrk_128: 2345; X86: # %bb.0: 2346; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2347; X86-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1] 2348; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2349; X86-NEXT: retl # encoding: [0xc3] 2350; 2351; X64-LABEL: test_mask_adds_epu8_rrk_128: 2352; X64: # %bb.0: 2353; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2354; X64-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1] 2355; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2356; X64-NEXT: retq # encoding: [0xc3] 2357 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 2358 ret <16 x i8> %res 2359} 2360 2361define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 2362; X86-LABEL: test_mask_adds_epu8_rrkz_128: 2363; 
X86: # %bb.0: 2364; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2365; X86-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1] 2366; X86-NEXT: retl # encoding: [0xc3] 2367; 2368; X64-LABEL: test_mask_adds_epu8_rrkz_128: 2369; X64: # %bb.0: 2370; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2371; X64-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1] 2372; X64-NEXT: retq # encoding: [0xc3] 2373 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 2374 ret <16 x i8> %res 2375} 2376 2377define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 2378; X86-LABEL: test_mask_adds_epu8_rm_128: 2379; X86: # %bb.0: 2380; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2381; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x00] 2382; X86-NEXT: retl # encoding: [0xc3] 2383; 2384; X64-LABEL: test_mask_adds_epu8_rm_128: 2385; X64: # %bb.0: 2386; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0x07] 2387; X64-NEXT: retq # encoding: [0xc3] 2388 %b = load <16 x i8>, <16 x i8>* %ptr_b 2389 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 2390 ret <16 x i8> %res 2391} 2392 2393define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 2394; X86-LABEL: test_mask_adds_epu8_rmk_128: 2395; X86: # %bb.0: 2396; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2397; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2398; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x08] 2399; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2400; X86-NEXT: retl # encoding: [0xc3] 2401; 2402; X64-LABEL: test_mask_adds_epu8_rmk_128: 2403; X64: # %bb.0: 2404; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2405; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f] 2406; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2407; X64-NEXT: retq # encoding: [0xc3] 2408 %b = load <16 x i8>, <16 x i8>* %ptr_b 2409 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 2410 ret <16 x i8> %res 2411} 2412 2413define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 2414; X86-LABEL: test_mask_adds_epu8_rmkz_128: 2415; X86: # %bb.0: 2416; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2417; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2418; X86-NEXT: vpaddusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x00] 2419; X86-NEXT: retl # encoding: [0xc3] 2420; 2421; X64-LABEL: test_mask_adds_epu8_rmkz_128: 2422; X64: # %bb.0: 2423; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2424; X64-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07] 2425; X64-NEXT: retq # encoding: [0xc3] 2426 %b = load <16 x i8>, <16 x i8>* %ptr_b 2427 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 2428 ret 
<16 x i8> %res 2429} 2430 2431declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2432 2433define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { 2434; CHECK-LABEL: test_mask_adds_epu8_rr_256: 2435; CHECK: # %bb.0: 2436; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1] 2437; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2438 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 2439 ret <32 x i8> %res 2440} 2441 2442define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 2443; X86-LABEL: test_mask_adds_epu8_rrk_256: 2444; X86: # %bb.0: 2445; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2446; X86-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1] 2447; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2448; X86-NEXT: retl # encoding: [0xc3] 2449; 2450; X64-LABEL: test_mask_adds_epu8_rrk_256: 2451; X64: # %bb.0: 2452; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2453; X64-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1] 2454; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2455; X64-NEXT: retq # encoding: [0xc3] 2456 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 2457 ret <32 x i8> %res 2458} 2459 2460define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 2461; X86-LABEL: test_mask_adds_epu8_rrkz_256: 2462; X86: # %bb.0: 2463; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2464; X86-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1] 2465; X86-NEXT: retl # encoding: [0xc3] 2466; 2467; X64-LABEL: test_mask_adds_epu8_rrkz_256: 2468; X64: # %bb.0: 2469; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2470; X64-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1] 2471; X64-NEXT: retq # encoding: [0xc3] 2472 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 2473 ret <32 x i8> %res 2474} 2475 2476define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 2477; X86-LABEL: test_mask_adds_epu8_rm_256: 2478; X86: # %bb.0: 2479; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2480; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x00] 2481; X86-NEXT: retl # encoding: [0xc3] 2482; 2483; X64-LABEL: test_mask_adds_epu8_rm_256: 2484; X64: # %bb.0: 2485; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0x07] 2486; X64-NEXT: retq # encoding: [0xc3] 2487 %b = load <32 x i8>, <32 x i8>* %ptr_b 2488 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 2489 ret <32 x i8> %res 2490} 2491 2492define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 2493; X86-LABEL: test_mask_adds_epu8_rmk_256: 2494; X86: # %bb.0: 2495; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2496; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2497; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x08] 2498; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2499; X86-NEXT: retl # encoding: [0xc3] 2500; 2501; X64-LABEL: test_mask_adds_epu8_rmk_256: 2502; X64: # %bb.0: 2503; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2504; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f] 2505; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2506; X64-NEXT: retq # encoding: [0xc3] 2507 %b = load <32 x i8>, <32 x i8>* %ptr_b 2508 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 2509 ret <32 x i8> %res 2510} 2511 2512define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 2513; X86-LABEL: test_mask_adds_epu8_rmkz_256: 2514; X86: # %bb.0: 2515; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2516; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2517; X86-NEXT: vpaddusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x00] 2518; X86-NEXT: retl # encoding: [0xc3] 2519; 2520; X64-LABEL: test_mask_adds_epu8_rmkz_256: 2521; X64: # %bb.0: 2522; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2523; X64-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07] 2524; X64-NEXT: retq # encoding: [0xc3] 2525 %b = load <32 x i8>, <32 x i8>* %ptr_b 2526 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 2527 ret <32 x i8> %res 2528} 2529 2530declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2531 2532define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { 2533; CHECK-LABEL: test_mask_subs_epu8_rr_128: 2534; CHECK: # %bb.0: 2535; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1] 2536; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2537 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 2538 ret <16 x i8> %res 2539} 2540 2541define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 2542; X86-LABEL: test_mask_subs_epu8_rrk_128: 2543; X86: # %bb.0: 2544; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2545; X86-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1] 2546; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2547; X86-NEXT: retl # encoding: [0xc3] 2548; 2549; X64-LABEL: test_mask_subs_epu8_rrk_128: 2550; X64: # %bb.0: 2551; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2552; X64-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1] 2553; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 2554; X64-NEXT: retq # encoding: [0xc3] 2555 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 2556 ret <16 x i8> %res 2557} 2558 2559define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 2560; X86-LABEL: test_mask_subs_epu8_rrkz_128: 2561; X86: # %bb.0: 2562; 
X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2563; X86-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1] 2564; X86-NEXT: retl # encoding: [0xc3] 2565; 2566; X64-LABEL: test_mask_subs_epu8_rrkz_128: 2567; X64: # %bb.0: 2568; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2569; X64-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1] 2570; X64-NEXT: retq # encoding: [0xc3] 2571 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 2572 ret <16 x i8> %res 2573} 2574 2575define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 2576; X86-LABEL: test_mask_subs_epu8_rm_128: 2577; X86: # %bb.0: 2578; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2579; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x00] 2580; X86-NEXT: retl # encoding: [0xc3] 2581; 2582; X64-LABEL: test_mask_subs_epu8_rm_128: 2583; X64: # %bb.0: 2584; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0x07] 2585; X64-NEXT: retq # encoding: [0xc3] 2586 %b = load <16 x i8>, <16 x i8>* %ptr_b 2587 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 2588 ret <16 x i8> %res 2589} 2590 2591define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 2592; X86-LABEL: test_mask_subs_epu8_rmk_128: 2593; X86: # %bb.0: 2594; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2595; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2596; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x08] 2597; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2598; X86-NEXT: retl # encoding: [0xc3] 2599; 2600; X64-LABEL: test_mask_subs_epu8_rmk_128: 2601; X64: # %bb.0: 2602; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2603; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f] 2604; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 2605; X64-NEXT: retq # encoding: [0xc3] 2606 %b = load <16 x i8>, <16 x i8>* %ptr_b 2607 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 2608 ret <16 x i8> %res 2609} 2610 2611define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 2612; X86-LABEL: test_mask_subs_epu8_rmkz_128: 2613; X86: # %bb.0: 2614; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2615; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 2616; X86-NEXT: vpsubusb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x00] 2617; X86-NEXT: retl # encoding: [0xc3] 2618; 2619; X64-LABEL: test_mask_subs_epu8_rmkz_128: 2620; X64: # %bb.0: 2621; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2622; X64-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07] 2623; X64-NEXT: retq # encoding: [0xc3] 2624 %b = load <16 x i8>, <16 x i8>* %ptr_b 2625 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 2626 ret <16 x i8> %res 2627} 
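; The subs_epu8 tests below repeat the unsigned saturating subtract coverage with 256-bit ymm operands and i32 masks, in the same rr/rrk/rrkz and rm/rmk/rmkz forms as the 128-bit tests above.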
2628 2629declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 2630 2631define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { 2632; CHECK-LABEL: test_mask_subs_epu8_rr_256: 2633; CHECK: # %bb.0: 2634; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1] 2635; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2636 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 2637 ret <32 x i8> %res 2638} 2639 2640define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 2641; X86-LABEL: test_mask_subs_epu8_rrk_256: 2642; X86: # %bb.0: 2643; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2644; X86-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1] 2645; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2646; X86-NEXT: retl # encoding: [0xc3] 2647; 2648; X64-LABEL: test_mask_subs_epu8_rrk_256: 2649; X64: # %bb.0: 2650; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2651; X64-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1] 2652; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 2653; X64-NEXT: retq # encoding: [0xc3] 2654 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 2655 ret <32 x i8> %res 2656} 2657 2658define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 2659; X86-LABEL: test_mask_subs_epu8_rrkz_256: 2660; X86: # %bb.0: 2661; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 2662; X86-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1] 2663; X86-NEXT: retl # encoding: [0xc3] 2664; 2665; X64-LABEL: test_mask_subs_epu8_rrkz_256: 2666; X64: # %bb.0: 2667; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2668; X64-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1] 2669; X64-NEXT: retq # encoding: [0xc3] 2670 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 2671 ret <32 x i8> %res 2672} 2673 2674define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 2675; X86-LABEL: test_mask_subs_epu8_rm_256: 2676; X86: # %bb.0: 2677; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2678; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x00] 2679; X86-NEXT: retl # encoding: [0xc3] 2680; 2681; X64-LABEL: test_mask_subs_epu8_rm_256: 2682; X64: # %bb.0: 2683; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0x07] 2684; X64-NEXT: retq # encoding: [0xc3] 2685 %b = load <32 x i8>, <32 x i8>* %ptr_b 2686 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 2687 ret <32 x i8> %res 2688} 2689 2690define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 2691; X86-LABEL: test_mask_subs_epu8_rmk_256: 2692; X86: # %bb.0: 2693; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2694; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: 
[0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2695; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x08] 2696; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2697; X86-NEXT: retl # encoding: [0xc3] 2698; 2699; X64-LABEL: test_mask_subs_epu8_rmk_256: 2700; X64: # %bb.0: 2701; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2702; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f] 2703; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 2704; X64-NEXT: retq # encoding: [0xc3] 2705 %b = load <32 x i8>, <32 x i8>* %ptr_b 2706 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 2707 ret <32 x i8> %res 2708} 2709 2710define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 2711; X86-LABEL: test_mask_subs_epu8_rmkz_256: 2712; X86: # %bb.0: 2713; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2714; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 2715; X86-NEXT: vpsubusb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x00] 2716; X86-NEXT: retl # encoding: [0xc3] 2717; 2718; X64-LABEL: test_mask_subs_epu8_rmkz_256: 2719; X64: # %bb.0: 2720; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 2721; X64-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07] 2722; X64-NEXT: retq # encoding: [0xc3] 2723 %b = load <32 x i8>, <32 x i8>* %ptr_b 2724 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 2725 ret <32 x i8> %res 2726} 2727 2728declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 2729 2730define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2731; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: 2732; X86: # %bb.0: 2733; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 2734; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] 2735; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2736; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2737; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] 2738; X86-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 2739; X86-NEXT: retl # encoding: [0xc3] 2740; 2741; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: 2742; X64: # %bb.0: 2743; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 2744; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] 2745; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2746; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca] 2747; X64-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 2748; X64-NEXT: retq # encoding: [0xc3] 2749 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2) 2750 %2 = bitcast i8 %x3 to <8 x i1> 2751 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1 2752 %4 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2) 2753 
%res2 = add <8 x i16> %3, %4 2754 ret <8 x i16> %res2 2755} 2756 2757define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2758; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 2759; X86: # %bb.0: 2760; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 2761; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] 2762; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2763; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2764; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca] 2765; X86-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 2766; X86-NEXT: retl # encoding: [0xc3] 2767; 2768; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 2769; X64: # %bb.0: 2770; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 2771; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda] 2772; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2773; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca] 2774; X64-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 2775; X64-NEXT: retq # encoding: [0xc3] 2776 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2) 2777 %2 = bitcast i8 %x3 to <8 x i1> 2778 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer 2779 %4 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2) 2780 %res2 = add <8 x i16> %3, %4 2781 ret <8 x i16> %res2 2782} 2783 2784define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2785; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 2786; X86: # %bb.0: 2787; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 2788; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] 2789; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2790; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 2791; X86-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 2792; X86-NEXT: retl # encoding: [0xc3] 2793; 2794; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 2795; X64: # %bb.0: 2796; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 2797; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] 2798; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2799; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca] 2800; X64-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 2801; X64-NEXT: retq # encoding: [0xc3] 2802 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2) 2803 %2 = bitcast i16 %x3 to <16 x i1> 2804 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1 2805 %4 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2) 2806 %res2 = add <16 x i16> %3, %4 2807 ret <16 x i16> %res2 2808} 2809 2810define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x 
i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2811; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 2812; X86: # %bb.0: 2813; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 2814; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] 2815; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2816; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca] 2817; X86-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 2818; X86-NEXT: retl # encoding: [0xc3] 2819; 2820; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 2821; X64: # %bb.0: 2822; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 2823; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda] 2824; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2825; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca] 2826; X64-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 2827; X64-NEXT: retq # encoding: [0xc3] 2828 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2) 2829 %2 = bitcast i16 %x3 to <16 x i1> 2830 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer 2831 %4 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2) 2832 %res2 = add <16 x i16> %3, %4 2833 ret <16 x i16> %res2 2834} 2835 2836declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>) 2837 2838define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2839; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 2840; X86: # %bb.0: 2841; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 2842; X86-NEXT: vpermt2w %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda] 2843; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2844; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2845; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 2846; X86-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 2847; X86-NEXT: retl # encoding: [0xc3] 2848; 2849; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 2850; X64: # %bb.0: 2851; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 2852; X64-NEXT: vpermt2w %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda] 2853; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2854; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca] 2855; X64-NEXT: vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3] 2856; X64-NEXT: retq # encoding: [0xc3] 2857 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) 2858 %2 = bitcast i8 %x3 to <8 x i1> 2859 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1 2860 %4 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) 2861 %res2 = add <8 x i16> %3, %4 2862 ret <8 x i16> %res2 2863} 2864 2865declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>) 2866 2867define <16 
x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2868; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 2869; X86: # %bb.0: 2870; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 2871; X86-NEXT: vpermt2w %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda] 2872; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2873; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 2874; X86-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 2875; X86-NEXT: retl # encoding: [0xc3] 2876; 2877; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 2878; X64: # %bb.0: 2879; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 2880; X64-NEXT: vpermt2w %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda] 2881; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2882; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca] 2883; X64-NEXT: vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3] 2884; X64-NEXT: retq # encoding: [0xc3] 2885 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) 2886 %2 = bitcast i16 %x3 to <16 x i1> 2887 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1 2888 %4 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) 2889 %res2 = add <16 x i16> %3, %4 2890 ret <16 x i16> %res2 2891} 2892 2893declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) 2894 2895define <8 x i16> @test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2896; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 2897; X86: # %bb.0: 2898; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xd9] 2899; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2900; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2901; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 2902; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2903; X86-NEXT: retl # encoding: [0xc3] 2904; 2905; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 2906; X64: # %bb.0: 2907; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xd9] 2908; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2909; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 2910; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2911; X64-NEXT: retq # encoding: [0xc3] 2912 %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1) 2913 %2 = bitcast i8 %x3 to <8 x i1> 2914 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 2915 %4 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1) 2916 %res2 = add <8 x i16> %3, %4 2917 ret <8 x i16> %res2 2918} 2919 2920declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) 2921 2922define <16 x i16> @test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2923; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 2924; X86: # %bb.0: 2925; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm3 # EVEX TO 
VEX Compression encoding: [0xc5,0xfd,0xe4,0xd9] 2926; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2927; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 2928; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 2929; X86-NEXT: retl # encoding: [0xc3] 2930; 2931; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 2932; X64: # %bb.0: 2933; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xd9] 2934; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2935; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 2936; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 2937; X64-NEXT: retq # encoding: [0xc3] 2938 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1) 2939 %2 = bitcast i16 %x3 to <16 x i1> 2940 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 2941 %4 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1) 2942 %res2 = add <16 x i16> %3, %4 2943 ret <16 x i16> %res2 2944} 2945 2946declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) 2947 2948define <8 x i16> @test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 2949; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 2950; X86: # %bb.0: 2951; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xd9] 2952; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 2953; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 2954; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 2955; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2956; X86-NEXT: retl # encoding: [0xc3] 2957; 2958; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 2959; X64: # %bb.0: 2960; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xd9] 2961; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2962; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 2963; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 2964; X64-NEXT: retq # encoding: [0xc3] 2965 %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %x0, <8 x i16> %x1) 2966 %2 = bitcast i8 %x3 to <8 x i1> 2967 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 2968 %4 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %x0, <8 x i16> %x1) 2969 %res2 = add <8 x i16> %3, %4 2970 ret <8 x i16> %res2 2971} 2972 2973declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) 2974 2975define <16 x i16> @test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 2976; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 2977; X86: # %bb.0: 2978; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xd9] 2979; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 2980; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 2981; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 2982; X86-NEXT: retl # encoding: [0xc3] 2983; 2984; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 2985; X64: # %bb.0: 2986; X64-NEXT: vpmulhw %ymm1, 
%ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xd9] 2987; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 2988; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 2989; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 2990; X64-NEXT: retq # encoding: [0xc3] 2991 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %x0, <16 x i16> %x1) 2992 %2 = bitcast i16 %x3 to <16 x i1> 2993 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 2994 %4 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %x0, <16 x i16> %x1) 2995 %res2 = add <16 x i16> %3, %4 2996 ret <16 x i16> %res2 2997} 2998 2999declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) 3000 3001define <8 x i16> @test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 3002; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 3003; X86: # %bb.0: 3004; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xd9] 3005; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 3006; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3007; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 3008; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 3009; X86-NEXT: retl # encoding: [0xc3] 3010; 3011; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 3012; X64: # %bb.0: 3013; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xd9] 3014; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3015; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 3016; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3] 3017; X64-NEXT: retq # encoding: [0xc3] 3018 %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1) 3019 %2 = bitcast i8 %x3 to <8 x i1> 3020 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2 3021 %4 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1) 3022 %res2 = add <8 x i16> %3, %4 3023 ret <8 x i16> %res2 3024} 3025 3026declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) 3027 3028define <16 x i16> @test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 3029; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 3030; X86: # %bb.0: 3031; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xd9] 3032; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3033; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 3034; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 3035; X86-NEXT: retl # encoding: [0xc3] 3036; 3037; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 3038; X64: # %bb.0: 3039; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xd9] 3040; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3041; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 3042; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3] 3043; X64-NEXT: retq # encoding: [0xc3] 3044 %1 = call <16 x i16> 
@llvm.x86.avx2.pmul.hr.sw(<16 x i16> %x0, <16 x i16> %x1) 3045 %2 = bitcast i16 %x3 to <16 x i1> 3046 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2 3047 %4 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %x0, <16 x i16> %x1) 3048 %res2 = add <16 x i16> %3, %4 3049 ret <16 x i16> %res2 3050} 3051 3052declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) 3053 3054define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 3055; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128: 3056; X86: # %bb.0: 3057; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3058; X86-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] 3059; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] 3060; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3061; X86-NEXT: vpmovwb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0] 3062; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3063; X86-NEXT: retl # encoding: [0xc3] 3064; 3065; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128: 3066; X64: # %bb.0: 3067; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3068; X64-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] 3069; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] 3070; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3071; X64-NEXT: vpmovwb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0] 3072; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3073; X64-NEXT: retq # encoding: [0xc3] 3074 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 3075 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 3076 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 3077 %res3 = add <16 x i8> %res0, %res1 3078 %res4 = add <16 x i8> %res3, %res2 3079 ret <16 x i8> %res4 3080} 3081 3082declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8) 3083 3084define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 3085; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: 3086; X86: # %bb.0: 3087; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3088; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3089; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3090; X86-NEXT: vpmovwb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x00] 3091; X86-NEXT: vpmovwb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x00] 3092; X86-NEXT: retl # encoding: [0xc3] 3093; 3094; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: 3095; X64: # %bb.0: 3096; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3097; X64-NEXT: vpmovwb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07] 3098; X64-NEXT: vpmovwb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07] 3099; X64-NEXT: retq # encoding: [0xc3] 3100 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 3101 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) 3102 ret void 3103} 3104 3105declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x 
i16>, <16 x i8>, i8) 3106 3107define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 3108; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: 3109; X86: # %bb.0: 3110; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3111; X86-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] 3112; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] 3113; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3114; X86-NEXT: vpmovswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0] 3115; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3116; X86-NEXT: retl # encoding: [0xc3] 3117; 3118; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: 3119; X64: # %bb.0: 3120; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3121; X64-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] 3122; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] 3123; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3124; X64-NEXT: vpmovswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0] 3125; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3126; X64-NEXT: retq # encoding: [0xc3] 3127 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 3128 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 3129 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 3130 %res3 = add <16 x i8> %res0, %res1 3131 %res4 = add <16 x i8> %res3, %res2 3132 ret <16 x i8> %res4 3133} 3134 3135declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8) 3136 3137define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 3138; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: 3139; X86: # %bb.0: 3140; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3141; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3142; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3143; X86-NEXT: vpmovswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x00] 3144; X86-NEXT: vpmovswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x00] 3145; X86-NEXT: retl # encoding: [0xc3] 3146; 3147; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: 3148; X64: # %bb.0: 3149; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3150; X64-NEXT: vpmovswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07] 3151; X64-NEXT: vpmovswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07] 3152; X64-NEXT: retq # encoding: [0xc3] 3153 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 3154 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) 3155 ret void 3156} 3157 3158declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) 3159 3160define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 3161; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: 3162; X86: # %bb.0: 3163; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3164; X86-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: 
[0x62,0xf2,0x7e,0x89,0x10,0xc2] 3165; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] 3166; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3167; X86-NEXT: vpmovuswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0] 3168; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3169; X86-NEXT: retl # encoding: [0xc3] 3170; 3171; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: 3172; X64: # %bb.0: 3173; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3174; X64-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] 3175; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] 3176; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3177; X64-NEXT: vpmovuswb %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0] 3178; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3179; X64-NEXT: retq # encoding: [0xc3] 3180 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 3181 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 3182 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 3183 %res3 = add <16 x i8> %res0, %res1 3184 %res4 = add <16 x i8> %res3, %res2 3185 ret <16 x i8> %res4 3186} 3187 3188declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8) 3189 3190define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 3191; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: 3192; X86: # %bb.0: 3193; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 3194; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 3195; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3196; X86-NEXT: vpmovuswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x00] 3197; X86-NEXT: vpmovuswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x00] 3198; X86-NEXT: retl # encoding: [0xc3] 3199; 3200; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: 3201; X64: # %bb.0: 3202; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3203; X64-NEXT: vpmovuswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07] 3204; X64-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07] 3205; X64-NEXT: retq # encoding: [0xc3] 3206 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 3207 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) 3208 ret void 3209} 3210 3211declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16) 3212 3213define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 3214; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 3215; X86: # %bb.0: 3216; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3217; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 3218; X86-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc2] 3219; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3220; X86-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0] 3221; X86-NEXT: vpaddb %xmm1, 
%xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3222; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3223; X86-NEXT: retl # encoding: [0xc3] 3224; 3225; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 3226; X64: # %bb.0: 3227; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3228; X64-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc2] 3229; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 3230; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3231; X64-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0] 3232; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3233; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3234; X64-NEXT: retq # encoding: [0xc3] 3235 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 3236 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 3237 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 3238 %res3 = add <16 x i8> %res0, %res1 3239 %res4 = add <16 x i8> %res3, %res2 3240 ret <16 x i8> %res4 3241} 3242 3243declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16) 3244 3245define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { 3246; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: 3247; X86: # %bb.0: 3248; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3249; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3250; X86-NEXT: vpmovwb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x00] 3251; X86-NEXT: vpmovwb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x00] 3252; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3253; X86-NEXT: retl # encoding: [0xc3] 3254; 3255; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: 3256; X64: # %bb.0: 3257; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3258; X64-NEXT: vpmovwb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07] 3259; X64-NEXT: vpmovwb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07] 3260; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3261; X64-NEXT: retq # encoding: [0xc3] 3262 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) 3263 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) 3264 ret void 3265} 3266 3267declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16) 3268 3269define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 3270; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: 3271; X86: # %bb.0: 3272; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3273; X86-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] 3274; X86-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc2] 3275; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3276; X86-NEXT: vpmovswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0] 3277; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3278; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3279; X86-NEXT: retl # encoding: [0xc3] 3280; 3281; X64-LABEL: 
test_int_x86_avx512_mask_pmovs_wb_256: 3282; X64: # %bb.0: 3283; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3284; X64-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc2] 3285; X64-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] 3286; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3287; X64-NEXT: vpmovswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0] 3288; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3289; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3290; X64-NEXT: retq # encoding: [0xc3] 3291 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 3292 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 3293 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 3294 %res3 = add <16 x i8> %res0, %res1 3295 %res4 = add <16 x i8> %res3, %res2 3296 ret <16 x i8> %res4 3297} 3298 3299declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16) 3300 3301define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { 3302; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: 3303; X86: # %bb.0: 3304; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 3305; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3306; X86-NEXT: vpmovswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x00] 3307; X86-NEXT: vpmovswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x00] 3308; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3309; X86-NEXT: retl # encoding: [0xc3] 3310; 3311; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: 3312; X64: # %bb.0: 3313; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 3314; X64-NEXT: vpmovswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07] 3315; X64-NEXT: vpmovswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07] 3316; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3317; X64-NEXT: retq # encoding: [0xc3] 3318 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) 3319 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) 3320 ret void 3321} 3322 3323declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16) 3324 3325define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 3326; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: 3327; X86: # %bb.0: 3328; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 3329; X86-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] 3330; X86-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc2] 3331; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca] 3332; X86-NEXT: vpmovuswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0] 3333; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] 3334; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 3335; X86-NEXT: retl # encoding: [0xc3] 3336; 3337; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: 3338; X64: # %bb.0: 3339; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 3340; X64-NEXT: vpmovuswb %ymm0, %xmm2 
declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
; X86-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc2]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
; X86-NEXT: vpmovuswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0]
; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc2]
; X64-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
; X64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xca]
; X64-NEXT: vpmovuswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0]
; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)

define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmovuswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x00]
; X86-NEXT: vpmovuswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpmovuswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
; X64-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
  ret void
}

declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)

define <4 x i32> @test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
; X86: # %bb.0:
; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
; X86-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
; X64: # %bb.0:
; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
; X64-NEXT: vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x2
  %4 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1)
  %res2 = add <4 x i32> %3, %4
  ret <4 x i32> %res2
}
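; 256-bit vpmaddwd: the result merge-masked into %x2 under %x3 is added to the
; unmasked result.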
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)

define <8 x i32> @test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
; X86: # %bb.0:
; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
; X64: # %bb.0:
; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
  %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1)
  %res2 = add <8 x i32> %3, %4
  ret <8 x i32> %res2
}

declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)

define <8 x i16> @test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
; X86: # %bb.0:
; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
; X86-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
; X64: # %bb.0:
; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
; X64-NEXT: vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  %4 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1)
  %res2 = add <8 x i16> %3, %4
  ret <8 x i16> %res2
}
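; 256-bit vpmaddubsw, same masked-plus-unmasked pattern with an i16 mask.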
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)

define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
; X86: # %bb.0:
; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
; X86-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
; X64: # %bb.0:
; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
; X64-NEXT: vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1)
  %res2 = add <16 x i16> %3, %4
  ret <16 x i16> %res2
}

declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32)

define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
; X86: # %bb.0:
; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02]
; X86-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
; X64: # %bb.0:
; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02]
; X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
  %4 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
  %5 = bitcast i8 %x4 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
  %res3 = add <8 x i16> %3, %6
  %res4 = add <8 x i16> %7, %res3
  ret <8 x i16> %res4
}
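; 256-bit vdbpsadbw with immediate 2: the merge-masked, zero-masked and
; unmasked results are summed.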
declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32)

define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
; X86: # %bb.0:
; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02]
; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
; X64: # %bb.0:
; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02]
; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
  %4 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
  %5 = bitcast i16 %x4 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
  %res3 = add <16 x i16> %3, %6
  %res4 = add <16 x i16> %res3, %7
  ret <16 x i16> %res4
}

declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
; X86: # %bb.0:
; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
; X64: # %bb.0:
; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  %res3 = add <16 x i16> %res, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}
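; 128-bit variable logical right shift by word (vpsrlvw): the mask.psrlv8.hi
; intrinsic is called with a passthru, a zeroed passthru and an all-ones mask,
; and the three results are summed.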
declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
; X86: # %bb.0:
; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X86-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
; X64: # %bb.0:
; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
; X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res3 = add <8 x i16> %res, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}
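; Variable arithmetic right shift by word (vpsravw), 256-bit and 128-bit forms.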
declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X86: # %bb.0:
; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
; X64: # %bb.0:
; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  %res3 = add <16 x i16> %res, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X86-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res3 = add <8 x i16> %res, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}
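; Variable left shift by word (vpsllvw), 256-bit and 128-bit forms.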
declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)

define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xd9]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
  %res2 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
  %res3 = add <16 x i16> %res, %res1
  %res4 = add <16 x i16> %res3, %res2
  ret <16 x i16> %res4
}

declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)

define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X86-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xd9]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
  %res3 = add <8 x i16> %res, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}
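; vpermw: permute the words of %x0 using the indices in %x1. The merge-masked,
; zero-masked and unmasked results are summed.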
declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X86: # %bb.0:
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X86-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X64: # %bb.0:
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X64-NEXT: vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
  %4 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
  %7 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
  %res3 = add <8 x i16> %3, %6
  %res4 = add <8 x i16> %res3, %7
  ret <8 x i16> %res4
}
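; 256-bit vpermw, same pattern with an i16 mask.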
declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X86: # %bb.0:
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X86-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X64: # %bb.0:
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8]
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X64-NEXT: vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
  %4 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
  %7 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
  %res3 = add <16 x i16> %3, %6
  %res4 = add <16 x i16> %res3, %7
  ret <16 x i16> %res4
}