; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)

define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_expand_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_expand_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
  ret <8 x i16> %res
}

define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)

define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)

define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) {
; X86-LABEL: test_expand_load_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_expand_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_expand_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)

define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)

define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) {
; X86-LABEL: test_compress_store_w_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 -1)
  ret void
}

define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_compress_w_128(<8 x i16> %data) {
; CHECK-LABEL: test_compress_w_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask)

define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)

define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) {
; X86-LABEL: test_compress_store_b_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 -1)
  ret void
}

define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_b_128:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_compress_b_128(<16 x i8> %data) {
; CHECK-LABEL: test_compress_b_128:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask)

define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)

define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) {
; X86-LABEL: test_expand_load_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_expand_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_expand_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
  ret <16 x i16> %res
}

define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)

define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)

define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) {
; X86-LABEL: test_expand_load_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_256:
; X64: # %bb.0:
; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_expand_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_expand_b_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)

define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)

define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) {
; X86-LABEL: test_compress_store_w_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 -1)
  ret void
}

define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_compress_w_256(<16 x i16> %data) {
; CHECK-LABEL: test_compress_w_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask)

define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)

define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) {
; X86-LABEL: test_compress_store_b_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_256:
; X64: # %bb.0:
; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 -1)
  ret void
}

define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_b_256:
; X86: # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_compress_b_256(<32 x i8> %data) {
; CHECK-LABEL: test_compress_b_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask)

define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24, <4 x i32> zeroinitializer,i8 %x4)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}
declare <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X86-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc1,0x17]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X64-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc1,0x17]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23, <8 x i32> %x3, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X86-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc1,0x17]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X64-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc1,0x17]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23, <2 x i64> %x3, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X86-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc1,0x17]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X64-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc1,0x17]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23, <4 x i64> %x3, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
declare <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc1,0x07]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06]
; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc1,0x07]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7, <8 x i16> %x3, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}
declare <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)

define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc1,0x07]
; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06]
; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc1,0x07]
; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7, <16 x i16> %x3, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}
declare <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)

define <4 x i32>@test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17]
; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23, <4 x i32> %x3, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24, <4 x i32> zeroinitializer,i8 %x4)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}
declare <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X86-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc1,0x17]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X64-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc1,0x17]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23, <8 x i32> %x3, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X86-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc1,0x17]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X64-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc1,0x17]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23, <2 x i64> %x3, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X86-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc1,0x17]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X64-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc1,0x17]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23, <4 x i64> %x3, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
declare <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc1,0x07]
; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06]
; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc1,0x07]
; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6, <8 x i16> %x3, i8 %x4)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7, <8 x i16> %x3, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}
declare <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)

define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc1,0x07]
; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06]
; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc1,0x07]
; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6, <16 x i16> %x3, i16 %x4)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7, <16 x i16> %x3, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}
declare <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)

define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128_2(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128_2:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17]
; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3
  %4 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23)
  %5 = call <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24)
  %6 = bitcast i8 %x4 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
  %res3 = add <4 x i32> %3, %4
  %res4 = add <4 x i32> %res3, %7
  ret <4 x i32> %res4
}
declare <4 x i32> @llvm.x86.avx512.vpshld.d.128(<4 x i32>, <4 x i32>, i32)

define <8 x i32>@test_int_x86_avx512_mask_vpshld_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X86-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc1,0x17]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256_2:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X64-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc1,0x17]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3
  %4 = call <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23)
  %res2 = add <8 x i32> %3, %4
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx512.vpshld.d.256(<8 x i32>, <8 x i32>, i32)

define <2 x i64>@test_int_x86_avx512_mask_vpshld_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X86-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc1,0x17]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 1096; X86-NEXT: retl # encoding: [0xc3] 1097; 1098; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128_2: 1099; X64: # %bb.0: 1100; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1101; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 1102; X64-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc1,0x17] 1103; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 1104; X64-NEXT: retq # encoding: [0xc3] 1105 %1 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22) 1106 %2 = bitcast i8 %x4 to <8 x i1> 1107 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 1108 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 1109 %4 = call <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23) 1110 %res2 = add <2 x i64> %3, %4 1111 ret <2 x i64> %res2 1112} 1113declare <2 x i64> @llvm.x86.avx512.vpshld.q.128(<2 x i64>, <2 x i64>, i32) 1114 1115define <4 x i64>@test_int_x86_avx512_mask_vpshld_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 1116; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2: 1117; X86: # %bb.0: 1118; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1119; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1120; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 1121; X86-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc1,0x17] 1122; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 1123; X86-NEXT: retl # encoding: [0xc3] 1124; 1125; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256_2: 1126; X64: # %bb.0: 1127; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1128; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 1129; X64-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc1,0x17] 1130; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 1131; X64-NEXT: retq # encoding: [0xc3] 1132 %1 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22) 1133 %2 = bitcast i8 %x4 to <8 x i1> 1134 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1135 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 1136 %4 = call <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23) 1137 %res2 = add <4 x i64> %3, %4 1138 ret <4 x i64> %res2 1139} 1140declare <4 x i64> @llvm.x86.avx512.vpshld.q.256(<4 x i64>, <4 x i64>, i32) 1141 1142define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 1143; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128_2: 1144; X86: # %bb.0: 1145; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1146; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1147; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06] 1148; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc1,0x07] 1149; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 1150; X86-NEXT: retl # encoding: [0xc3] 1151; 1152; X64-LABEL: 
test_int_x86_avx512_mask_vpshld_w_128_2: 1153; X64: # %bb.0: 1154; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1155; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06] 1156; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc1,0x07] 1157; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 1158; X64-NEXT: retq # encoding: [0xc3] 1159 %1 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6) 1160 %2 = bitcast i8 %x4 to <8 x i1> 1161 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 1162 %4 = call <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7) 1163 %res2 = add <8 x i16> %3, %4 1164 ret <8 x i16> %res2 1165} 1166declare <8 x i16> @llvm.x86.avx512.vpshld.w.128(<8 x i16>, <8 x i16>, i32) 1167 1168define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 1169; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2: 1170; X86: # %bb.0: 1171; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1172; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06] 1173; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc1,0x07] 1174; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 1175; X86-NEXT: retl # encoding: [0xc3] 1176; 1177; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256_2: 1178; X64: # %bb.0: 1179; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1180; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06] 1181; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc1,0x07] 1182; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 1183; X64-NEXT: retq # encoding: [0xc3] 1184 %1 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6) 1185 %2 = bitcast i16 %x4 to <16 x i1> 1186 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 1187 %4 = call <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7) 1188 %res2 = add <16 x i16> %3, %4 1189 ret <16 x i16> %res2 1190} 1191declare <16 x i16> @llvm.x86.avx512.vpshld.w.256(<16 x i16>, <16 x i16>, i32) 1192 1193define <4 x i32>@test_int_x86_avx512_mask_vpshrd_d_128_2(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 1194; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2: 1195; X86: # %bb.0: 1196; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1197; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1198; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] 1199; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 1200; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] 1201; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1202; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 1203; X86-NEXT: retl # encoding: [0xc3] 1204; 1205; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128_2: 1206; X64: # %bb.0: 1207; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1208; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 
{%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] 1209; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 1210; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] 1211; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1212; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 1213; X64-NEXT: retq # encoding: [0xc3] 1214 %1 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22) 1215 %2 = bitcast i8 %x4 to <8 x i1> 1216 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1217 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 1218 %4 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 23) 1219 %5 = call <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 24) 1220 %6 = bitcast i8 %x4 to <8 x i1> 1221 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1222 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 1223 %res3 = add <4 x i32> %3, %4 1224 %res4 = add <4 x i32> %res3, %7 1225 ret <4 x i32> %res4 1226} 1227declare <4 x i32> @llvm.x86.avx512.vpshrd.d.128(<4 x i32>, <4 x i32>, i32) 1228 1229define <8 x i32>@test_int_x86_avx512_mask_vpshrd_d_256_2(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 1230; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2: 1231; X86: # %bb.0: 1232; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1233; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1234; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 1235; X86-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc1,0x17] 1236; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 1237; X86-NEXT: retl # encoding: [0xc3] 1238; 1239; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256_2: 1240; X64: # %bb.0: 1241; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1242; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 1243; X64-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc1,0x17] 1244; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 1245; X64-NEXT: retq # encoding: [0xc3] 1246 %1 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22) 1247 %2 = bitcast i8 %x4 to <8 x i1> 1248 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 1249 %4 = call <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 23) 1250 %res2 = add <8 x i32> %3, %4 1251 ret <8 x i32> %res2 1252} 1253declare <8 x i32> @llvm.x86.avx512.vpshrd.d.256(<8 x i32>, <8 x i32>, i32) 1254 1255define <2 x i64>@test_int_x86_avx512_mask_vpshrd_q_128_2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 1256; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2: 1257; X86: # %bb.0: 1258; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1259; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1260; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 1261; X86-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc1,0x17] 1262; X86-NEXT: vpaddq 
%xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 1263; X86-NEXT: retl # encoding: [0xc3] 1264; 1265; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128_2: 1266; X64: # %bb.0: 1267; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1268; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 1269; X64-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc1,0x17] 1270; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 1271; X64-NEXT: retq # encoding: [0xc3] 1272 %1 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22) 1273 %2 = bitcast i8 %x4 to <8 x i1> 1274 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 1275 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 1276 %4 = call <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 23) 1277 %res2 = add <2 x i64> %3, %4 1278 ret <2 x i64> %res2 1279} 1280declare <2 x i64> @llvm.x86.avx512.vpshrd.q.128(<2 x i64>, <2 x i64>, i32) 1281 1282define <4 x i64>@test_int_x86_avx512_mask_vpshrd_q_256_2(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 1283; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2: 1284; X86: # %bb.0: 1285; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1286; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1287; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 1288; X86-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc1,0x17] 1289; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 1290; X86-NEXT: retl # encoding: [0xc3] 1291; 1292; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256_2: 1293; X64: # %bb.0: 1294; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1295; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 1296; X64-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc1,0x17] 1297; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 1298; X64-NEXT: retq # encoding: [0xc3] 1299 %1 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22) 1300 %2 = bitcast i8 %x4 to <8 x i1> 1301 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1302 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 1303 %4 = call <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 23) 1304 %res2 = add <4 x i64> %3, %4 1305 ret <4 x i64> %res2 1306} 1307declare <4 x i64> @llvm.x86.avx512.vpshrd.q.256(<4 x i64>, <4 x i64>, i32) 1308 1309define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128_2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 1310; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128_2: 1311; X86: # %bb.0: 1312; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1313; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1314; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06] 1315; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc1,0x07] 1316; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 1317; X86-NEXT: retl # encoding: [0xc3] 1318; 1319; X64-LABEL: 
test_int_x86_avx512_mask_vpshrd_w_128_2: 1320; X64: # %bb.0: 1321; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1322; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06] 1323; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc1,0x07] 1324; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 1325; X64-NEXT: retq # encoding: [0xc3] 1326 %1 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 6) 1327 %2 = bitcast i8 %x4 to <8 x i1> 1328 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 1329 %4 = call <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 7) 1330 %res2 = add <8 x i16> %3, %4 1331 ret <8 x i16> %res2 1332} 1333declare <8 x i16> @llvm.x86.avx512.vpshrd.w.128(<8 x i16>, <8 x i16>, i32) 1334 1335define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256_2(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 1336; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2: 1337; X86: # %bb.0: 1338; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1339; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06] 1340; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc1,0x07] 1341; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 1342; X86-NEXT: retl # encoding: [0xc3] 1343; 1344; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256_2: 1345; X64: # %bb.0: 1346; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1347; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06] 1348; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc1,0x07] 1349; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 1350; X64-NEXT: retq # encoding: [0xc3] 1351 %1 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 6) 1352 %2 = bitcast i16 %x4 to <16 x i1> 1353 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 1354 %4 = call <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 7) 1355 %res2 = add <16 x i16> %3, %4 1356 ret <16 x i16> %res2 1357} 1358declare <16 x i16> @llvm.x86.avx512.vpshrd.w.256(<16 x i16>, <16 x i16>, i32) 1359 1360declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1361declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1362 1363define <8 x i32>@test_int_x86_avx512_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 1364; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_256: 1365; CHECK: # %bb.0: 1366; CHECK-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x73,0xc2] 1367; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1368 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 1369 ret <8 x i32> %res 1370} 1371 1372define <8 x i32>@test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { 1373; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: 1374; X86: # %bb.0: 1375; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1376; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1377; X86-NEXT: kmovd %ecx, %k1 # encoding: 
[0xc5,0xfb,0x92,0xc9] 1378; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1379; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x18] 1380; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] 1381; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 1382; X86-NEXT: retl # encoding: [0xc3] 1383; 1384; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: 1385; X64: # %bb.0: 1386; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1387; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1388; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x1f] 1389; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] 1390; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 1391; X64-NEXT: retq # encoding: [0xc3] 1392 %x2 = load <8 x i32>, <8 x i32>* %x2p 1393 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 1394 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 1395 %res2 = add <8 x i32> %res, %res1 1396 ret <8 x i32> %res2 1397} 1398 1399declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1400declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1401 1402define <4 x i32>@test_int_x86_avx512_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 1403; CHECK-LABEL: test_int_x86_avx512_vpshrdv_d_128: 1404; CHECK: # %bb.0: 1405; CHECK-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x73,0xc2] 1406; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1407 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 1408 ret <4 x i32> %res 1409} 1410 1411define <4 x i32>@test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { 1412; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: 1413; X86: # %bb.0: 1414; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1415; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1416; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1417; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1418; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x18] 1419; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] 1420; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1421; X86-NEXT: retl # encoding: [0xc3] 1422; 1423; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: 1424; X64: # %bb.0: 1425; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1426; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1427; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x1f] 1428; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] 1429; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1430; X64-NEXT: retq # encoding: [0xc3] 1431 %x2 = load <4 x i32>, <4 x i32>* %x2p 1432 %res = 
call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 1433 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 1434 %res2 = add <4 x i32> %res, %res1 1435 ret <4 x i32> %res2 1436} 1437 1438declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1439declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1440 1441define <4 x i64>@test_int_x86_avx512_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 1442; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_256: 1443; CHECK: # %bb.0: 1444; CHECK-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x73,0xc2] 1445; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1446 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 1447 ret <4 x i64> %res 1448} 1449 1450define <4 x i64>@test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) { 1451; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: 1452; X86: # %bb.0: 1453; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1454; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1455; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1456; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1457; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x18] 1458; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] 1459; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1460; X86-NEXT: retl # encoding: [0xc3] 1461; 1462; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: 1463; X64: # %bb.0: 1464; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1465; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1466; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x1f] 1467; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] 1468; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1469; X64-NEXT: retq # encoding: [0xc3] 1470 %x2 = load <4 x i64>, <4 x i64>* %x2p 1471 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 1472 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3) 1473 %res2 = add <4 x i64> %res, %res1 1474 ret <4 x i64> %res2 1475} 1476 1477declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1478declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1479 1480define <2 x i64>@test_int_x86_avx512_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 1481; CHECK-LABEL: test_int_x86_avx512_vpshrdv_q_128: 1482; CHECK: # %bb.0: 1483; CHECK-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x73,0xc2] 1484; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1485 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 1486 ret <2 x i64> %res 1487} 1488 1489define <2 x i64>@test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 
%x3) { 1490; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: 1491; X86: # %bb.0: 1492; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1493; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1494; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1495; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1496; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x18] 1497; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] 1498; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1499; X86-NEXT: retl # encoding: [0xc3] 1500; 1501; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: 1502; X64: # %bb.0: 1503; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1504; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1505; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x1f] 1506; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] 1507; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1508; X64-NEXT: retq # encoding: [0xc3] 1509 %x2 = load <2 x i64>, <2 x i64>* %x2p 1510 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 1511 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3) 1512 %res2 = add <2 x i64> %res, %res1 1513 ret <2 x i64> %res2 1514} 1515 1516declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1517declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1518 1519define <16 x i16>@test_int_x86_avx512_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1520; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_256: 1521; CHECK: # %bb.0: 1522; CHECK-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2] 1523; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1524 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 1525 ret <16 x i16> %res 1526} 1527 1528define <16 x i16>@test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) { 1529; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: 1530; X86: # %bb.0: 1531; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1532; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1533; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1534; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x18] 1535; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xc2] 1536; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1537; X86-NEXT: retl # encoding: [0xc3] 1538; 1539; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: 1540; X64: # %bb.0: 1541; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1542; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1543; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x1f] 1544; X64-NEXT: vpshrdvw %ymm2, %ymm1, 
%ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x72,0xc2] 1545; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1546; X64-NEXT: retq # encoding: [0xc3] 1547 %x2 = load <16 x i16>, <16 x i16>* %x2p 1548 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1549 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3) 1550 %res2 = add <16 x i16> %res, %res1 1551 ret <16 x i16> %res2 1552} 1553 1554declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1555declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1556 1557define <8 x i16>@test_int_x86_avx512_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 1558; CHECK-LABEL: test_int_x86_avx512_vpshrdv_w_128: 1559; CHECK: # %bb.0: 1560; CHECK-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x72,0xc2] 1561; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1562 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 1563 ret <8 x i16> %res 1564} 1565 1566define <8 x i16>@test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) { 1567; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: 1568; X86: # %bb.0: 1569; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1570; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1571; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1572; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1573; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x18] 1574; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] 1575; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1576; X86-NEXT: retl # encoding: [0xc3] 1577; 1578; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: 1579; X64: # %bb.0: 1580; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1581; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1582; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x1f] 1583; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] 1584; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1585; X64-NEXT: retq # encoding: [0xc3] 1586 %x2 = load <8 x i16>, <8 x i16>* %x2p 1587 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1588 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 %x3) 1589 %res2 = add <8 x i16> %res, %res1 1590 ret <8 x i16> %res2 1591} 1592 1593declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1594declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1595 1596define <8 x i32>@test_int_x86_avx512_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 1597; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_256: 1598; CHECK: # %bb.0: 1599; CHECK-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x71,0xc2] 1600; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1601 
%res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 1602 ret <8 x i32> %res 1603} 1604 1605define <8 x i32>@test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { 1606; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: 1607; X86: # %bb.0: 1608; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1609; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1610; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1611; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1612; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x18] 1613; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] 1614; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 1615; X86-NEXT: retl # encoding: [0xc3] 1616; 1617; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: 1618; X64: # %bb.0: 1619; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1620; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1621; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x1f] 1622; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] 1623; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 1624; X64-NEXT: retq # encoding: [0xc3] 1625 %x2 = load <8 x i32>, <8 x i32>* %x2p 1626 %res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 1627 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 1628 %res2 = add <8 x i32> %res, %res1 1629 ret <8 x i32> %res2 1630} 1631 1632declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1633declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1634 1635define <4 x i32>@test_int_x86_avx512_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 1636; CHECK-LABEL: test_int_x86_avx512_vpshldv_d_128: 1637; CHECK: # %bb.0: 1638; CHECK-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x71,0xc2] 1639; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1640 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 1641 ret <4 x i32> %res 1642} 1643 1644define <4 x i32>@test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { 1645; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: 1646; X86: # %bb.0: 1647; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1648; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1649; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1650; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1651; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x18] 1652; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] 1653; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1654; X86-NEXT: retl # encoding: [0xc3] 1655; 1656; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: 1657; X64: 
# %bb.0: 1658; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1659; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1660; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x1f] 1661; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] 1662; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1663; X64-NEXT: retq # encoding: [0xc3] 1664 %x2 = load <4 x i32>, <4 x i32>* %x2p 1665 %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 1666 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 1667 %res2 = add <4 x i32> %res, %res1 1668 ret <4 x i32> %res2 1669} 1670 1671declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1672declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 1673 1674define <4 x i64>@test_int_x86_avx512_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) { 1675; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_256: 1676; CHECK: # %bb.0: 1677; CHECK-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x71,0xc2] 1678; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1679 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 1680 ret <4 x i64> %res 1681} 1682 1683define <4 x i64>@test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) { 1684; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: 1685; X86: # %bb.0: 1686; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1687; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1688; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1689; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1690; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x18] 1691; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] 1692; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1693; X86-NEXT: retl # encoding: [0xc3] 1694; 1695; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: 1696; X64: # %bb.0: 1697; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1698; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1699; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x1f] 1700; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] 1701; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1702; X64-NEXT: retq # encoding: [0xc3] 1703 %x2 = load <4 x i64>, <4 x i64>* %x2p 1704 %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 1705 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 %x3) 1706 %res2 = add <4 x i64> %res, %res1 1707 ret <4 x i64> %res2 1708} 1709 1710declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1711declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 1712 1713define <2 x 
i64>@test_int_x86_avx512_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) { 1714; CHECK-LABEL: test_int_x86_avx512_vpshldv_q_128: 1715; CHECK: # %bb.0: 1716; CHECK-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x71,0xc2] 1717; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1718 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 1719 ret <2 x i64> %res 1720} 1721 1722define <2 x i64>@test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) { 1723; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: 1724; X86: # %bb.0: 1725; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1726; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1727; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1728; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1729; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x18] 1730; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] 1731; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1732; X86-NEXT: retl # encoding: [0xc3] 1733; 1734; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: 1735; X64: # %bb.0: 1736; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1737; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1738; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x1f] 1739; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] 1740; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1741; X64-NEXT: retq # encoding: [0xc3] 1742 %x2 = load <2 x i64>, <2 x i64>* %x2p 1743 %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 1744 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 %x3) 1745 %res2 = add <2 x i64> %res, %res1 1746 ret <2 x i64> %res2 1747} 1748 1749declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1750declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 1751 1752define <16 x i16>@test_int_x86_avx512_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) { 1753; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_256: 1754; CHECK: # %bb.0: 1755; CHECK-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x70,0xc2] 1756; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1757 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 1758 ret <16 x i16> %res 1759} 1760 1761define <16 x i16>@test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) { 1762; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: 1763; X86: # %bb.0: 1764; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1765; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1766; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1767; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x18] 1768; X86-NEXT: vpshldvw %ymm2, %ymm1, 
%ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] 1769; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1770; X86-NEXT: retl # encoding: [0xc3] 1771; 1772; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: 1773; X64: # %bb.0: 1774; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1775; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1776; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x1f] 1777; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] 1778; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1779; X64-NEXT: retq # encoding: [0xc3] 1780 %x2 = load <16 x i16>, <16 x i16>* %x2p 1781 %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 1782 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 %x3) 1783 %res2 = add <16 x i16> %res, %res1 1784 ret <16 x i16> %res2 1785} 1786 1787declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1788declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 1789 1790define <8 x i16>@test_int_x86_avx512_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) { 1791; CHECK-LABEL: test_int_x86_avx512_vpshldv_w_128: 1792; CHECK: # %bb.0: 1793; CHECK-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2] 1794; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1795 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 1796 ret <8 x i16> %res 1797} 1798 1799define <8 x i16>@test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) { 1800; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: 1801; X86: # %bb.0: 1802; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1803; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1804; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1805; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1806; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x18] 1807; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xc2] 1808; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1809; X86-NEXT: retl # encoding: [0xc3] 1810; 1811; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: 1812; X64: # %bb.0: 1813; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1814; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1815; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x1f] 1816; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x70,0xc2] 1817; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1818; X64-NEXT: retq # encoding: [0xc3] 1819 %x2 = load <8 x i16>, <8 x i16>* %x2p 1820 %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 1821 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 %x3) 1822 %res2 = add 
<8 x i16> %res, %res1 1823 ret <8 x i16> %res2 1824} 1825