; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
  ret <8 x i16> %res
}

define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> zeroinitializer, i8 %mask)
  ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)

define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) {
; X86-LABEL: test_expand_load_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 -1)
  ret <8 x i16> %res
}

define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
  ret <16 x i8> %res
}

define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> zeroinitializer, i16 %mask)
  ret <16 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)

define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) {
; X86-LABEL: test_expand_load_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 -1)
  ret <16 x i8> %res
}

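; The compress-store tests below are the store-side counterparts of the
; expand loads above: vpcompressw/vpcompressb write only the lanes selected
; by %k1 out to memory. In the unmasked forms the all-ones i8/i16 mask is
; expected to be materialized with kxnorw rather than loaded as a constant.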
define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)

define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) {
; X86-LABEL: test_compress_store_w_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)

define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) {
; X86-LABEL: test_compress_store_b_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 -1)
  ret void
}

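; 256-bit versions of the same expand-load intrinsics. These operate on
; %ymm registers and take wider masks (i16 for 16 words, i32 for 32 bytes),
; so on X86 the mask comes straight off the stack with kmovw/kmovd instead
; of the movzbl+kmovd sequence used for the i8 masks above.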
define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
  ret <16 x i16> %res
}

define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> zeroinitializer, i16 %mask)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)

define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) {
; X86-LABEL: test_expand_load_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 -1)
  ret <16 x i16> %res
}

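; With 32 byte-sized lanes the mask type is i32, so the all-ones mask for
; the unmasked form below is expected to be built with kxnord, not kxnorw.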
define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
  ret <32 x i8> %res
}

define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> zeroinitializer, i32 %mask)
  ret <32 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)

define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) {
; X86-LABEL: test_expand_load_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 -1)
  ret <32 x i8> %res
}

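; The 256-bit compress stores additionally check that llc emits vzeroupper
; before returning, since the function has written %ymm state and should
; avoid AVX/SSE transition penalties in its caller.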
define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)

define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) {
; X86-LABEL: test_compress_store_w_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 -1)
  ret void
}

define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)

define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) {
; X86-LABEL: test_compress_store_b_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 -1)
  ret void
}

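; Funnel-shift ("shift left double") tests: vpshldd/vpshldq/vpshldw
; concatenate corresponding elements of the two sources, shift left by the
; immediate ($22), and keep the high half. Each test exercises the unmasked
; and merge-masked ({%k1}) forms (plus zero-masking for the d.128 case) and
; sums the results with vpadd* so a single return value covers every variant.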
define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
; X86-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x16]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16]
; X64-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x16]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}
declare <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16]
; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16]
; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16]
; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
declare <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x16]
; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x16]
; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 %x4)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}
declare <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)

define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x16]
; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x16]
; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 %x4)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}
declare <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)

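; vpshrd* is the "shift right double" counterpart: the concatenation of the
; two sources is shifted right by the immediate and the low half is kept.
; The test structure mirrors the vpshld cases above.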
define <4 x i32>@test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
; X86-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x16]
; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16]
; X64-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x16]
; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}
declare <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16]
; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
declare <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16]
; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
declare <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x16]
; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdw $22, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x16]
; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 %x4)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 -1)
  %res2 = add <8 x i16> %res, %res1
  ret <8 x i16> %res2
}
declare <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)

define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x16]
; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdw $22, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x16]
; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 %x4)
  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 -1)
  %res2 = add <16 x i16> %res, %res1
  ret <16 x i16> %res2
}
declare <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)