1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5define <8 x i16> @test_mask_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) { 6; X86-LABEL: test_mask_expand_load_w_128: 7; X86: # %bb.0: 8; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 10; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 11; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00] 12; X86-NEXT: retl # encoding: [0xc3] 13; 14; X64-LABEL: test_mask_expand_load_w_128: 15; X64: # %bb.0: 16; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 17; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07] 18; X64-NEXT: retq # encoding: [0xc3] 19 %1 = bitcast i8* %addr to i16* 20 %2 = bitcast i8 %mask to <8 x i1> 21 %3 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> %data) 22 ret <8 x i16> %3 23} 24 25define <8 x i16> @test_maskz_expand_load_w_128(i8* %addr, i8 %mask) { 26; X86-LABEL: test_maskz_expand_load_w_128: 27; X86: # %bb.0: 28; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 29; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 30; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 31; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x00] 32; X86-NEXT: retl # encoding: [0xc3] 33; 34; X64-LABEL: test_maskz_expand_load_w_128: 35; X64: # %bb.0: 36; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 37; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0x07] 38; X64-NEXT: retq # encoding: [0xc3] 39 %1 = bitcast i8* %addr to i16* 40 %2 = bitcast i8 %mask to <8 x i1> 41 %3 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> %2, <8 x i16> zeroinitializer) 42 ret <8 x i16> %3 43} 44 45define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) { 46; X86-LABEL: test_expand_load_w_128: 47; X86: # %bb.0: 48; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 49; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 50; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x00] 51; X86-NEXT: retl # encoding: [0xc3] 52; 53; X64-LABEL: test_expand_load_w_128: 54; X64: # %bb.0: 55; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 56; X64-NEXT: vpexpandw (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0x07] 57; X64-NEXT: retq # encoding: [0xc3] 58 %1 = bitcast i8* %addr to i16* 59 %2 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* %1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %data) 60 ret <8 x i16> %2 61} 62 63define <8 x i16> @test_expand_w_128(<8 x i16> %data) { 64; CHECK-LABEL: test_expand_w_128: 65; CHECK: # %bb.0: 66; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 67 %1 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 68 ret <8 x i16> %1 69} 70 71define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { 72; X86-LABEL: test_mask_expand_w_128: 73; X86: # %bb.0: 74; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 75; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 76; X86-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] 77; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 78; X86-NEXT: retl # encoding: [0xc3] 79; 80; X64-LABEL: test_mask_expand_w_128: 81; X64: # %bb.0: 82; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 83; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] 84; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 85; X64-NEXT: retq # encoding: [0xc3] 86 %1 = bitcast i8 %mask to <8 x i1> 87 %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1) 88 ret <8 x i16> %2 89} 90 91define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) { 92; X86-LABEL: test_maskz_expand_w_128: 93; X86: # %bb.0: 94; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 95; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 96; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] 97; X86-NEXT: retl # encoding: [0xc3] 98; 99; X64-LABEL: test_maskz_expand_w_128: 100; X64: # %bb.0: 101; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 102; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] 103; X64-NEXT: retq # encoding: [0xc3] 104 %1 = bitcast i8 %mask to <8 x i1> 105 %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1) 106 ret <8 x i16> %2 107} 108 109define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) { 110; X86-LABEL: test_mask_expand_load_b_128: 111; X86: # %bb.0: 112; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 113; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 114; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00] 115; X86-NEXT: retl # encoding: [0xc3] 116; 117; X64-LABEL: test_mask_expand_load_b_128: 118; X64: # %bb.0: 119; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 120; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07] 121; X64-NEXT: retq # encoding: [0xc3] 122 %1 = bitcast i16 %mask to <16 x i1> 123 %2 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* %addr, <16 x i1> %1, <16 x i8> %data) 124 ret <16 x i8> %2 125} 126 127define <16 x i8> @test_maskz_expand_load_b_128(i8* %addr, i16 %mask) { 128; X86-LABEL: test_maskz_expand_load_b_128: 129; X86: # %bb.0: 130; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 131; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 132; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x00] 133; X86-NEXT: retl # encoding: [0xc3] 134; 135; X64-LABEL: test_maskz_expand_load_b_128: 136; X64: # %bb.0: 137; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 138; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0x07] 139; X64-NEXT: retq # encoding: [0xc3] 140 %1 = bitcast i16 %mask to <16 x i1> 141 %2 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* %addr, <16 x i1> %1, <16 x i8> zeroinitializer) 142 ret <16 x i8> %2 143} 144 145define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) { 146; X86-LABEL: test_expand_load_b_128: 147; X86: # %bb.0: 148; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 149; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 150; X86-NEXT: vpexpandb (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x00] 151; X86-NEXT: retl # encoding: [0xc3] 152; 153; X64-LABEL: test_expand_load_b_128: 154; X64: # %bb.0: 155; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 156; X64-NEXT: vpexpandb (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0x07] 157; X64-NEXT: retq # encoding: [0xc3] 158 %1 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %data) 159 ret <16 x i8> %1 160} 161 162define <16 x i8> @test_expand_b_128(<16 x i8> %data) { 163; CHECK-LABEL: test_expand_b_128: 164; CHECK: # %bb.0: 165; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 166 %1 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 167 ret <16 x i8> %1 168} 169 170define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { 171; X86-LABEL: test_mask_expand_b_128: 172; X86: # %bb.0: 173; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 174; X86-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] 175; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 176; X86-NEXT: retl # encoding: [0xc3] 177; 178; X64-LABEL: test_mask_expand_b_128: 179; X64: # %bb.0: 180; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 181; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] 182; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 183; X64-NEXT: retq # encoding: [0xc3] 184 %1 = bitcast i16 %mask to <16 x i1> 185 %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1) 186 ret <16 x i8> %2 187} 188 189define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) { 190; X86-LABEL: test_maskz_expand_b_128: 191; X86: # %bb.0: 192; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 193; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] 194; X86-NEXT: retl # encoding: [0xc3] 195; 196; X64-LABEL: test_maskz_expand_b_128: 197; X64: # %bb.0: 198; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 199; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] 200; X64-NEXT: retq # encoding: [0xc3] 201 %1 = bitcast i16 %mask to <16 x i1> 202 %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1) 203 ret <16 x i8> %2 204} 205 206define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) { 207; X86-LABEL: test_mask_compress_store_w_128: 208; X86: # %bb.0: 209; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 210; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 211; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 212; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00] 213; X86-NEXT: retl # encoding: [0xc3] 214; 215; X64-LABEL: test_mask_compress_store_w_128: 216; X64: # %bb.0: 217; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 218; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07] 219; X64-NEXT: retq # encoding: [0xc3] 220 %1 = bitcast i8* %addr to i16* 221 %2 = bitcast i8 %mask to <8 x i1> 222 call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, i16* %1, <8 x i1> %2) 223 ret void 224} 225 226define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) { 227; X86-LABEL: test_compress_store_w_128: 228; X86: # %bb.0: 229; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 230; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 231; X86-NEXT: vpcompressw %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x00] 232; X86-NEXT: retl # encoding: [0xc3] 233; 234; X64-LABEL: test_compress_store_w_128: 235; X64: # %bb.0: 236; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 237; X64-NEXT: vpcompressw %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0x07] 238; X64-NEXT: retq # encoding: [0xc3] 239 %1 = bitcast i8* %addr to i16* 240 call void @llvm.masked.compressstore.v8i16(<8 x i16> %data, i16* %1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 241 ret void 242} 243 244define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { 245; X86-LABEL: test_mask_compress_w_128: 246; X86: # %bb.0: 247; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 248; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 249; X86-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] 250; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 251; X86-NEXT: retl # encoding: [0xc3] 252; 253; X64-LABEL: test_mask_compress_w_128: 254; X64: # %bb.0: 255; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 256; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] 257; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 258; X64-NEXT: retq # encoding: [0xc3] 259 %1 = bitcast i8 %mask to <8 x i1> 260 %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1) 261 ret <8 x i16> %2 262} 263 264define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) { 265; X86-LABEL: test_maskz_compress_w_128: 266; X86: # %bb.0: 267; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 268; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 269; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] 270; X86-NEXT: retl # encoding: [0xc3] 271; 272; X64-LABEL: test_maskz_compress_w_128: 273; X64: # %bb.0: 274; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 275; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] 276; X64-NEXT: retq # encoding: [0xc3] 277 %1 = bitcast i8 %mask to <8 x i1> 278 %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1) 279 ret <8 x i16> %2 280} 281 282define <8 x i16> @test_compress_w_128(<8 x i16> %data) { 283; CHECK-LABEL: test_compress_w_128: 284; CHECK: # %bb.0: 285; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 286 %1 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 287 ret <8 x i16> %1 288} 289 290define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) { 291; X86-LABEL: test_mask_compress_store_b_128: 292; X86: # %bb.0: 293; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 294; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 295; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00] 296; X86-NEXT: retl # encoding: [0xc3] 297; 298; X64-LABEL: test_mask_compress_store_b_128: 299; X64: # %bb.0: 300; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 301; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07] 302; X64-NEXT: retq # encoding: [0xc3] 303 %1 = bitcast i16 %mask to <16 x i1> 304 call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, i8* %addr, <16 x i1> %1) 305 ret void 306} 307 308define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) { 309; X86-LABEL: test_compress_store_b_128: 310; X86: # %bb.0: 311; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 312; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 313; X86-NEXT: vpcompressb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x00] 314; X86-NEXT: retl # encoding: [0xc3] 315; 316; X64-LABEL: test_compress_store_b_128: 317; X64: # %bb.0: 318; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 319; X64-NEXT: vpcompressb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0x07] 320; X64-NEXT: retq # encoding: [0xc3] 321 call void @llvm.masked.compressstore.v16i8(<16 x i8> %data, i8* %addr, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 322 ret void 323} 324 325define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { 326; X86-LABEL: test_mask_compress_b_128: 327; X86: # %bb.0: 328; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 329; X86-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] 330; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 331; X86-NEXT: retl # encoding: [0xc3] 332; 333; X64-LABEL: test_mask_compress_b_128: 334; X64: # %bb.0: 335; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 336; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] 337; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 338; X64-NEXT: retq # encoding: [0xc3] 339 %1 = bitcast i16 %mask to <16 x i1> 340 %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1) 341 ret <16 x i8> %2 342} 343 344define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) { 345; X86-LABEL: test_maskz_compress_b_128: 346; X86: # %bb.0: 347; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 348; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] 349; X86-NEXT: retl # encoding: [0xc3] 350; 351; X64-LABEL: test_maskz_compress_b_128: 352; X64: # %bb.0: 353; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 354; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] 355; X64-NEXT: retq # encoding: [0xc3] 356 %1 = bitcast i16 %mask to <16 x i1> 357 %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1) 358 ret <16 x i8> %2 359} 360 361define <16 x i8> @test_compress_b_128(<16 x i8> %data) { 362; CHECK-LABEL: test_compress_b_128: 363; CHECK: # %bb.0: 364; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 365 %1 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 366 ret <16 x i8> %1 367} 368 369define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) { 370; X86-LABEL: test_mask_expand_load_w_256: 371; X86: # %bb.0: 372; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 373; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 374; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00] 375; X86-NEXT: retl # encoding: [0xc3] 376; 377; X64-LABEL: test_mask_expand_load_w_256: 378; X64: # %bb.0: 379; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 380; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07] 381; X64-NEXT: retq # encoding: [0xc3] 382 %1 = bitcast i8* %addr to i16* 383 %2 = bitcast i16 %mask to <16 x i1> 384 %3 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> %data) 385 ret <16 x i16> %3 386} 387 388define <16 x i16> @test_maskz_expand_load_w_256(i8* %addr, i16 %mask) { 389; X86-LABEL: test_maskz_expand_load_w_256: 390; X86: # %bb.0: 391; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 392; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 393; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x00] 394; X86-NEXT: retl # encoding: [0xc3] 395; 396; X64-LABEL: test_maskz_expand_load_w_256: 397; X64: # %bb.0: 398; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 399; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0x07] 400; X64-NEXT: retq # encoding: [0xc3] 401 %1 = bitcast i8* %addr to i16* 402 %2 = bitcast i16 %mask to <16 x i1> 403 %3 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> %2, <16 x i16> zeroinitializer) 404 ret <16 x i16> %3 405} 406 407define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) { 408; X86-LABEL: test_expand_load_w_256: 409; X86: # %bb.0: 410; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 411; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 412; X86-NEXT: vpexpandw (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x00] 413; X86-NEXT: retl # encoding: [0xc3] 414; 415; X64-LABEL: test_expand_load_w_256: 416; X64: # %bb.0: 417; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 418; X64-NEXT: vpexpandw (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0x07] 419; X64-NEXT: retq # encoding: [0xc3] 420 %1 = bitcast i8* %addr to i16* 421 %2 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* %1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %data) 422 ret <16 x i16> %2 423} 424 425define <16 x i16> @test_expand_w_256(<16 x i16> %data) { 426; CHECK-LABEL: test_expand_w_256: 427; CHECK: # %bb.0: 428; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 429 %1 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 430 ret <16 x i16> %1 431} 432 433define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { 434; X86-LABEL: test_mask_expand_w_256: 435; X86: # %bb.0: 436; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 437; X86-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] 438; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 439; X86-NEXT: retl # encoding: [0xc3] 440; 441; X64-LABEL: test_mask_expand_w_256: 442; X64: # %bb.0: 443; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 444; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] 445; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 446; X64-NEXT: retq # encoding: [0xc3] 447 %1 = bitcast i16 %mask to <16 x i1> 448 %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1) 449 ret <16 x i16> %2 450} 451 452define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) { 453; X86-LABEL: test_maskz_expand_w_256: 454; X86: # %bb.0: 455; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 456; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] 457; X86-NEXT: retl # encoding: [0xc3] 458; 459; X64-LABEL: test_maskz_expand_w_256: 460; X64: # %bb.0: 461; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 462; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] 463; X64-NEXT: retq # encoding: [0xc3] 464 %1 = bitcast i16 %mask to <16 x i1> 465 %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1) 466 ret <16 x i16> %2 467} 468 469define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) { 470; X86-LABEL: test_mask_expand_load_b_256: 471; X86: # %bb.0: 472; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 473; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 474; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00] 475; X86-NEXT: retl # encoding: [0xc3] 476; 477; X64-LABEL: test_mask_expand_load_b_256: 478; X64: # %bb.0: 479; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 480; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07] 481; X64-NEXT: retq # encoding: [0xc3] 482 %1 = bitcast i32 %mask to <32 x i1> 483 %2 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* %addr, <32 x i1> %1, <32 x i8> %data) 484 ret <32 x i8> %2 485} 486 487define <32 x i8> @test_maskz_expand_load_b_256(i8* %addr, i32 %mask) { 488; X86-LABEL: test_maskz_expand_load_b_256: 489; X86: # %bb.0: 490; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 491; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 492; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x00] 493; X86-NEXT: retl # encoding: [0xc3] 494; 495; X64-LABEL: test_maskz_expand_load_b_256: 496; X64: # %bb.0: 497; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 498; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0x07] 499; X64-NEXT: retq # encoding: [0xc3] 500 %1 = bitcast i32 %mask to <32 x i1> 501 %2 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* %addr, <32 x i1> %1, <32 x i8> zeroinitializer) 502 ret <32 x i8> %2 503} 504 505define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) { 506; X86-LABEL: test_expand_load_b_256: 507; X86: # %bb.0: 508; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 509; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 510; X86-NEXT: vpexpandb (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x00] 511; X86-NEXT: retl # encoding: [0xc3] 512; 513; X64-LABEL: test_expand_load_b_256: 514; X64: # %bb.0: 515; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 516; X64-NEXT: vpexpandb (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0x07] 517; X64-NEXT: retq # encoding: [0xc3] 518 %1 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %data) 519 ret <32 x i8> %1 520} 521 522define <32 x i8> @test_expand_b_256(<32 x i8> %data) { 523; CHECK-LABEL: test_expand_b_256: 524; CHECK: # %bb.0: 525; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 526 %1 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 527 ret <32 x i8> %1 528} 529 530define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { 531; X86-LABEL: test_mask_expand_b_256: 532; X86: # %bb.0: 533; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 534; X86-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] 535; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 536; X86-NEXT: retl # encoding: [0xc3] 537; 538; X64-LABEL: test_mask_expand_b_256: 539; X64: # %bb.0: 540; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 541; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] 542; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 543; X64-NEXT: retq # encoding: [0xc3] 544 %1 = bitcast i32 %mask to <32 x i1> 545 %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1) 546 ret <32 x i8> %2 547} 548 549define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) { 550; X86-LABEL: test_maskz_expand_b_256: 551; X86: # %bb.0: 552; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 553; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] 554; X86-NEXT: retl # encoding: [0xc3] 555; 556; X64-LABEL: test_maskz_expand_b_256: 557; X64: # %bb.0: 558; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 559; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] 560; X64-NEXT: retq # encoding: [0xc3] 561 %1 = bitcast i32 %mask to <32 x i1> 562 %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1) 563 ret <32 x i8> %2 564} 565 566define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) { 567; X86-LABEL: test_mask_compress_store_w_256: 568; X86: # %bb.0: 569; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 570; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 571; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00] 572; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 573; X86-NEXT: retl # encoding: [0xc3] 574; 575; X64-LABEL: test_mask_compress_store_w_256: 576; X64: # %bb.0: 577; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 578; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07] 579; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 580; X64-NEXT: retq # encoding: [0xc3] 581 %1 = bitcast i8* %addr to i16* 582 %2 = bitcast i16 %mask to <16 x i1> 583 call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, i16* %1, <16 x i1> %2) 584 ret void 585} 586 587define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) { 588; X86-LABEL: test_compress_store_w_256: 589; X86: # %bb.0: 590; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 591; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 592; X86-NEXT: vpcompressw %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x00] 593; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 594; X86-NEXT: retl # encoding: [0xc3] 595; 596; X64-LABEL: test_compress_store_w_256: 597; X64: # %bb.0: 598; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 599; X64-NEXT: vpcompressw %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0x07] 600; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 601; X64-NEXT: retq # encoding: [0xc3] 602 %1 = bitcast i8* %addr to i16* 603 call void @llvm.masked.compressstore.v16i16(<16 x i16> %data, i16* %1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 604 ret void 605} 606 607define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { 608; X86-LABEL: test_mask_compress_w_256: 609; X86: # %bb.0: 610; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 611; X86-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] 612; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 613; X86-NEXT: retl # encoding: [0xc3] 614; 615; X64-LABEL: test_mask_compress_w_256: 616; X64: # %bb.0: 617; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 618; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] 619; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 620; X64-NEXT: retq # encoding: [0xc3] 621 %1 = bitcast i16 %mask to <16 x i1> 622 %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1) 623 ret <16 x i16> %2 624} 625 626define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) { 627; X86-LABEL: test_maskz_compress_w_256: 628; X86: # %bb.0: 629; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 630; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] 631; X86-NEXT: retl # encoding: [0xc3] 632; 633; X64-LABEL: test_maskz_compress_w_256: 634; X64: # %bb.0: 635; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 636; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] 637; X64-NEXT: retq # encoding: [0xc3] 638 %1 = bitcast i16 %mask to <16 x i1> 639 %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1) 640 ret <16 x i16> %2 641} 642 643define <16 x i16> @test_compress_w_256(<16 x i16> %data) { 644; CHECK-LABEL: test_compress_w_256: 645; CHECK: # %bb.0: 646; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 647 %1 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 648 ret <16 x i16> %1 649} 650 651define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) { 652; X86-LABEL: test_mask_compress_store_b_256: 653; X86: # %bb.0: 654; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 655; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] 656; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00] 657; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 658; X86-NEXT: retl # encoding: [0xc3] 659; 660; X64-LABEL: test_mask_compress_store_b_256: 661; X64: # %bb.0: 662; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 663; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07] 664; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 665; X64-NEXT: retq # encoding: [0xc3] 666 %1 = bitcast i32 %mask to <32 x i1> 667 call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, i8* %addr, <32 x i1> %1) 668 ret void 669} 670 671define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) { 672; X86-LABEL: test_compress_store_b_256: 673; X86: # %bb.0: 674; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 675; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 676; X86-NEXT: vpcompressb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x00] 677; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 678; X86-NEXT: retl # encoding: [0xc3] 679; 680; X64-LABEL: test_compress_store_b_256: 681; X64: # %bb.0: 682; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] 683; X64-NEXT: vpcompressb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0x07] 684; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 685; X64-NEXT: retq # encoding: [0xc3] 686 call void @llvm.masked.compressstore.v32i8(<32 x i8> %data, i8* %addr, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 687 ret void 688} 689 690define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { 691; X86-LABEL: test_mask_compress_b_256: 692; X86: # %bb.0: 693; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 694; X86-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] 695; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 696; X86-NEXT: retl # encoding: [0xc3] 697; 698; X64-LABEL: test_mask_compress_b_256: 699; X64: # %bb.0: 700; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 701; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] 702; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 703; X64-NEXT: retq # encoding: [0xc3] 704 %1 = bitcast i32 %mask to <32 x i1> 705 %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1) 706 ret <32 x i8> %2 707} 708 709define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) { 710; X86-LABEL: test_maskz_compress_b_256: 711; X86: # %bb.0: 712; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 713; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] 714; X86-NEXT: retl # encoding: [0xc3] 715; 716; X64-LABEL: test_maskz_compress_b_256: 717; X64: # %bb.0: 718; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 719; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] 720; X64-NEXT: retq # encoding: [0xc3] 721 %1 = bitcast i32 %mask to <32 x i1> 722 %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1) 723 ret <32 x i8> %2 724} 725 726define <32 x i8> @test_compress_b_256(<32 x i8> %data) { 727; CHECK-LABEL: test_compress_b_256: 728; CHECK: # %bb.0: 729; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 730 %1 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 731 ret <32 x i8> %1 732} 733 734define <4 x i32> @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 735; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128: 736; X86: # %bb.0: 737; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 738; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 739; X86-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] 740; X86-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] 741; X86-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18] 742; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 743; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 744; X86-NEXT: retl # encoding: [0xc3] 745; 746; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_128: 747; X64: # %bb.0: 748; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 749; X64-NEXT: vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x71,0xd1,0x16] 750; X64-NEXT: vpshldd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x71,0xd9,0x17] 751; X64-NEXT: vpshldd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x71,0xc1,0x18] 752; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 753; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 754; X64-NEXT: retq # encoding: [0xc3] 755 %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 22, i32 22, i32 22, i32 22>) 756 %2 = bitcast i8 %x4 to <8 x i1> 757 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 758 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 759 %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 23, i32 23, i32 23, i32 23>) 760 %5 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 24, i32 24, i32 24, i32 24>) 761 %6 = bitcast i8 %x4 to <8 x i1> 762 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 763 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 764 %res3 = add <4 x i32> %3, %4 765 %res4 = add <4 x i32> %res3, %7 766 ret <4 x i32> %res4 767} 768 769define <8 x i32> @test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 770; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_256: 771; X86: # %bb.0: 772; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 773; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 774; X86-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16] 775; X86-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc1,0x17] 776; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 777; X86-NEXT: retl # encoding: [0xc3] 778; 779; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_256: 780; X64: # %bb.0: 781; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 782; X64-NEXT: vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x71,0xd1,0x16] 783; X64-NEXT: vpshldd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x71,0xc1,0x17] 784; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 785; X64-NEXT: retq # encoding: [0xc3] 786 %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>) 787 %2 = bitcast i8 %x4 to <8 x i1> 788 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 789 %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>) 790 %res2 = add <8 x i32> %3, %4 791 ret <8 x i32> %res2 792} 793 794define <2 x i64> @test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 795; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_128: 796; X86: # %bb.0: 797; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 798; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 799; X86-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 800; X86-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc1,0x17] 801; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 802; X86-NEXT: retl # encoding: [0xc3] 803; 804; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_128: 805; X64: # %bb.0: 806; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 807; X64-NEXT: vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x71,0xd1,0x16] 808; X64-NEXT: vpshldq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x71,0xc1,0x17] 809; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 810; X64-NEXT: retq # encoding: [0xc3] 811 %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 22, i64 22>) 812 %2 = bitcast i8 %x4 to <8 x i1> 813 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 814 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 815 %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> <i64 23, i64 23>) 816 %res2 = add <2 x i64> %3, %4 817 ret <2 x i64> %res2 818} 819 820define <4 x i64> @test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 821; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_256: 822; X86: # %bb.0: 823; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 824; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 825; X86-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 826; X86-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc1,0x17] 827; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 828; X86-NEXT: retl # encoding: [0xc3] 829; 830; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_256: 831; X64: # %bb.0: 832; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 833; X64-NEXT: vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x71,0xd1,0x16] 834; X64-NEXT: vpshldq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x71,0xc1,0x17] 835; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 836; X64-NEXT: retq # encoding: [0xc3] 837 %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 22, i64 22, i64 22, i64 22>) 838 %2 = bitcast i8 %x4 to <8 x i1> 839 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 840 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 841 %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> <i64 23, i64 23, i64 23, i64 23>) 842 %res2 = add <4 x i64> %3, %4 843 ret <4 x i64> %res2 844} 845 846define <8 x i16> @test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 847; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_128: 848; X86: # %bb.0: 849; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 850; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 851; X86-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06] 852; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc1,0x07] 853; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 854; X86-NEXT: retl # encoding: [0xc3] 855; 856; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_128: 857; X64: # %bb.0: 858; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 859; X64-NEXT: vpshldw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x70,0xd1,0x06] 860; X64-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x70,0xc1,0x07] 861; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 862; X64-NEXT: retq # encoding: [0xc3] 863 %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>) 864 %2 = bitcast i8 %x4 to <8 x i1> 865 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 866 %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 867 %res2 = add <8 x i16> %3, %4 868 ret <8 x i16> %res2 869} 870 871define <16 x i16> @test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 872; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_256: 873; X86: # %bb.0: 874; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 875; X86-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06] 876; X86-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc1,0x07] 877; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 878; X86-NEXT: retl # encoding: [0xc3] 879; 880; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_256: 881; X64: # %bb.0: 882; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 883; X64-NEXT: vpshldw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x70,0xd1,0x06] 884; X64-NEXT: vpshldw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x70,0xc1,0x07] 885; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 886; X64-NEXT: retq # encoding: [0xc3] 887 %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>) 888 %2 = bitcast i16 %x4 to <16 x i1> 889 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 890 %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 891 %res2 = add <16 x i16> %3, %4 892 ret <16 x i16> %res2 893} 894 895define <4 x i32> @test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 896; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: 897; X86: # %bb.0: 898; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 899; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 900; X86-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] 901; X86-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 902; X86-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] 903; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 904; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 905; X86-NEXT: retl # encoding: [0xc3] 906; 907; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_128: 908; X64: # %bb.0: 909; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 910; X64-NEXT: vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x73,0xd1,0x16] 911; X64-NEXT: vpshrdd $23, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x73,0xd9,0x17] 912; X64-NEXT: vpshrdd $24, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x73,0xc1,0x18] 913; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 914; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] 915; X64-NEXT: retq # encoding: [0xc3] 916 %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 22, i32 22, i32 22, i32 22>) 917 %2 = bitcast i8 %x4 to <8 x i1> 918 %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 919 %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x3 920 %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 23, i32 23, i32 23, i32 23>) 921 %5 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> <i32 24, i32 24, i32 24, i32 24>) 922 %6 = bitcast i8 %x4 to <8 x i1> 923 %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 924 %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer 925 %res3 = add <4 x i32> %3, %4 926 %res4 = add <4 x i32> %res3, %7 927 ret <4 x i32> %res4 928} 929 930define <8 x i32> @test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 931; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_256: 932; X86: # %bb.0: 933; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 934; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 935; X86-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 936; X86-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc1,0x17] 937; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 938; X86-NEXT: retl # encoding: [0xc3] 939; 940; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_256: 941; X64: # %bb.0: 942; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 943; X64-NEXT: vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x73,0xd1,0x16] 944; X64-NEXT: vpshrdd $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x73,0xc1,0x17] 945; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 946; X64-NEXT: retq # encoding: [0xc3] 947 %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22>) 948 %2 = bitcast i8 %x4 to <8 x i1> 949 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x3 950 %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>) 951 %res2 = add <8 x i32> %3, %4 952 ret <8 x i32> %res2 953} 954 955define <2 x i64> @test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 956; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_128: 957; X86: # %bb.0: 958; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 959; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 960; X86-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 961; X86-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc1,0x17] 962; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 963; X86-NEXT: retl # encoding: [0xc3] 964; 965; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_128: 966; X64: # %bb.0: 967; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 968; X64-NEXT: vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x73,0xd1,0x16] 969; X64-NEXT: vpshrdq $23, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x73,0xc1,0x17] 970; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0] 971; X64-NEXT: retq # encoding: [0xc3] 972 %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 22, i64 22>) 973 %2 = bitcast i8 %x4 to <8 x i1> 974 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 975 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x3 976 %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> <i64 23, i64 23>) 977 %res2 = add <2 x i64> %3, %4 978 ret <2 x i64> %res2 979} 980 981define <4 x i64> @test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 982; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_256: 983; X86: # %bb.0: 984; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 985; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 986; X86-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 987; X86-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc1,0x17] 988; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 989; X86-NEXT: retl # encoding: [0xc3] 990; 991; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_256: 992; X64: # %bb.0: 993; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 994; X64-NEXT: vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x73,0xd1,0x16] 995; X64-NEXT: vpshrdq $23, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x73,0xc1,0x17] 996; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 997; X64-NEXT: retq # encoding: [0xc3] 998 %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 22, i64 22, i64 22, i64 22>) 999 %2 = bitcast i8 %x4 to <8 x i1> 1000 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1001 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x3 1002 %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> <i64 23, i64 23, i64 23, i64 23>) 1003 %res2 = add <4 x i64> %3, %4 1004 ret <4 x i64> %res2 1005} 1006 1007define <8 x i16> @test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) { 1008; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_128: 1009; X86: # %bb.0: 1010; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 1011; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] 1012; X86-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06] 1013; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc1,0x07] 1014; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 1015; X86-NEXT: retl # encoding: [0xc3] 1016; 1017; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_128: 1018; X64: # %bb.0: 1019; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1020; X64-NEXT: vpshrdw $6, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x72,0xd1,0x06] 1021; X64-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x72,0xc1,0x07] 1022; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] 1023; X64-NEXT: retq # encoding: [0xc3] 1024 %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>) 1025 %2 = bitcast i8 %x4 to <8 x i1> 1026 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3 1027 %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 1028 %res2 = add <8 x i16> %3, %4 1029 ret <8 x i16> %res2 1030} 1031 1032define <16 x i16> @test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) { 1033; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_256: 1034; X86: # %bb.0: 1035; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 1036; X86-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06] 1037; X86-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc1,0x07] 1038; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 1039; X86-NEXT: retl # encoding: [0xc3] 1040; 1041; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_256: 1042; X64: # %bb.0: 1043; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 1044; X64-NEXT: vpshrdw $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x72,0xd1,0x06] 1045; X64-NEXT: vpshrdw $7, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x72,0xc1,0x07] 1046; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] 1047; X64-NEXT: retq # encoding: [0xc3] 1048 %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6, i16 6>) 1049 %2 = bitcast i16 %x4 to <16 x i1> 1050 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3 1051 %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>) 1052 %res2 = add <16 x i16> %3, %4 1053 ret <16 x i16> %res2 1054} 1055 1056define <8 x i32> @test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { 1057; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: 1058; X86: # %bb.0: 1059; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1060; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1061; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1062; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1063; X86-NEXT: vpshrdvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x18] 1064; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] 1065; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 1066; X86-NEXT: retl # encoding: [0xc3] 1067; 1068; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256: 1069; X64: # %bb.0: 1070; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1071; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1072; X64-NEXT: vpshrdvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x73,0x1f] 1073; X64-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x73,0xc2] 1074; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 1075; X64-NEXT: retq # encoding: [0xc3] 1076 %x2 = load <8 x i32>, <8 x i32>* %x2p 1077 %1 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x2) 1078 %2 = bitcast i8 %x3 to <8 x i1> 1079 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0 1080 %4 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %x1, <8 x i32> %x0, <8 x i32> %x4) 1081 %5 = bitcast i8 %x3 to <8 x i1> 1082 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer 1083 %res3 = add <8 x i32> %3, %6 1084 ret <8 x i32> %res3 1085} 1086 1087define <4 x i32> @test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { 1088; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: 1089; X86: # %bb.0: 1090; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1091; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1092; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1093; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1094; X86-NEXT: vpshrdvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x18] 1095; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] 1096; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1097; X86-NEXT: retl # encoding: [0xc3] 1098; 1099; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128: 1100; X64: # %bb.0: 1101; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1102; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1103; X64-NEXT: vpshrdvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x73,0x1f] 1104; X64-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x73,0xc2] 1105; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1106; X64-NEXT: retq # encoding: [0xc3] 1107 %x2 = load <4 x i32>, <4 x i32>* %x2p 1108 %1 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x2) 1109 %2 = bitcast i8 %x3 to <8 x i1> 1110 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1111 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0 1112 %4 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x1, <4 x i32> %x0, <4 x i32> %x4) 1113 %5 = bitcast i8 %x3 to <8 x i1> 1114 %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1115 %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer 1116 %res3 = add <4 x i32> %3, %6 1117 ret <4 x i32> %res3 1118} 1119 1120define <4 x i64> @test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) { 1121; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: 1122; X86: # %bb.0: 1123; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1124; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1125; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1126; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1127; X86-NEXT: vpshrdvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x18] 1128; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] 1129; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1130; X86-NEXT: retl # encoding: [0xc3] 1131; 1132; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256: 1133; X64: # %bb.0: 1134; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1135; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1136; X64-NEXT: vpshrdvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x73,0x1f] 1137; X64-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x73,0xc2] 1138; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1139; X64-NEXT: retq # encoding: [0xc3] 1140 %x2 = load <4 x i64>, <4 x i64>* %x2p 1141 %1 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x2) 1142 %2 = bitcast i8 %x3 to <8 x i1> 1143 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1144 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0 1145 %4 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x1, <4 x i64> %x0, <4 x i64> %x4) 1146 %5 = bitcast i8 %x3 to <8 x i1> 1147 %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1148 %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer 1149 %res3 = add <4 x i64> %3, %6 1150 ret <4 x i64> %res3 1151} 1152 1153define <2 x i64> @test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) { 1154; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: 1155; X86: # %bb.0: 1156; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1157; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1158; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1159; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1160; X86-NEXT: vpshrdvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x18] 1161; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] 1162; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1163; X86-NEXT: retl # encoding: [0xc3] 1164; 1165; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128: 1166; X64: # %bb.0: 1167; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1168; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1169; X64-NEXT: vpshrdvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x73,0x1f] 1170; X64-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x73,0xc2] 1171; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1172; X64-NEXT: retq # encoding: [0xc3] 1173 %x2 = load <2 x i64>, <2 x i64>* %x2p 1174 %1 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x2) 1175 %2 = bitcast i8 %x3 to <8 x i1> 1176 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 1177 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0 1178 %4 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x1, <2 x i64> %x0, <2 x i64> %x4) 1179 %5 = bitcast i8 %x3 to <8 x i1> 1180 %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1> 1181 %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer 1182 %res3 = add <2 x i64> %3, %6 1183 ret <2 x i64> %res3 1184} 1185 1186define <16 x i16> @test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) { 1187; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: 1188; X86: # %bb.0: 1189; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1190; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1191; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1192; X86-NEXT: vpshrdvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x18] 1193; X86-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2] 1194; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1195; X86-NEXT: retl # encoding: [0xc3] 1196; 1197; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256: 1198; X64: # %bb.0: 1199; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1200; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1201; X64-NEXT: vpshrdvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x72,0x1f] 1202; X64-NEXT: vpshrdvw %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x72,0xc2] 1203; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1204; X64-NEXT: retq # encoding: [0xc3] 1205 %x2 = load <16 x i16>, <16 x i16>* %x2p 1206 %1 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2) 1207 %2 = bitcast i16 %x3 to <16 x i1> 1208 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0 1209 %4 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x4) 1210 %5 = bitcast i16 %x3 to <16 x i1> 1211 %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer 1212 %res3 = add <16 x i16> %3, %4 1213 ret <16 x i16> %res3 1214} 1215 1216define <8 x i16> @test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) { 1217; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: 1218; X86: # %bb.0: 1219; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1220; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1221; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1222; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1223; X86-NEXT: vpshrdvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x18] 1224; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] 1225; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1226; X86-NEXT: retl # encoding: [0xc3] 1227; 1228; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128: 1229; X64: # %bb.0: 1230; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1231; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1232; X64-NEXT: vpshrdvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x72,0x1f] 1233; X64-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x72,0xc2] 1234; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1235; X64-NEXT: retq # encoding: [0xc3] 1236 %x2 = load <8 x i16>, <8 x i16>* %x2p 1237 %1 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2) 1238 %2 = bitcast i8 %x3 to <8 x i1> 1239 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0 1240 %4 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x4) 1241 %5 = bitcast i8 %x3 to <8 x i1> 1242 %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer 1243 %res3 = add <8 x i16> %3, %6 1244 ret <8 x i16> %res3 1245} 1246 1247define <8 x i32> @test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { 1248; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: 1249; X86: # %bb.0: 1250; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1251; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1252; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1253; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1254; X86-NEXT: vpshldvd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x18] 1255; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] 1256; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 1257; X86-NEXT: retl # encoding: [0xc3] 1258; 1259; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_256: 1260; X64: # %bb.0: 1261; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1262; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1263; X64-NEXT: vpshldvd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x71,0x1f] 1264; X64-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x71,0xc2] 1265; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] 1266; X64-NEXT: retq # encoding: [0xc3] 1267 %x2 = load <8 x i32>, <8 x i32>* %x2p 1268 %1 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) 1269 %2 = bitcast i8 %x3 to <8 x i1> 1270 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x0 1271 %4 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4) 1272 %5 = bitcast i8 %x3 to <8 x i1> 1273 %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer 1274 %res3 = add <8 x i32> %3, %6 1275 ret <8 x i32> %res3 1276} 1277 1278define <4 x i32> @test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { 1279; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: 1280; X86: # %bb.0: 1281; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1282; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1283; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1284; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1285; X86-NEXT: vpshldvd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x18] 1286; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] 1287; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1288; X86-NEXT: retl # encoding: [0xc3] 1289; 1290; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_128: 1291; X64: # %bb.0: 1292; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1293; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1294; X64-NEXT: vpshldvd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x71,0x1f] 1295; X64-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x71,0xc2] 1296; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] 1297; X64-NEXT: retq # encoding: [0xc3] 1298 %x2 = load <4 x i32>, <4 x i32>* %x2p 1299 %1 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) 1300 %2 = bitcast i8 %x3 to <8 x i1> 1301 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1302 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x0 1303 %4 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4) 1304 %5 = bitcast i8 %x3 to <8 x i1> 1305 %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1306 %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer 1307 %res3 = add <4 x i32> %3, %6 1308 ret <4 x i32> %res3 1309} 1310 1311define <4 x i64> @test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) { 1312; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: 1313; X86: # %bb.0: 1314; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1315; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1316; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1317; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1318; X86-NEXT: vpshldvq (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x18] 1319; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] 1320; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1321; X86-NEXT: retl # encoding: [0xc3] 1322; 1323; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_256: 1324; X64: # %bb.0: 1325; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1326; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1327; X64-NEXT: vpshldvq (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x71,0x1f] 1328; X64-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x71,0xc2] 1329; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0] 1330; X64-NEXT: retq # encoding: [0xc3] 1331 %x2 = load <4 x i64>, <4 x i64>* %x2p 1332 %1 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) 1333 %2 = bitcast i8 %x3 to <8 x i1> 1334 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1335 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x0 1336 %4 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4) 1337 %5 = bitcast i8 %x3 to <8 x i1> 1338 %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1339 %6 = select <4 x i1> %extract1, <4 x i64> %4, <4 x i64> zeroinitializer 1340 %res3 = add <4 x i64> %3, %6 1341 ret <4 x i64> %res3 1342} 1343 1344define <2 x i64> @test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) { 1345; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: 1346; X86: # %bb.0: 1347; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1348; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1349; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1350; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1351; X86-NEXT: vpshldvq (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x18] 1352; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] 1353; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1354; X86-NEXT: retl # encoding: [0xc3] 1355; 1356; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_128: 1357; X64: # %bb.0: 1358; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1359; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1360; X64-NEXT: vpshldvq (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x71,0x1f] 1361; X64-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x71,0xc2] 1362; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0] 1363; X64-NEXT: retq # encoding: [0xc3] 1364 %x2 = load <2 x i64>, <2 x i64>* %x2p 1365 %1 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) 1366 %2 = bitcast i8 %x3 to <8 x i1> 1367 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1> 1368 %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x0 1369 %4 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4) 1370 %5 = bitcast i8 %x3 to <8 x i1> 1371 %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1> 1372 %6 = select <2 x i1> %extract1, <2 x i64> %4, <2 x i64> zeroinitializer 1373 %res3 = add <2 x i64> %3, %6 1374 ret <2 x i64> %res3 1375} 1376 1377define <16 x i16> @test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) { 1378; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: 1379; X86: # %bb.0: 1380; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1381; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] 1382; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1383; X86-NEXT: vpshldvw (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x18] 1384; X86-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] 1385; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1386; X86-NEXT: retl # encoding: [0xc3] 1387; 1388; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_256: 1389; X64: # %bb.0: 1390; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1391; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 1392; X64-NEXT: vpshldvw (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x70,0x1f] 1393; X64-NEXT: vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x70,0xc2] 1394; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] 1395; X64-NEXT: retq # encoding: [0xc3] 1396 %x2 = load <16 x i16>, <16 x i16>* %x2p 1397 %1 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) 1398 %2 = bitcast i16 %x3 to <16 x i1> 1399 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x0 1400 %4 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4) 1401 %5 = bitcast i16 %x3 to <16 x i1> 1402 %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer 1403 %res3 = add <16 x i16> %3, %6 1404 ret <16 x i16> %res3 1405} 1406 1407define <8 x i16> @test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) { 1408; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: 1409; X86: # %bb.0: 1410; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1411; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 1412; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] 1413; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1414; X86-NEXT: vpshldvw (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x18] 1415; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2] 1416; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1417; X86-NEXT: retl # encoding: [0xc3] 1418; 1419; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_128: 1420; X64: # %bb.0: 1421; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] 1422; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 1423; X64-NEXT: vpshldvw (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x70,0x1f] 1424; X64-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x70,0xc2] 1425; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] 1426; X64-NEXT: retq # encoding: [0xc3] 1427 %x2 = load <8 x i16>, <8 x i16>* %x2p 1428 %1 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) 1429 %2 = bitcast i8 %x3 to <8 x i1> 1430 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x0 1431 %4 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4) 1432 %5 = bitcast i8 %x3 to <8 x i1> 1433 %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer 1434 %res3 = add <8 x i16> %3, %4 1435 ret <8 x i16> %res3 1436} 1437 1438declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) 1439declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) 1440declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) 1441declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) 1442declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) 1443declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) 1444declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) 1445declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) 1446declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) 1447declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) 1448declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) 1449declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>) 1450declare <8 x i16> @llvm.masked.expandload.v8i16(i16*, <8 x i1>, <8 x i16>) 1451declare <16 x i8> @llvm.masked.expandload.v16i8(i8*, <16 x i1>, <16 x i8>) 1452declare void @llvm.masked.compressstore.v8i16(<8 x i16>, i16*, <8 x i1>) 1453declare void @llvm.masked.compressstore.v16i8(<16 x i8>, i8*, <16 x i1>) 1454declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>) 1455declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>) 1456declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>) 1457declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) 1458declare <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16>, <8 x i16>, <8 x i1>) 1459declare <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8>, <16 x i8>, <16 x i1>) 1460declare <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16>, <8 x i16>, <8 x i1>) 1461declare <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8>, <16 x i8>, <16 x i1>) 1462declare <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16>, <16 x i16>, <16 x i1>) 1463declare <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8>, <32 x i8>, <32 x i1>) 1464declare <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16>, <16 x i16>, <16 x i1>) 1465declare <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8>, <32 x i8>, <32 x i1>) 1466