; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastd %eax, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc8]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpbroadcastd %eax, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc0]
; X86-NEXT:    vpbroadcastd %eax, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd0]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd7]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}
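; Each test in this file calls its intrinsic three times - unmasked (i8 -1),
; merge-masked (%mask with a real passthru), and zero-masked (%mask with a
; zeroinitializer passthru, printed as {z} in the assembly) - and sums the
; results so none of the calls is dead. In C terms the masked forms map
; roughly to the _mm*_mask_*/_mm*_maskz_* intrinsics (e.g. _mm_mask_set1_epi32
; / _mm_maskz_set1_epi32 for the GPR broadcast above); that mapping is
; informational only, not something this test exercises.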
declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT:    # xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc1]
; X86-NEXT:    vpbroadcastq %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc9]
; X86-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %xmm1 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xd7]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer, i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastd %eax, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xc8]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpbroadcastd %eax, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc0]
; X86-NEXT:    vpbroadcastd %eax, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd0]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd7]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}
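; In the q_gpr tests (above and below), the 32-bit target has no 64-bit GPR
; to broadcast from, so the i64 argument is reloaded from the stack with
; vmovq and broadcast from an xmm register; the 64-bit target broadcasts
; straight from %rdi with the GPR form of vpbroadcastq.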
declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT:    # xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc1]
; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc9]
; X86-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %ymm1 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xd7]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer, i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32 * %y_ptr) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X86-NEXT:    vpaddd (%eax){1to8}, %ymm1, %ymm1 # encoding: [0x62,0xf1,0x75,0x38,0xfe,0x08]
; X86-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X64-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X64-NEXT:    vpaddd (%rsi){1to8}, %ymm1, %ymm1 # encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e]
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_32 = load i32, i32 * %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}
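; The unmasked broadcasts above compress from EVEX to the shorter VEX form
; (0xc4/0xc5 prefix bytes, noted as "EVEX TO VEX Compression"), while the
; merge- and zero-masked variants must stay EVEX-encoded (0x62 prefix),
; since only EVEX can carry an opmask operand.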
declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X64-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask) {
; X86-LABEL: test_x86_vbroadcast_sd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X86-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X64-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X86-NEXT:    vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X86-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X64-NEXT:    vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X64-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}
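; For the vbroadcastss/vbroadcastsd tests the "unmasked" call still goes
; through the masked intrinsic, passing a zeroinitializer passthru with an
; all-ones mask; it nevertheless lowers to the plain, VEX-compressed
; broadcast.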
declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X86-NEXT:    vaddps %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9]
; X86-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X64-NEXT:    vaddps %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9]
; X64-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovsldup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0,0,2,2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0,0,2,2]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X86-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovsldup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0,0,2,2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0,0,2,2]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X64-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}
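; For the vmovsldup/vmovshdup/vmovddup tests, the shuffle decode that the
; AsmPrinter attaches to each instruction (e.g. "# xmm2 = xmm0[0,0,2,2]")
; is checked on its own CHECK line, so both the instruction and the decoded
; element mask are verified.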
declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovsldup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0]
; X86-NEXT:    # ymm2 = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovsldup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0]
; X64-NEXT:    # ymm2 = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovshdup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0]
; X86-NEXT:    # xmm2 = xmm0[1,1,3,3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[1,1,3,3]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X86-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovshdup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0]
; X64-NEXT:    # xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[1,1,3,3]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X64-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovshdup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xd0]
; X86-NEXT:    # ymm2 = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovshdup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xd0]
; X64-NEXT:    # ymm2 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovddup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0,0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0,0]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X86-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0]
; X86-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0,0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0,0]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X64-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovddup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0]
; X86-NEXT:    # ymm2 = ymm0[0,0,2,2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2]
; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca]
; X86-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0]
; X64-NEXT:    # ymm2 = ymm0[0,0,2,2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2]
; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca]
; X64-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}
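; Note on the vpermil tests below: for the 256-bit pd form the IR passes
; i32 22 but the assembly prints vpermilpd $6 - vpermilpd uses one select
; bit per element (four bits for a ymm), so presumably the backend masks
; the immediate down to 22 & 0xf = 6 before encoding. The ps forms use the
; full 8-bit immediate and keep $22.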
declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermilpd $6, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06]
; X86-NEXT:    # ymm2 = ymm0[0,1,3,2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,1,3,2]
; X86-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,3,2]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermilpd $6, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06]
; X64-NEXT:    # ymm2 = ymm0[0,1,3,2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,1,3,2]
; X64-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,3,2]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vpermilpd $1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01]
; X86-NEXT:    # xmm2 = xmm0[1,0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; X86-NEXT:    # xmm1 {%k1} = xmm0[1,0]
; X86-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[1,0]
; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vpermilpd $1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01]
; X64-NEXT:    # xmm2 = xmm0[1,0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; X64-NEXT:    # xmm1 {%k1} = xmm0[1,0]
; X64-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[1,0]
; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermilps $22, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16]
; X86-NEXT:    # ymm2 = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; X86-NEXT:    # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermilps $22, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16]
; X64-NEXT:    # ymm2 = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; X64-NEXT:    # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vpermilps $22, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16]
; X86-NEXT:    # xmm2 = xmm0[2,1,1,0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; X86-NEXT:    # xmm1 {%k1} = xmm0[2,1,1,0]
; X86-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vpermilps $22, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16]
; X64-NEXT:    # xmm2 = xmm0[2,1,1,0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; X64-NEXT:    # xmm1 {%k1} = xmm0[2,1,1,0]
; X64-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermpd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03]
; X86-NEXT:    # ymm2 = ymm0[3,0,0,0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermpd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03]
; X64-NEXT:    # ymm2 = ymm0[3,0,0,0]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}
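; vpermpd/vpermq are cross-lane permutes with a 2-bit selector per element:
; immediate 3 (0b00000011) picks element 3 for result element 0 and element
; 0 for the rest, hence the decoded mask ymm0[3,0,0,0] in the checks.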
declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03]
; X86-NEXT:    # ymm2 = ymm0[3,0,0,0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03]
; X64-NEXT:    # ymm2 = ymm0[3,0,0,0]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x01]
; X86-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; X64-NEXT:    vmovapd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x01]
; X86-NEXT:    vmovapd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; X64-NEXT:    vmovapd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x01]
; X86-NEXT:    vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; X64-NEXT:    vmovupd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x01]
; X86-NEXT:    vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
; X64-NEXT:    vmovupd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)

define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovaps %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x01]
; X86-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
; X64-NEXT:    vmovaps %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)

define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovaps %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x01]
; X86-NEXT:    vmovaps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
; X64-NEXT:    vmovaps %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}
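; In the store tests the masked vmovaps/vmovapd/vmovups/vmovupd must keep
; their EVEX encoding, while the unmasked companion store compresses to
; VEX. The 256-bit variants also emit vzeroupper before returning, to avoid
; AVX/SSE transition penalties.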
[0x0f,0xb6,0x54,0x24,0x0c] 958; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 959; X86-NEXT: vmovups %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x01] 960; X86-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 961; X86-NEXT: retl # encoding: [0xc3] 962; 963; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_128: 964; X64: # %bb.0: 965; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 966; X64-NEXT: vmovups %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07] 967; X64-NEXT: vmovups %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06] 968; X64-NEXT: retq # encoding: [0xc3] 969 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2) 970 call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1) 971 ret void 972} 973 974declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8) 975 976define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) { 977; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_256: 978; X86: # %bb.0: 979; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 980; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 981; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 982; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 983; X86-NEXT: vmovups %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x01] 984; X86-NEXT: vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00] 985; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 986; X86-NEXT: retl # encoding: [0xc3] 987; 988; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_256: 989; X64: # %bb.0: 990; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 991; X64-NEXT: vmovups %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07] 992; X64-NEXT: vmovups %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06] 993; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 994; X64-NEXT: retq # encoding: [0xc3] 995 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2) 996 call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1) 997 ret void 998} 999 1000declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8) 1001 1002define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) { 1003; X86-LABEL: test_int_x86_avx512_mask_storeu_q_128: 1004; X86: # %bb.0: 1005; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1006; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1007; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1008; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1009; X86-NEXT: vmovdqu64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x01] 1010; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 1011; X86-NEXT: retl # encoding: [0xc3] 1012; 1013; X64-LABEL: test_int_x86_avx512_mask_storeu_q_128: 1014; X64: # %bb.0: 1015; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1016; X64-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07] 1017; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] 1018; X64-NEXT: retq # encoding: [0xc3] 1019 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> 
%x1, i8 %x2) 1020 call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1) 1021 ret void 1022} 1023 1024declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8) 1025 1026define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) { 1027; X86-LABEL: test_int_x86_avx512_mask_storeu_q_256: 1028; X86: # %bb.0: 1029; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1030; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1031; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1032; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1033; X86-NEXT: vmovdqu64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x01] 1034; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00] 1035; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1036; X86-NEXT: retl # encoding: [0xc3] 1037; 1038; X64-LABEL: test_int_x86_avx512_mask_storeu_q_256: 1039; X64: # %bb.0: 1040; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1041; X64-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07] 1042; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] 1043; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 1044; X64-NEXT: retq # encoding: [0xc3] 1045 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2) 1046 call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1) 1047 ret void 1048} 1049 1050declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8) 1051 1052define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) { 1053; X86-LABEL: test_int_x86_avx512_mask_storeu_d_128: 1054; X86: # %bb.0: 1055; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1056; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1057; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1058; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1059; X86-NEXT: vmovdqu32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x01] 1060; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00] 1061; X86-NEXT: retl # encoding: [0xc3] 1062; 1063; X64-LABEL: test_int_x86_avx512_mask_storeu_d_128: 1064; X64: # %bb.0: 1065; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1066; X64-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07] 1067; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06] 1068; X64-NEXT: retq # encoding: [0xc3] 1069 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2) 1070 call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1) 1071 ret void 1072} 1073 1074declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8) 1075 1076define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) { 1077; X86-LABEL: test_int_x86_avx512_mask_storeu_d_256: 1078; X86: # %bb.0: 1079; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1080; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] 1081; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] 1082; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca] 1083; X86-NEXT: vmovdqu32 %ymm0, (%ecx) {%k1} # 
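; NOTE: Annotation (not autogenerated): the masked-store tests above and
; below all share one shape. The i8 mask operand is zero-extended into a GPR,
; moved to a k-register with kmovw, and applied to the store ({%k1}); an
; all-ones mask (i8 -1) instead selects the unmasked store, which the backend
; then compresses from EVEX to VEX. A minimal sketch of the pattern, with a
; hypothetical function name:
;
;   define void @store_sketch(i8* %p, <4 x float> %v, i8 %m) {
;     ; expect: kmovw + vmovaps %xmm0, (ptr) {%k1}
;     call void @llvm.x86.avx512.mask.store.ps.128(i8* %p, <4 x float> %v, i8 %m)
;     ret void
;   }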
declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqu64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x01]
; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqu64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqu64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x01]
; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqu64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)

define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqu32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x01]
; X86-NEXT: vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqu32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
; X64-NEXT: vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)

define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqu32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x01]
; X86-NEXT: vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqu32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
; X64-NEXT: vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)

define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqa64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x01]
; X86-NEXT: vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqa64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
; X64-NEXT: vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)

define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqa64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x01]
; X86-NEXT: vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqa64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
; X64-NEXT: vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)

define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqa32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x01]
; X86-NEXT: vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqa32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
; X64-NEXT: vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)

define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqa32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x01]
; X86-NEXT: vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqa32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
; X64-NEXT: vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}
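; NOTE: Annotation (not autogenerated): the load tests below cover all three
; masking modes of each load intrinsic in a single function: an all-ones mask
; (plain load), a mask with a pass-through value (merge-masking, {%k1}), and
; a mask with a zeroinitializer pass-through (zero-masking, {%k1} {z}); the
; results are added so none of the calls can be dropped. Sketch with a
; hypothetical name:
;
;   define <8 x float> @load_sketch(i8* %p, i8 %m) {
;     ; expect: vmovaps (ptr), %ymm0 {%k1} {z}
;     %r = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %p, <8 x float> zeroinitializer, i8 %m)
;     ret <8 x float> %r
;   }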
define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x00]
; X86-NEXT: vmovaps (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x08]
; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovaps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; X64-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f]
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res2, %res1
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)

define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovups (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x00]
; X86-NEXT: vmovups (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x08]
; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovups (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; X64-NEXT: vmovups (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f]
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res2, %res1
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)

define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovapd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovapd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x00]
; X86-NEXT: vmovapd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x08]
; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmovapd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovapd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; X64-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f]
; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res2, %res1
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)

define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovupd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovupd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x00]
; X86-NEXT: vmovupd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x08]
; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmovupd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovupd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; X64-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f]
; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res2, %res1
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)

define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovaps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x00]
; X86-NEXT: vmovaps (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x08]
; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps_128:
; X64: # %bb.0:
; X64-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovaps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; X64-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f]
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x float> %res2, %res1
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)

define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovups (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x00]
; X86-NEXT: vmovups (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x08]
; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps_128:
; X64: # %bb.0:
; X64-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovups (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; X64-NEXT: vmovups (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f]
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x float> %res2, %res1
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)

define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovapd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovapd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x00]
; X86-NEXT: vmovapd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x08]
; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd_128:
; X64: # %bb.0:
; X64-NEXT: vmovapd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovapd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; X64-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f]
; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <2 x double> %res2, %res1
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)

define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovupd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovupd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x00]
; X86-NEXT: vmovupd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x08]
; X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd_128:
; X64: # %bb.0:
; X64-NEXT: vmovupd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovupd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; X64-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f]
; X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <2 x double> %res2, %res1
  ret <2 x double> %res4
}
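; NOTE: Annotation (not autogenerated): the aligned forms (load.ps/pd) select
; vmovaps/vmovapd, which fault on a misaligned address, while the unaligned
; forms (loadu.ps/pd) select vmovups/vmovupd; the masking behavior of the two
; variants is otherwise identical.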
declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)

declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8)

define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqu32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x00]
; X86-NEXT: vmovdqu32 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x09]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqu32 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06]
; X64-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
  %res4 = add <4 x i32> %res2, %res1
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqu32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x00]
; X86-NEXT: vmovdqu32 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x09]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqu32 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06]
; X64-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqu64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x00]
; X86-NEXT: vmovdqu64 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x09]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqu64 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06]
; X64-NEXT: vmovdqu64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT: vmovdqu64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x00]
; X86-NEXT: vmovdqu64 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x09]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT: kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovdqu64 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06]
; X64-NEXT: vmovdqu64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}
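; NOTE: Annotation (not autogenerated): for the integer loads only the masked
; forms need an element-size suffix (vmovdqu32 vs. vmovdqu64), because the
; k-mask selects per element; the unmasked form degrades to a plain vmovdqu.
; Sketch with a hypothetical name:
;
;   define <2 x i64> @loadu_q_sketch(i8* %p, i8 %m) {
;     ; expect: vmovdqu64 (ptr), %xmm0 {%k1} {z}
;     %r = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %p, <2 x i64> zeroinitializer, i8 %m)
;     ret <2 x i64> %r
;   }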
declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)

define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x00]
; X86-NEXT: vmovdqa32 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x08]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; X64-NEXT: vmovdqa32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
  %res4 = add <4 x i32> %res2, %res1
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x00]
; X86-NEXT: vmovdqa32 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x08]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa32 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; X64-NEXT: vmovdqa32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x00]
; X86-NEXT: vmovdqa64 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x08]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa64 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; X64-NEXT: vmovdqa64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vmovdqa64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x00]
; X86-NEXT: vmovdqa64 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x08]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vmovdqa64 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; X64-NEXT: vmovdqa64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}
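; NOTE: Annotation (not autogenerated): the pshuf.d tests below pass the
; shuffle control as an immediate (i32 3, i.e. element pattern [3,0,0,0],
; repeated per 128-bit lane for the 256-bit form), and the autogenerated
; "# xmm1 {%k1} = ..." comment lines decode that pattern. Sketch with a
; hypothetical name:
;
;   define <4 x i32> @pshufd_sketch(<4 x i32> %v, i8 %m) {
;     ; expect: vpshufd $3, %xmm0, %xmm0 {%k1} {z}
;     %r = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %v, i32 3, <4 x i32> zeroinitializer, i8 %m)
;     ret <4 x i32> %r
;   }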
declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; X86: # %bb.0:
; X86-NEXT: vpshufd $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03]
; X86-NEXT: # xmm2 = xmm0[3,0,0,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; X86-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0]
; X86-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0]
; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; X64: # %bb.0:
; X64-NEXT: vpshufd $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03]
; X64-NEXT: # xmm2 = xmm0[3,0,0,0]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; X64-NEXT: # xmm1 {%k1} = xmm0[3,0,0,0]
; X64-NEXT: vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[3,0,0,0]
; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; X86: # %bb.0:
; X86-NEXT: vpshufd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03]
; X86-NEXT: # ymm2 = ymm0[3,0,0,0,7,4,4,4]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; X86-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; X86-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; X86-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; X64: # %bb.0:
; X64-NEXT: vpshufd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03]
; X64-NEXT: # ymm2 = ymm0[3,0,0,0,7,4,4,4]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; X64-NEXT: # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; X64-NEXT: vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; X64-NEXT: # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}
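; NOTE: Annotation (not autogenerated): the compare tests below return the
; result as an i8 bitmask rather than a vector. The compare writes a
; k-register, which is copied to a GPR with kmovw; the "# kill:" comment
; marks the implicit truncation of %eax to the returned %al.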
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_d_256:
; X86: # %bb.0:
; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_d_256:
; X64: # %bb.0:
; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_d_256:
; X86: # %bb.0:
; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_d_256:
; X64: # %bb.0:
; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
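; NOTE: Annotation (not autogenerated): two lowerings of the mask operand
; appear above and below. For the 8-element d_256 compares all 8 bits of the
; i8 mask are significant, so the mask is ANDed in afterwards with a scalar
; andb; for the 4- and 2-element compares only the low mask bits matter, so
; the mask is loaded into k1 and applied directly to the compare ({%k1}),
; which also clears the unused result bits.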
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
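; The tests below cover the legacy masked floating-point unpack intrinsics
; (@llvm.x86.avx512.mask.unpck{h,l}.{pd,ps}.{128,256}). Each test makes one
; merge-masked call (blending into %x2 under %x3) and one unmasked call
; (mask i8 -1), then adds the two results with fadd so both lowerings stay
; live. For reference, each masked unpack is semantically a shufflevector
; plus a mask-driven select; a sketch for the 128-bit pd case (illustrative
; only, not part of the checked module):
;   %u  = shufflevector <2 x double> %x0, <2 x double> %x1, <2 x i32> <i32 1, i32 3>
;   %bc = bitcast i8 %x3 to <8 x i1>
;   %m  = shufflevector <8 x i1> %bc, <8 x i1> %bc, <2 x i32> <i32 0, i32 1>
;   %r  = select <2 x i1> %m, <2 x double> %u, <2 x double> %x2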
declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9]
; X86-NEXT:    # xmm3 = xmm0[1],xmm1[1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X86-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9]
; X64-NEXT:    # xmm3 = xmm0[1],xmm1[1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X64-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9]
; X86-NEXT:    # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9]
; X64-NEXT:    # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vunpckhps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9]
; X86-NEXT:    # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vunpckhps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9]
; X64-NEXT:    # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vunpckhps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9]
; X86-NEXT:    # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vunpckhps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9]
; X64-NEXT:    # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9]
; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X86-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9]
; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X64-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vunpcklps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9]
; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vunpcklps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9]
; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vunpcklps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vunpcklps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}
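; The tests below repeat the pattern for the legacy masked integer unpack
; intrinsics (@llvm.x86.avx512.mask.punpck{h,l}{d.q,qd.q}.{128,256}),
; combining the masked and unmasked results with an integer add.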
declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9]
; X86-NEXT:    # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9]
; X64-NEXT:    # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9]
; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9]
; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9]
; X86-NEXT:    # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9]
; X64-NEXT:    # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9]
; X86-NEXT:    # xmm3 = xmm0[1],xmm1[1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9]
; X64-NEXT:    # xmm3 = xmm0[1],xmm1[1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9]
; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9]
; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9]
; X86-NEXT:    # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9]
; X64-NEXT:    # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
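; The remaining tests cover the legacy masked logical intrinsics
; (@llvm.x86.avx512.mask.p{and,or,xor}.d.{128,256}) across register (rr),
; memory (rm), and embedded-broadcast (rmb) operands, each in unmasked,
; merge-masked (k), and zero-masked (kz) forms; unmasked calls lower to the
; VEX float ops (vandps/vorps) while masked calls keep the EVEX integer
; forms. Semantically each masked call is the plain bitwise op plus a
; mask-driven select; a sketch for the 128-bit merge-masked and
; (illustrative only, not part of the checked module):
;   %and = and <4 x i32> %a, %b
;   %bc  = bitcast i8 %mask to <8 x i1>
;   %m   = shufflevector <8 x i1> %bc, <8 x i1> %bc, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %res = select <4 x i1> %m, <4 x i32> %and, <4 x i32> %passThru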
define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
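; Masked-or tests: the same operand matrix as the vpandd tests above; the
; unmasked forms lower to vorps and the masked forms to EVEX vpord.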
define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vxorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rm_128:
; X64: # %bb.0:
; X64-NEXT: vxorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpxord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpxord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpxord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpxord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrk_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vxorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rm_256:
; X64: # %bb.0:
; X64-NEXT: vxorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpxord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpxord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpxord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpxord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rm_128:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rmb_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmb_128:
; X64: # %bb.0:
; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmbk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmbk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrk_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rm_256:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi64_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rm_128:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmk_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmbkz_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmbkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rrk_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rrk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rrkz_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rrkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi64_rm_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rm_256:
; X64: # %bb.0:
; X64-NEXT: vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpandnq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi64_rmb_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: vbroadcastsd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc9]
; X86-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmb_256:
; X64: # %bb.0:
; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmbk_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT: # xmm2 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xca]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmbk_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmbkz_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT: # xmm1 = mem[0],zero
; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmbkz_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_add_epi32_rr_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rrk_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rrk_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rrkz_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rrkz_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_add_epi32_rm_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rm_128:
; X64: 
# %bb.0: 4204; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07] 4205; X64-NEXT: retq # encoding: [0xc3] 4206 %b = load <4 x i32>, <4 x i32>* %ptr_b 4207 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4208 ret <4 x i32> %res 4209} 4210 4211define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 4212; X86-LABEL: test_mask_add_epi32_rmk_128: 4213; X86: # %bb.0: 4214; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4215; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4216; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4217; X86-NEXT: vpaddd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x08] 4218; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4219; X86-NEXT: retl # encoding: [0xc3] 4220; 4221; X64-LABEL: test_mask_add_epi32_rmk_128: 4222; X64: # %bb.0: 4223; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4224; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f] 4225; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4226; X64-NEXT: retq # encoding: [0xc3] 4227 %b = load <4 x i32>, <4 x i32>* %ptr_b 4228 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4229 ret <4 x i32> %res 4230} 4231 4232define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 4233; X86-LABEL: test_mask_add_epi32_rmkz_128: 4234; X86: # %bb.0: 4235; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4236; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4237; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4238; X86-NEXT: vpaddd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x00] 4239; X86-NEXT: retl # encoding: [0xc3] 4240; 4241; X64-LABEL: test_mask_add_epi32_rmkz_128: 4242; X64: # %bb.0: 4243; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4244; X64-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07] 4245; X64-NEXT: retq # encoding: [0xc3] 4246 %b = load <4 x i32>, <4 x i32>* %ptr_b 4247 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4248 ret <4 x i32> %res 4249} 4250 4251define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 4252; X86-LABEL: test_mask_add_epi32_rmb_128: 4253; X86: # %bb.0: 4254; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4255; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x00] 4256; X86-NEXT: retl # encoding: [0xc3] 4257; 4258; X64-LABEL: test_mask_add_epi32_rmb_128: 4259; X64: # %bb.0: 4260; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07] 4261; X64-NEXT: retq # encoding: [0xc3] 4262 %q = load i32, i32* %ptr_b 4263 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4264 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4265 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4266 ret <4 x i32> %res 4267} 4268 4269define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x 
i32> %passThru, i8 %mask) { 4270; X86-LABEL: test_mask_add_epi32_rmbk_128: 4271; X86: # %bb.0: 4272; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4273; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4274; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4275; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x08] 4276; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4277; X86-NEXT: retl # encoding: [0xc3] 4278; 4279; X64-LABEL: test_mask_add_epi32_rmbk_128: 4280; X64: # %bb.0: 4281; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4282; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f] 4283; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4284; X64-NEXT: retq # encoding: [0xc3] 4285 %q = load i32, i32* %ptr_b 4286 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4287 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4288 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4289 ret <4 x i32> %res 4290} 4291 4292define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 4293; X86-LABEL: test_mask_add_epi32_rmbkz_128: 4294; X86: # %bb.0: 4295; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4296; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4297; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4298; X86-NEXT: vpaddd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x00] 4299; X86-NEXT: retl # encoding: [0xc3] 4300; 4301; X64-LABEL: test_mask_add_epi32_rmbkz_128: 4302; X64: # %bb.0: 4303; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4304; X64-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07] 4305; X64-NEXT: retq # encoding: [0xc3] 4306 %q = load i32, i32* %ptr_b 4307 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4308 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4309 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4310 ret <4 x i32> %res 4311} 4312 4313declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 4314 4315define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 4316; CHECK-LABEL: test_mask_sub_epi32_rr_128: 4317; CHECK: # %bb.0: 4318; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1] 4319; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4320 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4321 ret <4 x i32> %res 4322} 4323 4324define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 4325; X86-LABEL: test_mask_sub_epi32_rrk_128: 4326; X86: # %bb.0: 4327; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4328; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4329; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] 4330; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4331; X86-NEXT: retl # encoding: [0xc3] 4332; 4333; 
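; EDITORIAL NOTE (not autogenerated): in the *_rr_* tests the mask operand is
; the constant i8 -1 (all lanes enabled), so the masking folds away entirely
; and the compiler may pick the unmasked instruction form; the "EVEX TO VEX
; Compression" remarks above show the shorter VEX encoding being chosen for
; exactly those cases. Only the *k/*kz variants, which receive a live %mask,
; need the EVEX form with a {%k1} write-mask.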
X64-LABEL: test_mask_sub_epi32_rrk_128: 4334; X64: # %bb.0: 4335; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4336; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] 4337; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 4338; X64-NEXT: retq # encoding: [0xc3] 4339 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4340 ret <4 x i32> %res 4341} 4342 4343define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 4344; X86-LABEL: test_mask_sub_epi32_rrkz_128: 4345; X86: # %bb.0: 4346; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4347; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4348; X86-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1] 4349; X86-NEXT: retl # encoding: [0xc3] 4350; 4351; X64-LABEL: test_mask_sub_epi32_rrkz_128: 4352; X64: # %bb.0: 4353; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4354; X64-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1] 4355; X64-NEXT: retq # encoding: [0xc3] 4356 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4357 ret <4 x i32> %res 4358} 4359 4360define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 4361; X86-LABEL: test_mask_sub_epi32_rm_128: 4362; X86: # %bb.0: 4363; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4364; X86-NEXT: vpsubd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x00] 4365; X86-NEXT: retl # encoding: [0xc3] 4366; 4367; X64-LABEL: test_mask_sub_epi32_rm_128: 4368; X64: # %bb.0: 4369; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x07] 4370; X64-NEXT: retq # encoding: [0xc3] 4371 %b = load <4 x i32>, <4 x i32>* %ptr_b 4372 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4373 ret <4 x i32> %res 4374} 4375 4376define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 4377; X86-LABEL: test_mask_sub_epi32_rmk_128: 4378; X86: # %bb.0: 4379; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4380; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4381; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4382; X86-NEXT: vpsubd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x08] 4383; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4384; X86-NEXT: retl # encoding: [0xc3] 4385; 4386; X64-LABEL: test_mask_sub_epi32_rmk_128: 4387; X64: # %bb.0: 4388; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4389; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f] 4390; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4391; X64-NEXT: retq # encoding: [0xc3] 4392 %b = load <4 x i32>, <4 x i32>* %ptr_b 4393 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4394 ret <4 x i32> %res 4395} 4396 4397define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 4398; X86-LABEL: test_mask_sub_epi32_rmkz_128: 4399; X86: # %bb.0: 4400; 
X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4401; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4402; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4403; X86-NEXT: vpsubd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x00] 4404; X86-NEXT: retl # encoding: [0xc3] 4405; 4406; X64-LABEL: test_mask_sub_epi32_rmkz_128: 4407; X64: # %bb.0: 4408; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4409; X64-NEXT: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07] 4410; X64-NEXT: retq # encoding: [0xc3] 4411 %b = load <4 x i32>, <4 x i32>* %ptr_b 4412 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4413 ret <4 x i32> %res 4414} 4415 4416define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 4417; X86-LABEL: test_mask_sub_epi32_rmb_128: 4418; X86: # %bb.0: 4419; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4420; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x00] 4421; X86-NEXT: retl # encoding: [0xc3] 4422; 4423; X64-LABEL: test_mask_sub_epi32_rmb_128: 4424; X64: # %bb.0: 4425; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07] 4426; X64-NEXT: retq # encoding: [0xc3] 4427 %q = load i32, i32* %ptr_b 4428 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4429 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4430 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 4431 ret <4 x i32> %res 4432} 4433 4434define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 4435; X86-LABEL: test_mask_sub_epi32_rmbk_128: 4436; X86: # %bb.0: 4437; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4438; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4439; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4440; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x08] 4441; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4442; X86-NEXT: retl # encoding: [0xc3] 4443; 4444; X64-LABEL: test_mask_sub_epi32_rmbk_128: 4445; X64: # %bb.0: 4446; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4447; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f] 4448; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 4449; X64-NEXT: retq # encoding: [0xc3] 4450 %q = load i32, i32* %ptr_b 4451 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4452 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4453 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 4454 ret <4 x i32> %res 4455} 4456 4457define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 4458; X86-LABEL: test_mask_sub_epi32_rmbkz_128: 4459; X86: # %bb.0: 4460; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4461; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4462; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4463; X86-NEXT: vpsubd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # 
encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x00] 4464; X86-NEXT: retl # encoding: [0xc3] 4465; 4466; X64-LABEL: test_mask_sub_epi32_rmbkz_128: 4467; X64: # %bb.0: 4468; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4469; X64-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07] 4470; X64-NEXT: retq # encoding: [0xc3] 4471 %q = load i32, i32* %ptr_b 4472 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 4473 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 4474 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 4475 ret <4 x i32> %res 4476} 4477 4478declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 4479 4480define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 4481; CHECK-LABEL: test_mask_sub_epi32_rr_256: 4482; CHECK: # %bb.0: 4483; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1] 4484; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4485 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4486 ret <8 x i32> %res 4487} 4488 4489define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 4490; X86-LABEL: test_mask_sub_epi32_rrk_256: 4491; X86: # %bb.0: 4492; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4493; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4494; X86-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] 4495; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4496; X86-NEXT: retl # encoding: [0xc3] 4497; 4498; X64-LABEL: test_mask_sub_epi32_rrk_256: 4499; X64: # %bb.0: 4500; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4501; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] 4502; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4503; X64-NEXT: retq # encoding: [0xc3] 4504 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4505 ret <8 x i32> %res 4506} 4507 4508define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 4509; X86-LABEL: test_mask_sub_epi32_rrkz_256: 4510; X86: # %bb.0: 4511; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4512; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4513; X86-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1] 4514; X86-NEXT: retl # encoding: [0xc3] 4515; 4516; X64-LABEL: test_mask_sub_epi32_rrkz_256: 4517; X64: # %bb.0: 4518; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4519; X64-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1] 4520; X64-NEXT: retq # encoding: [0xc3] 4521 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4522 ret <8 x i32> %res 4523} 4524 4525define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 4526; X86-LABEL: test_mask_sub_epi32_rm_256: 4527; X86: # %bb.0: 4528; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4529; X86-NEXT: vpsubd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x00] 
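; EDITORIAL NOTE (not autogenerated): the *_rmb* ("register, memory
; broadcast") tests in this file splat a single scalar load across the vector
; with the standard insertelement + shufflevector idiom, which the backend
; folds into an EVEX embedded-broadcast memory operand ({1to4}/{1to8}), e.g.:
;
;   %q = load i32, i32* %ptr_b
;   %v = insertelement <4 x i32> undef, i32 %q, i32 0
;   %b = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer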
4530; X86-NEXT: retl # encoding: [0xc3] 4531; 4532; X64-LABEL: test_mask_sub_epi32_rm_256: 4533; X64: # %bb.0: 4534; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x07] 4535; X64-NEXT: retq # encoding: [0xc3] 4536 %b = load <8 x i32>, <8 x i32>* %ptr_b 4537 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4538 ret <8 x i32> %res 4539} 4540 4541define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 4542; X86-LABEL: test_mask_sub_epi32_rmk_256: 4543; X86: # %bb.0: 4544; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4545; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4546; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4547; X86-NEXT: vpsubd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x08] 4548; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4549; X86-NEXT: retl # encoding: [0xc3] 4550; 4551; X64-LABEL: test_mask_sub_epi32_rmk_256: 4552; X64: # %bb.0: 4553; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4554; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f] 4555; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4556; X64-NEXT: retq # encoding: [0xc3] 4557 %b = load <8 x i32>, <8 x i32>* %ptr_b 4558 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4559 ret <8 x i32> %res 4560} 4561 4562define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 4563; X86-LABEL: test_mask_sub_epi32_rmkz_256: 4564; X86: # %bb.0: 4565; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4566; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4567; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4568; X86-NEXT: vpsubd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x00] 4569; X86-NEXT: retl # encoding: [0xc3] 4570; 4571; X64-LABEL: test_mask_sub_epi32_rmkz_256: 4572; X64: # %bb.0: 4573; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4574; X64-NEXT: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07] 4575; X64-NEXT: retq # encoding: [0xc3] 4576 %b = load <8 x i32>, <8 x i32>* %ptr_b 4577 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4578 ret <8 x i32> %res 4579} 4580 4581define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 4582; X86-LABEL: test_mask_sub_epi32_rmb_256: 4583; X86: # %bb.0: 4584; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4585; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x00] 4586; X86-NEXT: retl # encoding: [0xc3] 4587; 4588; X64-LABEL: test_mask_sub_epi32_rmb_256: 4589; X64: # %bb.0: 4590; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07] 4591; X64-NEXT: retq # encoding: [0xc3] 4592 %q = load i32, i32* %ptr_b 4593 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4594 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4595 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4596 ret <8 x 
i32> %res 4597} 4598 4599define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 4600; X86-LABEL: test_mask_sub_epi32_rmbk_256: 4601; X86: # %bb.0: 4602; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4603; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4604; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4605; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x08] 4606; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4607; X86-NEXT: retl # encoding: [0xc3] 4608; 4609; X64-LABEL: test_mask_sub_epi32_rmbk_256: 4610; X64: # %bb.0: 4611; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4612; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f] 4613; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4614; X64-NEXT: retq # encoding: [0xc3] 4615 %q = load i32, i32* %ptr_b 4616 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4617 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4618 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4619 ret <8 x i32> %res 4620} 4621 4622define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 4623; X86-LABEL: test_mask_sub_epi32_rmbkz_256: 4624; X86: # %bb.0: 4625; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4626; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4627; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4628; X86-NEXT: vpsubd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x00] 4629; X86-NEXT: retl # encoding: [0xc3] 4630; 4631; X64-LABEL: test_mask_sub_epi32_rmbkz_256: 4632; X64: # %bb.0: 4633; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4634; X64-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07] 4635; X64-NEXT: retq # encoding: [0xc3] 4636 %q = load i32, i32* %ptr_b 4637 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4638 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4639 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4640 ret <8 x i32> %res 4641} 4642 4643declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 4644 4645define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 4646; CHECK-LABEL: test_mask_add_epi32_rr_256: 4647; CHECK: # %bb.0: 4648; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1] 4649; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4650 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4651 ret <8 x i32> %res 4652} 4653 4654define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 4655; X86-LABEL: test_mask_add_epi32_rrk_256: 4656; X86: # %bb.0: 4657; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4658; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4659; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] 4660; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX 
TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4661; X86-NEXT: retl # encoding: [0xc3] 4662; 4663; X64-LABEL: test_mask_add_epi32_rrk_256: 4664; X64: # %bb.0: 4665; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4666; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] 4667; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 4668; X64-NEXT: retq # encoding: [0xc3] 4669 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4670 ret <8 x i32> %res 4671} 4672 4673define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 4674; X86-LABEL: test_mask_add_epi32_rrkz_256: 4675; X86: # %bb.0: 4676; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4677; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4678; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1] 4679; X86-NEXT: retl # encoding: [0xc3] 4680; 4681; X64-LABEL: test_mask_add_epi32_rrkz_256: 4682; X64: # %bb.0: 4683; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4684; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1] 4685; X64-NEXT: retq # encoding: [0xc3] 4686 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4687 ret <8 x i32> %res 4688} 4689 4690define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 4691; X86-LABEL: test_mask_add_epi32_rm_256: 4692; X86: # %bb.0: 4693; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4694; X86-NEXT: vpaddd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x00] 4695; X86-NEXT: retl # encoding: [0xc3] 4696; 4697; X64-LABEL: test_mask_add_epi32_rm_256: 4698; X64: # %bb.0: 4699; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07] 4700; X64-NEXT: retq # encoding: [0xc3] 4701 %b = load <8 x i32>, <8 x i32>* %ptr_b 4702 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4703 ret <8 x i32> %res 4704} 4705 4706define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 4707; X86-LABEL: test_mask_add_epi32_rmk_256: 4708; X86: # %bb.0: 4709; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4710; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4711; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4712; X86-NEXT: vpaddd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x08] 4713; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4714; X86-NEXT: retl # encoding: [0xc3] 4715; 4716; X64-LABEL: test_mask_add_epi32_rmk_256: 4717; X64: # %bb.0: 4718; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4719; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f] 4720; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4721; X64-NEXT: retq # encoding: [0xc3] 4722 %b = load <8 x i32>, <8 x i32>* %ptr_b 4723 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4724 ret <8 x i32> %res 4725} 4726 4727define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x 
i32>* %ptr_b, i8 %mask) { 4728; X86-LABEL: test_mask_add_epi32_rmkz_256: 4729; X86: # %bb.0: 4730; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4731; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4732; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4733; X86-NEXT: vpaddd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x00] 4734; X86-NEXT: retl # encoding: [0xc3] 4735; 4736; X64-LABEL: test_mask_add_epi32_rmkz_256: 4737; X64: # %bb.0: 4738; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4739; X64-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07] 4740; X64-NEXT: retq # encoding: [0xc3] 4741 %b = load <8 x i32>, <8 x i32>* %ptr_b 4742 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4743 ret <8 x i32> %res 4744} 4745 4746define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 4747; X86-LABEL: test_mask_add_epi32_rmb_256: 4748; X86: # %bb.0: 4749; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4750; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x00] 4751; X86-NEXT: retl # encoding: [0xc3] 4752; 4753; X64-LABEL: test_mask_add_epi32_rmb_256: 4754; X64: # %bb.0: 4755; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07] 4756; X64-NEXT: retq # encoding: [0xc3] 4757 %q = load i32, i32* %ptr_b 4758 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4759 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4760 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 4761 ret <8 x i32> %res 4762} 4763 4764define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 4765; X86-LABEL: test_mask_add_epi32_rmbk_256: 4766; X86: # %bb.0: 4767; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4768; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4769; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 4770; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x08] 4771; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4772; X86-NEXT: retl # encoding: [0xc3] 4773; 4774; X64-LABEL: test_mask_add_epi32_rmbk_256: 4775; X64: # %bb.0: 4776; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4777; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f] 4778; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 4779; X64-NEXT: retq # encoding: [0xc3] 4780 %q = load i32, i32* %ptr_b 4781 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4782 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4783 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 4784 ret <8 x i32> %res 4785} 4786 4787define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 4788; X86-LABEL: test_mask_add_epi32_rmbkz_256: 4789; X86: # %bb.0: 4790; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 4791; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 4792; X86-NEXT: kmovw %ecx, %k1 # 
encoding: [0xc5,0xf8,0x92,0xc9] 4793; X86-NEXT: vpaddd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x00] 4794; X86-NEXT: retl # encoding: [0xc3] 4795; 4796; X64-LABEL: test_mask_add_epi32_rmbkz_256: 4797; X64: # %bb.0: 4798; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 4799; X64-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07] 4800; X64-NEXT: retq # encoding: [0xc3] 4801 %q = load i32, i32* %ptr_b 4802 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 4803 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 4804 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 4805 ret <8 x i32> %res 4806} 4807 4808declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 4809 4810define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 4811; X86-LABEL: test_mm512_maskz_add_ps_256: 4812; X86: # %bb.0: 4813; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4814; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4815; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1] 4816; X86-NEXT: retl # encoding: [0xc3] 4817; 4818; X64-LABEL: test_mm512_maskz_add_ps_256: 4819; X64: # %bb.0: 4820; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4821; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1] 4822; X64-NEXT: retq # encoding: [0xc3] 4823 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 4824 ret <8 x float> %res 4825} 4826 4827define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 4828; X86-LABEL: test_mm512_mask_add_ps_256: 4829; X86: # %bb.0: 4830; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4831; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4832; X86-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] 4833; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 4834; X86-NEXT: retl # encoding: [0xc3] 4835; 4836; X64-LABEL: test_mm512_mask_add_ps_256: 4837; X64: # %bb.0: 4838; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4839; X64-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] 4840; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 4841; X64-NEXT: retq # encoding: [0xc3] 4842 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 4843 ret <8 x float> %res 4844} 4845 4846define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 4847; CHECK-LABEL: test_mm512_add_ps_256: 4848; CHECK: # %bb.0: 4849; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1] 4850; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4851 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 4852 ret <8 x float> %res 4853} 4854declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 4855 4856define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 
%mask) { 4857; X86-LABEL: test_mm512_maskz_add_ps_128: 4858; X86: # %bb.0: 4859; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4860; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4861; X86-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1] 4862; X86-NEXT: retl # encoding: [0xc3] 4863; 4864; X64-LABEL: test_mm512_maskz_add_ps_128: 4865; X64: # %bb.0: 4866; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4867; X64-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1] 4868; X64-NEXT: retq # encoding: [0xc3] 4869 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 4870 ret <4 x float> %res 4871} 4872 4873define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 4874; X86-LABEL: test_mm512_mask_add_ps_128: 4875; X86: # %bb.0: 4876; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4877; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4878; X86-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] 4879; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 4880; X86-NEXT: retl # encoding: [0xc3] 4881; 4882; X64-LABEL: test_mm512_mask_add_ps_128: 4883; X64: # %bb.0: 4884; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4885; X64-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] 4886; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 4887; X64-NEXT: retq # encoding: [0xc3] 4888 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 4889 ret <4 x float> %res 4890} 4891 4892define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 4893; CHECK-LABEL: test_mm512_add_ps_128: 4894; CHECK: # %bb.0: 4895; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1] 4896; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4897 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 4898 ret <4 x float> %res 4899} 4900declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 4901 4902define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 4903; X86-LABEL: test_mm512_maskz_sub_ps_256: 4904; X86: # %bb.0: 4905; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4906; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4907; X86-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1] 4908; X86-NEXT: retl # encoding: [0xc3] 4909; 4910; X64-LABEL: test_mm512_maskz_sub_ps_256: 4911; X64: # %bb.0: 4912; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4913; X64-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1] 4914; X64-NEXT: retq # encoding: [0xc3] 4915 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 4916 ret <8 x float> %res 4917} 4918 4919define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 4920; X86-LABEL: test_mm512_mask_sub_ps_256: 4921; X86: # %bb.0: 4922; 
X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4923; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4924; X86-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] 4925; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 4926; X86-NEXT: retl # encoding: [0xc3] 4927; 4928; X64-LABEL: test_mm512_mask_sub_ps_256: 4929; X64: # %bb.0: 4930; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4931; X64-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] 4932; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 4933; X64-NEXT: retq # encoding: [0xc3] 4934 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 4935 ret <8 x float> %res 4936} 4937 4938define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 4939; CHECK-LABEL: test_mm512_sub_ps_256: 4940; CHECK: # %bb.0: 4941; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5c,0xc1] 4942; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4943 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 4944 ret <8 x float> %res 4945} 4946declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 4947 4948define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 4949; X86-LABEL: test_mm512_maskz_sub_ps_128: 4950; X86: # %bb.0: 4951; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4952; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4953; X86-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1] 4954; X86-NEXT: retl # encoding: [0xc3] 4955; 4956; X64-LABEL: test_mm512_maskz_sub_ps_128: 4957; X64: # %bb.0: 4958; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4959; X64-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1] 4960; X64-NEXT: retq # encoding: [0xc3] 4961 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 4962 ret <4 x float> %res 4963} 4964 4965define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 4966; X86-LABEL: test_mm512_mask_sub_ps_128: 4967; X86: # %bb.0: 4968; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4969; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4970; X86-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] 4971; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 4972; X86-NEXT: retl # encoding: [0xc3] 4973; 4974; X64-LABEL: test_mm512_mask_sub_ps_128: 4975; X64: # %bb.0: 4976; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 4977; X64-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] 4978; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 4979; X64-NEXT: retq # encoding: [0xc3] 4980 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 4981 ret <4 x float> %res 4982} 4983 4984define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 
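; EDITORIAL NOTE (not autogenerated): in the masked tests the X86 and X64
; check blocks differ only in how the i8 %mask reaches kmovw: on i686 it is
; passed on the stack, so the checks expect a movzbl from {{[0-9]+}}(%esp)
; first, while on x86-64 it arrives in a GPR (%edi or %esi here) that feeds
; kmovw directly.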
4985; CHECK-LABEL: test_mm512_sub_ps_128: 4986; CHECK: # %bb.0: 4987; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] 4988; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 4989 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 4990 ret <4 x float> %res 4991} 4992declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 4993 4994define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 4995; X86-LABEL: test_mm512_maskz_mul_ps_256: 4996; X86: # %bb.0: 4997; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 4998; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 4999; X86-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1] 5000; X86-NEXT: retl # encoding: [0xc3] 5001; 5002; X64-LABEL: test_mm512_maskz_mul_ps_256: 5003; X64: # %bb.0: 5004; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5005; X64-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1] 5006; X64-NEXT: retq # encoding: [0xc3] 5007 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 5008 ret <8 x float> %res 5009} 5010 5011define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 5012; X86-LABEL: test_mm512_mask_mul_ps_256: 5013; X86: # %bb.0: 5014; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5015; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5016; X86-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] 5017; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5018; X86-NEXT: retl # encoding: [0xc3] 5019; 5020; X64-LABEL: test_mm512_mask_mul_ps_256: 5021; X64: # %bb.0: 5022; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5023; X64-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] 5024; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5025; X64-NEXT: retq # encoding: [0xc3] 5026 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 5027 ret <8 x float> %res 5028} 5029 5030define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5031; CHECK-LABEL: test_mm512_mul_ps_256: 5032; CHECK: # %bb.0: 5033; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x59,0xc1] 5034; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5035 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 5036 ret <8 x float> %res 5037} 5038declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 5039 5040define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5041; X86-LABEL: test_mm512_maskz_mul_ps_128: 5042; X86: # %bb.0: 5043; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5044; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5045; X86-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1] 5046; X86-NEXT: retl # encoding: [0xc3] 5047; 5048; X64-LABEL: test_mm512_maskz_mul_ps_128: 5049; 
X64: # %bb.0: 5050; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5051; X64-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1] 5052; X64-NEXT: retq # encoding: [0xc3] 5053 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 5054 ret <4 x float> %res 5055} 5056 5057define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 5058; X86-LABEL: test_mm512_mask_mul_ps_128: 5059; X86: # %bb.0: 5060; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5061; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5062; X86-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] 5063; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5064; X86-NEXT: retl # encoding: [0xc3] 5065; 5066; X64-LABEL: test_mm512_mask_mul_ps_128: 5067; X64: # %bb.0: 5068; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5069; X64-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] 5070; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5071; X64-NEXT: retq # encoding: [0xc3] 5072 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 5073 ret <4 x float> %res 5074} 5075 5076define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5077; CHECK-LABEL: test_mm512_mul_ps_128: 5078; CHECK: # %bb.0: 5079; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1] 5080; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5081 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 5082 ret <4 x float> %res 5083} 5084declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 5085 5086define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5087; X86-LABEL: test_mm512_maskz_div_ps_256: 5088; X86: # %bb.0: 5089; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5090; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5091; X86-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1] 5092; X86-NEXT: retl # encoding: [0xc3] 5093; 5094; X64-LABEL: test_mm512_maskz_div_ps_256: 5095; X64: # %bb.0: 5096; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5097; X64-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1] 5098; X64-NEXT: retq # encoding: [0xc3] 5099 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 5100 ret <8 x float> %res 5101} 5102 5103define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 5104; X86-LABEL: test_mm512_mask_div_ps_256: 5105; X86: # %bb.0: 5106; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5107; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5108; X86-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] 5109; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5110; X86-NEXT: retl # encoding: [0xc3] 5111; 5112; X64-LABEL: 
test_mm512_mask_div_ps_256: 5113; X64: # %bb.0: 5114; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5115; X64-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] 5116; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2] 5117; X64-NEXT: retq # encoding: [0xc3] 5118 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 5119 ret <8 x float> %res 5120} 5121 5122define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5123; CHECK-LABEL: test_mm512_div_ps_256: 5124; CHECK: # %bb.0: 5125; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5e,0xc1] 5126; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5127 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 5128 ret <8 x float> %res 5129} 5130declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 5131 5132define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5133; X86-LABEL: test_mm512_maskz_div_ps_128: 5134; X86: # %bb.0: 5135; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5136; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5137; X86-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1] 5138; X86-NEXT: retl # encoding: [0xc3] 5139; 5140; X64-LABEL: test_mm512_maskz_div_ps_128: 5141; X64: # %bb.0: 5142; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5143; X64-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1] 5144; X64-NEXT: retq # encoding: [0xc3] 5145 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 5146 ret <4 x float> %res 5147} 5148 5149define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 5150; X86-LABEL: test_mm512_mask_div_ps_128: 5151; X86: # %bb.0: 5152; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 5153; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 5154; X86-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] 5155; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5156; X86-NEXT: retl # encoding: [0xc3] 5157; 5158; X64-LABEL: test_mm512_mask_div_ps_128: 5159; X64: # %bb.0: 5160; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 5161; X64-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] 5162; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2] 5163; X64-NEXT: retq # encoding: [0xc3] 5164 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 5165 ret <4 x float> %res 5166} 5167 5168define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5169; CHECK-LABEL: test_mm512_div_ps_128: 5170; CHECK: # %bb.0: 5171; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1] 5172; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 5173 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 5174 ret <4 x float> %res 5175} 5176declare <4 x 
declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; X86: # %bb.0:
; X86-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
; X86-NEXT: vmovaps %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X86-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; X64: # %bb.0:
; X64-NEXT: vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
; X64-NEXT: vmovaps %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X64-NEXT: vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; X86: # %bb.0:
; X86-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X86-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0xd0]
; X86-NEXT: vmovapd %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc8]
; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; X64: # %bb.0:
; X64-NEXT: vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X64-NEXT: # ymm0 = ymm0[0,1],ymm1[2,3]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0xd0]
; X64-NEXT: vmovapd %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc8]
; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; X86: # %bb.0:
; X86-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xd0]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; X64: # %bb.0:
; X64-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xd0]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; X86: # %bb.0:
; X86-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X86-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xd0]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; X64: # %bb.0:
; X64-NEXT: vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X64-NEXT: # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xd0]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
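; shufpd/shufps shuffle within 128-bit lanes. For shufps the immediate holds
; four 2-bit selectors (the low two index the first source, the high two the
; second), so $22 = 0b00010110 produces xmm0[2,1],xmm1[1,0], as the
; autogenerated shuffle comments below spell out.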
declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
; X86: # %bb.0:
; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01]
; X86-NEXT: # xmm3 = xmm0[1],xmm1[0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
; X86-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[0]
; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
; X86-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
; X86-NEXT: # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; X86-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
; X64: # %bb.0:
; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01]
; X64-NEXT: # xmm3 = xmm0[1],xmm1[0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
; X64-NEXT: # xmm2 {%k1} = xmm0[1],xmm1[0]
; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
; X64-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
; X64-NEXT: # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; X64-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 %x4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> zeroinitializer, i8 %x4)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
; X86: # %bb.0:
; X86-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06]
; X86-NEXT: # ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
; X86-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X86-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
; X64: # %bb.0:
; X64-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06]
; X64-NEXT: # ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
; X64-NEXT: # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X64-NEXT: vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
; X86: # %bb.0:
; X86-NEXT: vshufps $22, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16]
; X86-NEXT: # xmm3 = xmm0[2,1],xmm1[1,0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
; X86-NEXT: # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
; X64: # %bb.0:
; X64-NEXT: vshufps $22, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16]
; X64-NEXT: # xmm3 = xmm0[2,1],xmm1[1,0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
; X64-NEXT: # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
; X86: # %bb.0:
; X86-NEXT: vshufps $22, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xd9,0x16]
; X86-NEXT: # ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
; X86-NEXT: # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
; X86-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
; X64: # %bb.0:
; X64-NEXT: vshufps $22, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xd9,0x16]
; X64-NEXT: # ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
; X64-NEXT: # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
; X64-NEXT: vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}
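; Masked signed-maximum tests: each variant checks the merge-masked form
; (result blended into the %x2 passthru) plus either the zero-masked or the
; unmasked form, then adds the two results so neither call can be dropped.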
declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
; X86-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
; X64-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
; X86: # %bb.0:
; X86-NEXT: vpmaxsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
; X64: # %bb.0:
; X64-NEXT: vpmaxsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
; X86: # %bb.0:
; X86-NEXT: vpmaxsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
; X64: # %bb.0:
; X64-NEXT: vpmaxsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
; X86-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
; X64-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
; X86-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
; X64-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
; X86: # %bb.0:
; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
; X64: # %bb.0:
; X64-NEXT: vpmaxud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
; X86: # %bb.0:
; X86-NEXT: vpmaxuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
; X64: # %bb.0:
; X64-NEXT: vpmaxuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
; X86-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
; X64-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
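; Signed and unsigned minimum (vpminsd/vpminsq, vpminud/vpminuq) follow the
; same merge-/zero-/unmasked pattern as the maximum tests above.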
declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
; X86-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
; X64-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_d_256:
; X86: # %bb.0:
; X86-NEXT: vpminsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_d_256:
; X64: # %bb.0:
; X64-NEXT: vpminsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_q_128:
; X86: # %bb.0:
; X86-NEXT: vpminsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_q_128:
; X64: # %bb.0:
; X64-NEXT: vpminsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
; X86-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
; X64-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
; X86-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
; X64-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_d_256:
; X86: # %bb.0:
; X86-NEXT: vpminud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_d_256:
; X64: # %bb.0:
; X64-NEXT: vpminud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_q_128:
; X86: # %bb.0:
; X86-NEXT: vpminuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_q_128:
; X64: # %bb.0:
; X64-NEXT: vpminuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
; X86-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
; X64-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
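; Logical right shifts by a count held in an xmm register. All three masking
; variants of vpsrlq/vpsrld are generated and their results summed.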
declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_q_128:
; X86: # %bb.0:
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
; X86-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
; X86-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_q_128:
; X64: # %bb.0:
; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
; X64-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
; X64-NEXT: vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_q_256:
; X86: # %bb.0:
; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
; X86-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
; X86-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_q_256:
; X64: # %bb.0:
; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
; X64-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
; X64-NEXT: vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_d_128:
; X86: # %bb.0:
; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
; X86-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_d_128:
; X64: # %bb.0:
; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
; X64-NEXT: vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_d_256:
; X86: # %bb.0:
; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
; X86-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_d_256:
; X64: # %bb.0:
; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
; X64-NEXT: vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}
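; Arithmetic right shifts (vpsrad): the zero-masked call passes a
; zeroinitializer passthru and the unmasked call passes mask -1.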
declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_d_128:
; X86: # %bb.0:
; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
; X86-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_d_128:
; X64: # %bb.0:
; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
; X64-NEXT: vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_d_256:
; X86: # %bb.0:
; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
; X86-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_d_256:
; X64: # %bb.0:
; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
; X64-NEXT: vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}
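; Logical left shifts (vpslld/vpsllq), again covering the merge-masked,
; zero-masked, and unmasked lowerings.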
declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_d_128:
; X86: # %bb.0:
; X86-NEXT: vpslld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
; X86-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_d_128:
; X64: # %bb.0:
; X64-NEXT: vpslld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
; X64-NEXT: vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_d_256:
; X86: # %bb.0:
; X86-NEXT: vpslld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
; X86-NEXT: vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_d_256:
; X64: # %bb.0:
; X64-NEXT: vpslld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
; X64-NEXT: vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_q_256:
; X86: # %bb.0:
; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
; X86-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_q_256:
; X64: # %bb.0:
; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
; X64-NEXT: vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}
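; Shifts by an immediate count ($3): the count comes from the intrinsic's i32
; operand rather than a register, so the immediate byte is appended to the
; opcode bytes in each encoding below.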
declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
; X86-NEXT:    vpsrlq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
; X64-NEXT:    vpsrlq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_128:
; X86:       # %bb.0:
; X86-NEXT:    vpsrld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
; X86-NEXT:    vpsrld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_128:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
; X64-NEXT:    vpsrld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; X86:       # %bb.0:
; X86-NEXT:    vpsrld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
; X86-NEXT:    vpsrld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
; X64-NEXT:    vpsrld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

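; Logical left shifts by an immediate count (vpslld).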
declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_di_128:
; X86:       # %bb.0:
; X86-NEXT:    vpslld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
; X86-NEXT:    vpslld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_di_128:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
; X64-NEXT:    vpslld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_di_256:
; X86:       # %bb.0:
; X86-NEXT:    vpslld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
; X86-NEXT:    vpslld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_di_256:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
; X64-NEXT:    vpslld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

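; Variable-count logical right shifts (vpsrlvq/vpsrlvd).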
declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
; X86-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
; X86-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
; X86-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
; X86-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

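; Variable-count arithmetic right shifts (vpsravd).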
declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav4_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsravd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
; X86-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav4_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsravd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
; X86-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

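; Constant-operand psrav: both inputs come from the constant pool, so the
; checks carry .LCPI fixups instead of register operands.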
define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsravd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

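; Variable-count logical left shifts (vpsllvq/vpsllvd).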
declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv2_di:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
; X86-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv2_di:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv4_di:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
; X86-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv4_di:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv4_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
; X86-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv4_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
; X86-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

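; Byte zero-extensions (vpmovzxbd/vpmovzxbq); the decode comments show which
; source bytes feed each destination element.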
declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0]
; X86-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0]
; X64-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0]
; X86-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0]
; X64-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

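; Dword-to-qword zero-extension (vpmovzxdq).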
declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
; X64-NEXT:    vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0]
; X86-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0]
; X64-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

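; Word-to-dword and word-to-qword zero-extension (vpmovzxwd/vpmovzxwq).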
declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0]
; X86-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0]
; X64-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X86-NEXT:    vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0]
; X86-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0]
; X64-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

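; Byte sign-extensions (vpmovsxbd/vpmovsxbq); unlike the zero-extending forms,
; no shuffle decode comments are emitted for these.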
declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
; X86: # %bb.0:
; X86-NEXT: vpmovsxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
; X86-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
; X64: # %bb.0:
; X64-NEXT: vpmovsxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
; X64-NEXT: vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
 %res3 = add <4 x i32> %res, %res1
 %res4 = add <4 x i32> %res3, %res2
 ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
; X86: # %bb.0:
; X86-NEXT: vpmovsxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
; X86-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
; X64: # %bb.0:
; X64-NEXT: vpmovsxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
; X64-NEXT: vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
 %res3 = add <8 x i32> %res, %res1
 %res4 = add <8 x i32> %res3, %res2
 ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; X86: # %bb.0:
; X86-NEXT: vpmovsxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; X64: # %bb.0:
; X64-NEXT: vpmovsxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
; X64-NEXT: vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
 %res3 = add <2 x i64> %res, %res1
 %res4 = add <2 x i64> %res3, %res2
 ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; X86: # %bb.0:
; X86-NEXT: vpmovsxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; X64: # %bb.0:
; X64-NEXT: vpmovsxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
; X64-NEXT: vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
 %res3 = add <4 x i64> %res, %res1
 %res4 = add <4 x i64> %res3, %res2
 ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; X86: # %bb.0:
; X86-NEXT: vpmovsxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
; X86-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; X64: # %bb.0:
; X64-NEXT: vpmovsxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
; X64-NEXT: vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
 %res3 = add <4 x i32> %res, %res1
 %res4 = add <4 x i32> %res3, %res2
 ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; X86: # %bb.0:
; X86-NEXT: vpmovsxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
; X86-NEXT: vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; X64: # %bb.0:
; X64-NEXT: vpmovsxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
; X64-NEXT: vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
 %res3 = add <8 x i32> %res, %res1
 %res4 = add <8 x i32> %res3, %res2
 ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; X86: # %bb.0:
; X86-NEXT: vpmovsxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; X64: # %bb.0:
; X64-NEXT: vpmovsxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
; X64-NEXT: vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
 %res3 = add <2 x i64> %res, %res1
 %res4 = add <2 x i64> %res3, %res2
 ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; X86: # %bb.0:
; X86-NEXT: vpmovsxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; X64: # %bb.0:
; X64-NEXT: vpmovsxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
; X64-NEXT: vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
 %res3 = add <4 x i64> %res, %res1
 %res4 = add <4 x i64> %res3, %res2
 ret <4 x i64> %res4
}

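; VPSRAQ arithmetic right shifts. The shift itself has no VEX equivalent, so it
; stays EVEX-encoded even in the unmasked case; only the vpaddq glue compresses.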
declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_q_128:
; X86: # %bb.0:
; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
; X86-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_q_128:
; X64: # %bb.0:
; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
; X64-NEXT: vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
 %res3 = add <2 x i64> %res, %res1
 %res4 = add <2 x i64> %res3, %res2
 ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_q_256:
; X86: # %bb.0:
; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm3 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
; X86-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_q_256:
; X64: # %bb.0:
; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm3 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
; X64-NEXT: vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
 %res3 = add <4 x i64> %res, %res1
 %res4 = add <4 x i64> %res3, %res2
 ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; X86: # %bb.0:
; X86-NEXT: vpsraq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
; X86-NEXT: vpsraq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03]
; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; X64: # %bb.0:
; X64-NEXT: vpsraq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
; X64-NEXT: vpsraq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03]
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
 %res3 = add <2 x i64> %res, %res1
 %res4 = add <2 x i64> %res3, %res2
 ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; X86: # %bb.0:
; X86-NEXT: vpsraq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
; X86-NEXT: vpsraq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03]
; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; X64: # %bb.0:
; X64-NEXT: vpsraq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
; X64-NEXT: vpsraq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
 %res3 = add <4 x i64> %res, %res1
 %res4 = add <4 x i64> %res3, %res2
 ret <4 x i64> %res4
}

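; Variable arithmetic right shifts (VPSRAVQ), including a constant-input case
; that checks the operands are materialized from the constant pool.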
declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; X86: # %bb.0:
; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
; X86-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; X64: # %bb.0:
; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
; X64-NEXT: vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
 %res3 = add <2 x i64> %res, %res1
 %res4 = add <2 x i64> %res3, %res2
 ret <2 x i64> %res4
}

define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
; X86: # %bb.0:
; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,0,4294967287,4294967295]
; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: vpsravq {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,18446744073709551607]
; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
 ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; X86: # %bb.0:
; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
; X86-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; X64: # %bb.0:
; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
; X64-NEXT: vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
 %res3 = add <4 x i64> %res, %res1
 %res4 = add <4 x i64> %res3, %res2
 ret <4 x i64> %res4
}

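; Signed and unsigned int-to-double conversions. vcvtdq2pd compresses to its
; VEX form; vcvtudq2pd is AVX-512-only and keeps its EVEX encoding.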
declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
; X86: # %bb.0:
; X86-NEXT: vcvtdq2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
; X64: # %bb.0:
; X64-NEXT: vcvtdq2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
 %res2 = fadd <2 x double> %res, %res1
 ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
; X86: # %bb.0:
; X86-NEXT: vcvtdq2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
; X64: # %bb.0:
; X64-NEXT: vcvtdq2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
 %res2 = fadd <4 x double> %res, %res1
 ret <4 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
; X86: # %bb.0:
; X86-NEXT: vcvtudq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
; X64: # %bb.0:
; X64-NEXT: vcvtudq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
 %res2 = fadd <2 x double> %res, %res1
 ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
; X86: # %bb.0:
; X86-NEXT: vcvtudq2pd %xmm0, %ymm2 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
; X64: # %bb.0:
; X64-NEXT: vcvtudq2pd %xmm0, %ymm2 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
 %res2 = fadd <4 x double> %res, %res1
 ret <4 x double> %res2
}

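; VALIGND/VALIGNQ tests. The unmasked forms are canonicalized to equivalent
; shuffles (vpalignr, or valignq with a rescaled immediate).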
declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_valign_d_128:
; X86: # %bb.0:
; X86-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
; X86-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
; X86-NEXT: # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
; X86-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
; X86-NEXT: # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_valign_d_128:
; X64: # %bb.0:
; X64-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
; X64-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
; X64-NEXT: # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
; X64-NEXT: valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
; X64-NEXT: # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 %x4)
 %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 -1)
 %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> zeroinitializer, i8 %x4)
 %res3 = add <4 x i32> %res, %res1
 %res4 = add <4 x i32> %res3, %res2
 ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_valign_d_256:
; X86: # %bb.0:
; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
; X86-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
; X86-NEXT: # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
; X86-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_valign_d_256:
; X64: # %bb.0:
; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
; X64-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
; X64-NEXT: # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
; X64-NEXT: vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 %x4)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 -1)
 %res2 = add <8 x i32> %res, %res1
 ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_valign_q_128:
; X86: # %bb.0:
; X86-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
; X86-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
; X86-NEXT: # xmm2 {%k1} = xmm1[1],xmm0[0]
; X86-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_valign_q_128:
; X64: # %bb.0:
; X64-NEXT: vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
; X64-NEXT: # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
; X64-NEXT: # xmm2 {%k1} = xmm1[1],xmm0[0]
; X64-NEXT: vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 %x4)
 %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 -1)
 %res2 = add <2 x i64> %res, %res1
 ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_valign_q_256:
; X86: # %bb.0:
; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
; X86-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
; X86-NEXT: # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
; X86-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_valign_q_256:
; X64: # %bb.0:
; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
; X64-NEXT: # ymm3 = ymm1[3],ymm0[0,1,2]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
; X64-NEXT: # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
; X64-NEXT: vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 %x4)
 %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 -1)
 %res2 = add <4 x i64> %res, %res1
 ret <4 x i64> %res2
}

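; VPERMILPD/VPERMILPS with variable control vectors, in merge-masked,
; zero-masked, and unmasked forms.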
declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; X86: # %bb.0:
; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
; X86-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; X64: # %bb.0:
; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
; X64-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT: vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
 %res3 = fadd <4 x double> %res, %res1
 %res4 = fadd <4 x double> %res2, %res3
 ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; X86: # %bb.0:
; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
; X86-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
; X86-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X86-NEXT: vaddpd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; X64: # %bb.0:
; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
; X64-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
; X64-NEXT: vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X64-NEXT: vaddpd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
 %res3 = fadd <2 x double> %res, %res1
 %res4 = fadd <2 x double> %res3, %res2
 ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; X86: # %bb.0:
; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
; X86-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X86-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; X64: # %bb.0:
; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
; X64-NEXT: vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
 %res3 = fadd <8 x float> %res, %res1
 %res4 = fadd <8 x float> %res3, %res2
 ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; X86: # %bb.0:
; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
; X86-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; X64: # %bb.0:
; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
; X64-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT: vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
 %res3 = fadd <4 x float> %res, %res1
 %res4 = fadd <4 x float> %res2, %res3
 ret <4 x float> %res4
}

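; 128-bit lane extract/insert. The unmasked forms compress to
; vextractf128/vinsertf128 (vinserti128 for the integer variant).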
declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
; X86: # %bb.0:
; X86-NEXT: vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
; X86-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
; X86-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
; X64: # %bb.0:
; X64-NEXT: vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
; X64-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT: vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
 %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
 %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
 %res3 = fadd <4 x float> %res, %res1
 %res4 = fadd <4 x float> %res2, %res3
 ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; X86: # %bb.0:
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
; X86-NEXT: vaddps %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xd3]
; X86-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
; X86-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; X64: # %bb.0:
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
; X64-NEXT: vaddps %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xd3]
; X64-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
; X64-NEXT: vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
 %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
 %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
 %res3 = fadd <8 x float> %res, %res1
 %res4 = fadd <8 x float> %res2, %res3
 ret <8 x float> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; X86: # %bb.0:
; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
; X86-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; X64: # %bb.0:
; X64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
; X64-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]

 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
 %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
 %res3 = add <8 x i32> %res, %res1
 %res4 = add <8 x i32> %res2, %res3
 ret <8 x i32> %res4
}

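; Masked FP max/min. With an all-ones mask these lower to plain VEX
; vmaxps/vminps, checked once for both targets via the shared CHECK prefix.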
define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
 ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_max_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
 ret <8 x float> %res
}

define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_max_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
 ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
 ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_max_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_max_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_max_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_max_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_max_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_min_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_min_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_min_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_min_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_min_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_min_ps_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

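; The cmp/ucmp tests below exercise all eight comparison immediates
; (0-7: eq, lt, le, false, neq, nlt, nle, true). The "false" and "true"
; predicates fold to constant masks, so no compare instruction is emitted
; for them; element 3 is left as zero and element 7 is filled with the
; all-ones mask constant.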
define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc8]
; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe9]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_d_256:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X86-NEXT: vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
; X86-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
; X86-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X64-NEXT: vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
; X64-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
; X64-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01]
; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02]
; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x05]
; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x06]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_d_256:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X86-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
; X86-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
; X86-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
; X86-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT: kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X64-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
; X64-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
; X64-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
; X64-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone

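; For the 256-bit qword compares only four mask bits are live, so the "true"
; predicate folds to the constant $15 rather than $255.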
define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 # encoding: [0x62,0xf2,0xf5,0x28,0x37,0xc8]
; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
; X86-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
; X86-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
; X86-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe9,0x05]
; X86-NEXT: vpcmpgtq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
; X64-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
; X64-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
; X64-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe9,0x05]
; X64-NEXT: vpcmpgtq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone

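; Unsigned variants: the ordered predicates select the unsigned compare forms
; (vpcmpltuq, vpcmpleuq, vpcmpnltuq, vpcmpnleuq) instead of the signed ones.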
define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05]
; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
; X86-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
; X86-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
; X86-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
; X86-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
; X64-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
; X64-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
; X64-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
; X64-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone

define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc8]
; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
; X86-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
; X86-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
; X86-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe9,0x05]
; X86-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
; X64-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
; X64-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
; X64-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe9,0x05]
; X64-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X64-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05]
; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_d_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
; X86-NEXT: vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
; X86-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
; X86-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
; X86-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
; X86-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X86-NEXT: kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
; X64-NEXT: vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
; X64-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
; X64-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
; X64-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
; X64-NEXT: kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x08,0x37,0xc8]
; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9]
; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression
encoding: [0xc5,0xf9,0xef,0xc0] 8816; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 8817; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 8818; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 8819; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 8820; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 8821; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 8822; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 8823; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 8824; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 8825; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 8826; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 8827; CHECK-NEXT: movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00] 8828; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 8829; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8830 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) 8831 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 8832 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1) 8833 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 8834 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1) 8835 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 8836 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1) 8837 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 8838 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1) 8839 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 8840 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1) 8841 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 8842 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1) 8843 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 8844 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1) 8845 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 8846 ret <8 x i8> %vec7 8847} 8848 8849define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { 8850; X86-LABEL: test_mask_cmp_q_128: 8851; X86: # %bb.0: 8852; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8853; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0] 8854; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1] 8855; X86-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8] 8856; X86-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02] 8857; X86-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04] 8858; X86-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe9,0x05] 8859; X86-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1] 8860; X86-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e] 8861; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e] 8862; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 8863; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xf9,0xef,0xc0] 8864; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 8865; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 8866; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 8867; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 8868; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 8869; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 8870; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 8871; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 8872; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 8873; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 8874; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 8875; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 8876; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 8877; X86-NEXT: retl # encoding: [0xc3] 8878; 8879; X64-LABEL: test_mask_cmp_q_128: 8880; X64: # %bb.0: 8881; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7] 8882; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1] 8883; X64-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8] 8884; X64-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02] 8885; X64-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04] 8886; X64-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe9,0x05] 8887; X64-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1] 8888; X64-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e] 8889; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e] 8890; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 8891; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 8892; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 8893; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 8894; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 8895; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 8896; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 8897; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 8898; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 8899; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 8900; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 8901; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 8902; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 8903; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 8904; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 8905; X64-NEXT: retq # encoding: [0xc3] 8906 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) 8907 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 8908 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask) 8909 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 8910 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> 
%a1, i32 2, i8 %mask) 8911 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 8912 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask) 8913 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 8914 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask) 8915 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 8916 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask) 8917 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 8918 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask) 8919 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 8920 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask) 8921 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 8922 ret <8 x i8> %vec7 8923} 8924 8925declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone 8926 8927define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { 8928; CHECK-LABEL: test_ucmp_q_128: 8929; CHECK: # %bb.0: 8930; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1] 8931; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01] 8932; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02] 8933; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04] 8934; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05] 8935; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06] 8936; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 8937; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 8938; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 8939; CHECK-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 8940; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 8941; CHECK-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 8942; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 8943; CHECK-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 8944; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 8945; CHECK-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 8946; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 8947; CHECK-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 8948; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 8949; CHECK-NEXT: movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00] 8950; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 8951; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 8952 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) 8953 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 8954 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1) 8955 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 8956 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1) 8957 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 8958 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1) 8959 %vec3 = insertelement <8 x 
i8> %vec2, i8 %res3, i32 3 8960 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1) 8961 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 8962 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1) 8963 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 8964 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1) 8965 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 8966 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1) 8967 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 8968 ret <8 x i8> %vec7 8969} 8970 8971define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { 8972; X86-LABEL: test_mask_ucmp_q_128: 8973; X86: # %bb.0: 8974; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 8975; X86-NEXT: kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0] 8976; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1] 8977; X86-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01] 8978; X86-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02] 8979; X86-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04] 8980; X86-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05] 8981; X86-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06] 8982; X86-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e] 8983; X86-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e] 8984; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 8985; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 8986; X86-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 8987; X86-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 8988; X86-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 8989; X86-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 8990; X86-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 8991; X86-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 8992; X86-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 8993; X86-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 8994; X86-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 8995; X86-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 8996; X86-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 8997; X86-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 8998; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 8999; X86-NEXT: retl # encoding: [0xc3] 9000; 9001; X64-LABEL: test_mask_ucmp_q_128: 9002; X64: # %bb.0: 9003; X64-NEXT: kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7] 9004; X64-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1] 9005; X64-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01] 9006; X64-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02] 9007; X64-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04] 9008; X64-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} # encoding: 
[0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05] 9009; X64-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06] 9010; X64-NEXT: kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e] 9011; X64-NEXT: kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e] 9012; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9013; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] 9014; X64-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00] 9015; X64-NEXT: kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1] 9016; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] 9017; X64-NEXT: kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3] 9018; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] 9019; X64-NEXT: kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4] 9020; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] 9021; X64-NEXT: kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5] 9022; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] 9023; X64-NEXT: kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6] 9024; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] 9025; X64-NEXT: kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2] 9026; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] 9027; X64-NEXT: retq # encoding: [0xc3] 9028 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) 9029 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 9030 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask) 9031 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 9032 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask) 9033 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 9034 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask) 9035 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 9036 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask) 9037 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 9038 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask) 9039 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 9040 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask) 9041 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 9042 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask) 9043 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 9044 ret <8 x i8> %vec7 9045} 9046 9047declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone 9048 9049declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8) 9050 9051define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) { 9052; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256: 9053; X86: # %bb.0: 9054; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 9055; X86-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01] 9056; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9057; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9058; 
X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01] 9059; X86-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9] 9060; X86-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01] 9061; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1] 9062; X86-NEXT: retl # encoding: [0xc3] 9063; 9064; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256: 9065; X64: # %bb.0: 9066; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 9067; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01] 9068; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9069; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01] 9070; X64-NEXT: vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9] 9071; X64-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01] 9072; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1] 9073; X64-NEXT: retq # encoding: [0xc3] 9074 %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1) 9075 %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask) 9076 %res3 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask) 9077 %res4 = fadd <8 x float> %res1, %res2 9078 %res5 = fadd <8 x float> %res3, %res4 9079 ret <8 x float> %res5 9080} 9081 9082define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256_load(<4 x float>* %x0ptr, <8 x float> %x2, i8 %mask) { 9083; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load: 9084; X86: # %bb.0: 9085; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9086; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 9087; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 9088; X86-NEXT: vbroadcastf32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x00] 9089; X86-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 9090; X86-NEXT: retl # encoding: [0xc3] 9091; 9092; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load: 9093; X64: # %bb.0: 9094; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9095; X64-NEXT: vbroadcastf32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x07] 9096; X64-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 9097; X64-NEXT: retq # encoding: [0xc3] 9098 %x0 = load <4 x float>, <4 x float>* %x0ptr 9099 %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask) 9100 ret <8 x float> %res 9101} 9102 9103declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8) 9104 9105define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) { 9106; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256: 9107; X86: # %bb.0: 9108; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 9109; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01] 9110; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9111; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9112; X86-NEXT: vinserti32x4 $1, %xmm0, 
%ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01] 9113; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01] 9114; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 9115; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 9116; X86-NEXT: retl # encoding: [0xc3] 9117; 9118; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256: 9119; X64: # %bb.0: 9120; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 9121; X64-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01] 9122; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9123; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01] 9124; X64-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01] 9125; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] 9126; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 9127; X64-NEXT: retq # encoding: [0xc3] 9128 %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1) 9129 %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) 9130 %res3 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask) 9131 %res4 = add <8 x i32> %res1, %res2 9132 %res5 = add <8 x i32> %res3, %res4 9133 ret <8 x i32> %res5 9134} 9135 9136define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256_load(<4 x i32>* %x0ptr, <8 x i32> %x2, i8 %mask) { 9137; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load: 9138; X86: # %bb.0: 9139; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9140; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 9141; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 9142; X86-NEXT: vbroadcasti32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x00] 9143; X86-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 9144; X86-NEXT: retl # encoding: [0xc3] 9145; 9146; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load: 9147; X64: # %bb.0: 9148; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9149; X64-NEXT: vbroadcasti32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x07] 9150; X64-NEXT: # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 9151; X64-NEXT: retq # encoding: [0xc3] 9152 %x0 = load <4 x i32>, <4 x i32>* %x0ptr 9153 %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) 9154 ret <8 x i32> %res 9155} 9156 9157declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8) 9158 9159define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { 9160; X86-LABEL: test_int_x86_avx512_mask_pabs_q_128: 9161; X86: # %bb.0: 9162; X86-NEXT: vpabsq %xmm0, %xmm2 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0] 9163; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9164; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9165; X86-NEXT: vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8] 9166; X86-NEXT: vpaddq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2] 9167; X86-NEXT: retl # encoding: [0xc3] 9168; 9169; 
X64-LABEL: test_int_x86_avx512_mask_pabs_q_128: 9170; X64: # %bb.0: 9171; X64-NEXT: vpabsq %xmm0, %xmm2 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0] 9172; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9173; X64-NEXT: vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8] 9174; X64-NEXT: vpaddq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2] 9175; X64-NEXT: retq # encoding: [0xc3] 9176 %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 9177 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) 9178 %res2 = add <2 x i64> %res, %res1 9179 ret <2 x i64> %res2 9180} 9181 9182declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) 9183 9184define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { 9185; X86-LABEL: test_int_x86_avx512_mask_pabs_q_256: 9186; X86: # %bb.0: 9187; X86-NEXT: vpabsq %ymm0, %ymm2 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0] 9188; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9189; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9190; X86-NEXT: vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8] 9191; X86-NEXT: vpaddq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2] 9192; X86-NEXT: retl # encoding: [0xc3] 9193; 9194; X64-LABEL: test_int_x86_avx512_mask_pabs_q_256: 9195; X64: # %bb.0: 9196; X64-NEXT: vpabsq %ymm0, %ymm2 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0] 9197; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9198; X64-NEXT: vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8] 9199; X64-NEXT: vpaddq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2] 9200; X64-NEXT: retq # encoding: [0xc3] 9201 %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 9202 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) 9203 %res2 = add <4 x i64> %res, %res1 9204 ret <4 x i64> %res2 9205} 9206 9207declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8) 9208 9209define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { 9210; X86-LABEL: test_int_x86_avx512_mask_pabs_d_128: 9211; X86: # %bb.0: 9212; X86-NEXT: vpabsd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0] 9213; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9214; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9215; X86-NEXT: vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8] 9216; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] 9217; X86-NEXT: retl # encoding: [0xc3] 9218; 9219; X64-LABEL: test_int_x86_avx512_mask_pabs_d_128: 9220; X64: # %bb.0: 9221; X64-NEXT: vpabsd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0] 9222; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9223; X64-NEXT: vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8] 9224; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] 9225; X64-NEXT: retq # encoding: [0xc3] 9226 %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 9227 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) 
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_d_256:
; X86: # %bb.0:
; X86-NEXT: vpabsd %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_d_256:
; X64: # %bb.0:
; X64-NEXT: vpabsd %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

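; NOTE: vptestm sets mask bit i when (x0[i] & x1[i]) != 0. Each ptestm test
; below evaluates the intrinsic twice, once with the caller's mask and once
; with an all-ones mask (i8 -1), and adds the two i8 results, so the masked
; and unmasked lowerings are both checked in one function.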
declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_d_128:
; X86: # %bb.0:
; X86-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_d_128:
; X64: # %bb.0:
; X64-NEXT: vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_d_256:
; X86: # %bb.0:
; X86-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_d_256:
; X64: # %bb.0:
; X64-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

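; NOTE: in the d.256 test above, the masked form is not kept in a k-register:
; the compiler applies the mask as a scalar andb on the GPR copy of the
; compare result, and vzeroupper is emitted before returning because ymm
; registers were used. The q.128/q.256 tests below exercise the same
; patterns with the W1 (quadword) encodings.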
declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_q_128:
; X86: # %bb.0:
; X86-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_q_128:
; X64: # %bb.0:
; X64-NEXT: vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_q_256:
; X86: # %bb.0:
; X86-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_q_256:
; X64: # %bb.0:
; X64-NEXT: vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

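; NOTE: vptestnm is the complementary test: mask bit i is set when
; (x0[i] & x1[i]) == 0. This shows up in the encodings below as the F3
; prefix (third EVEX byte 0x7e/0xfe) instead of the 66 prefix (0x7d/0xfd)
; used by vptestm.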
declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2)

define i8 @test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_128:
; X86: # %bb.0:
; X86-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_128:
; X64: # %bb.0:
; X64-NEXT: vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2)

define i8 @test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_256:
; X86: # %bb.0:
; X86-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_256:
; X64: # %bb.0:
; X64-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andb %al, %dil # encoding: [0x40,0x20,0xc7]
; X64-NEXT: addb %dil, %al # encoding: [0x40,0x00,0xf8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2)

define i8 @test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_128:
; X86: # %bb.0:
; X86-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_128:
; X64: # %bb.0:
; X64-NEXT: vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2)

define i8 @test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_256:
; X86: # %bb.0:
; X86-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
; X86-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_256:
; X64: # %bb.0:
; X64-NEXT: vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
9491; X64-NEXT: kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9] 9492; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9493; X64-NEXT: addb %cl, %al # encoding: [0x00,0xc8] 9494; X64-NEXT: # kill: def $al killed $al killed $eax 9495; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9496; X64-NEXT: retq # encoding: [0xc3] 9497 %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 9498 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) 9499 %res2 = add i8 %res, %res1 9500 ret i8 %res2 9501} 9502 9503define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) { 9504; CHECK-LABEL: test_cmpps_256: 9505; CHECK: # %bb.0: 9506; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02] 9507; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9508; CHECK-NEXT: # kill: def $al killed $al killed $eax 9509; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9510; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9511 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1) 9512 ret i8 %res 9513} 9514declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8) 9515 9516define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) { 9517; CHECK-LABEL: test_cmpps_128: 9518; CHECK: # %bb.0: 9519; CHECK-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02] 9520; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9521; CHECK-NEXT: # kill: def $al killed $al killed $eax 9522; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9523 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1) 9524 ret i8 %res 9525} 9526declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8) 9527 9528define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) { 9529; CHECK-LABEL: test_cmppd_256: 9530; CHECK: # %bb.0: 9531; CHECK-NEXT: vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02] 9532; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9533; CHECK-NEXT: # kill: def $al killed $al killed $eax 9534; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 9535; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9536 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1) 9537 ret i8 %res 9538} 9539declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8) 9540 9541define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) { 9542; CHECK-LABEL: test_cmppd_128: 9543; CHECK: # %bb.0: 9544; CHECK-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02] 9545; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] 9546; CHECK-NEXT: # kill: def $al killed $al killed $eax 9547; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9548 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1) 9549 ret i8 %res 9550} 9551declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8) 9552 9553define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { 9554; CHECK-LABEL: test_mask_mul_epi32_rr_128: 9555; CHECK: # %bb.0: 9556; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1] 9557; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9558 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 9559 
ret < 2 x i64> %res 9560} 9561 9562define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { 9563; X86-LABEL: test_mask_mul_epi32_rrk_128: 9564; X86: # %bb.0: 9565; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9566; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9567; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] 9568; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9569; X86-NEXT: retl # encoding: [0xc3] 9570; 9571; X64-LABEL: test_mask_mul_epi32_rrk_128: 9572; X64: # %bb.0: 9573; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9574; X64-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] 9575; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9576; X64-NEXT: retq # encoding: [0xc3] 9577 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 9578 ret < 2 x i64> %res 9579} 9580 9581define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { 9582; X86-LABEL: test_mask_mul_epi32_rrkz_128: 9583; X86: # %bb.0: 9584; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9585; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9586; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1] 9587; X86-NEXT: retl # encoding: [0xc3] 9588; 9589; X64-LABEL: test_mask_mul_epi32_rrkz_128: 9590; X64: # %bb.0: 9591; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9592; X64-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1] 9593; X64-NEXT: retq # encoding: [0xc3] 9594 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 9595 ret < 2 x i64> %res 9596} 9597 9598define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { 9599; X86-LABEL: test_mask_mul_epi32_rm_128: 9600; X86: # %bb.0: 9601; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9602; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x00] 9603; X86-NEXT: retl # encoding: [0xc3] 9604; 9605; X64-LABEL: test_mask_mul_epi32_rm_128: 9606; X64: # %bb.0: 9607; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x07] 9608; X64-NEXT: retq # encoding: [0xc3] 9609 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 9610 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 9611 ret < 2 x i64> %res 9612} 9613 9614define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 9615; X86-LABEL: test_mask_mul_epi32_rmk_128: 9616; X86: # %bb.0: 9617; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9618; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 9619; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 9620; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x08] 9621; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9622; X86-NEXT: retl # encoding: [0xc3] 9623; 9624; X64-LABEL: test_mask_mul_epi32_rmk_128: 9625; X64: # %bb.0: 9626; X64-NEXT: 
kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9627; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f] 9628; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9629; X64-NEXT: retq # encoding: [0xc3] 9630 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 9631 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 9632 ret < 2 x i64> %res 9633} 9634 9635define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { 9636; X86-LABEL: test_mask_mul_epi32_rmkz_128: 9637; X86: # %bb.0: 9638; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9639; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 9640; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 9641; X86-NEXT: vpmuldq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x00] 9642; X86-NEXT: retl # encoding: [0xc3] 9643; 9644; X64-LABEL: test_mask_mul_epi32_rmkz_128: 9645; X64: # %bb.0: 9646; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9647; X64-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07] 9648; X64-NEXT: retq # encoding: [0xc3] 9649 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 9650 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 9651 ret < 2 x i64> %res 9652} 9653 9654define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { 9655; X86-LABEL: test_mask_mul_epi32_rmb_128: 9656; X86: # %bb.0: 9657; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9658; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08] 9659; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1] 9660; X86-NEXT: retl # encoding: [0xc3] 9661; 9662; X64-LABEL: test_mask_mul_epi32_rmb_128: 9663; X64: # %bb.0: 9664; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07] 9665; X64-NEXT: retq # encoding: [0xc3] 9666 %q = load i64, i64* %ptr_b 9667 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 9668 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 9669 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 9670 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 9671 ret < 2 x i64> %res 9672} 9673 9674define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 9675; X86-LABEL: test_mask_mul_epi32_rmbk_128: 9676; X86: # %bb.0: 9677; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9678; X86-NEXT: vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10] 9679; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 9680; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9681; X86-NEXT: vpmuldq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xca] 9682; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9683; X86-NEXT: retl # encoding: [0xc3] 9684; 9685; X64-LABEL: test_mask_mul_epi32_rmbk_128: 9686; X64: # %bb.0: 9687; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9688; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: 
[0x62,0xf2,0xfd,0x19,0x28,0x0f] 9689; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9690; X64-NEXT: retq # encoding: [0xc3] 9691 %q = load i64, i64* %ptr_b 9692 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 9693 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 9694 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 9695 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 9696 ret < 2 x i64> %res 9697} 9698 9699define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { 9700; X86-LABEL: test_mask_mul_epi32_rmbkz_128: 9701; X86: # %bb.0: 9702; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9703; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08] 9704; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 9705; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9706; X86-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1] 9707; X86-NEXT: retl # encoding: [0xc3] 9708; 9709; X64-LABEL: test_mask_mul_epi32_rmbkz_128: 9710; X64: # %bb.0: 9711; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9712; X64-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07] 9713; X64-NEXT: retq # encoding: [0xc3] 9714 %q = load i64, i64* %ptr_b 9715 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 9716 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer 9717 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 9718 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 9719 ret < 2 x i64> %res 9720} 9721 9722declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) 9723 9724define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { 9725; CHECK-LABEL: test_mask_mul_epi32_rr_256: 9726; CHECK: # %bb.0: 9727; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1] 9728; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9729 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 9730 ret < 4 x i64> %res 9731} 9732 9733define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { 9734; X86-LABEL: test_mask_mul_epi32_rrk_256: 9735; X86: # %bb.0: 9736; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9737; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9738; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] 9739; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9740; X86-NEXT: retl # encoding: [0xc3] 9741; 9742; X64-LABEL: test_mask_mul_epi32_rrk_256: 9743; X64: # %bb.0: 9744; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9745; X64-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] 9746; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 9747; X64-NEXT: retq # encoding: [0xc3] 9748 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 9749 ret < 4 x i64> %res 9750} 9751 
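; NOTE: the _rr/_rm/_rmb suffixes in these pmuldq tests cover register,
; memory, and scalar-broadcast operands, with k/kz for merge- and
; zero-masking. vpmuldq multiplies the sign-extended even-numbered i32
; elements into i64 results. On X64 the broadcast load folds into the
; EVEX embedded-broadcast form ({1to2}/{1to4}); on i686, where i64 is not
; a legal scalar type, it is presumably for that reason materialized with
; an explicit vmovq/vpbroadcastq sequence instead.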
9752define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { 9753; X86-LABEL: test_mask_mul_epi32_rrkz_256: 9754; X86: # %bb.0: 9755; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9756; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9757; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1] 9758; X86-NEXT: retl # encoding: [0xc3] 9759; 9760; X64-LABEL: test_mask_mul_epi32_rrkz_256: 9761; X64: # %bb.0: 9762; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9763; X64-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1] 9764; X64-NEXT: retq # encoding: [0xc3] 9765 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 9766 ret < 4 x i64> %res 9767} 9768 9769define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { 9770; X86-LABEL: test_mask_mul_epi32_rm_256: 9771; X86: # %bb.0: 9772; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9773; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x00] 9774; X86-NEXT: retl # encoding: [0xc3] 9775; 9776; X64-LABEL: test_mask_mul_epi32_rm_256: 9777; X64: # %bb.0: 9778; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x07] 9779; X64-NEXT: retq # encoding: [0xc3] 9780 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 9781 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 9782 ret < 4 x i64> %res 9783} 9784 9785define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 9786; X86-LABEL: test_mask_mul_epi32_rmk_256: 9787; X86: # %bb.0: 9788; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9789; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 9790; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 9791; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x08] 9792; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 9793; X86-NEXT: retl # encoding: [0xc3] 9794; 9795; X64-LABEL: test_mask_mul_epi32_rmk_256: 9796; X64: # %bb.0: 9797; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9798; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f] 9799; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 9800; X64-NEXT: retq # encoding: [0xc3] 9801 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 9802 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 9803 ret < 4 x i64> %res 9804} 9805 9806define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { 9807; X86-LABEL: test_mask_mul_epi32_rmkz_256: 9808; X86: # %bb.0: 9809; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9810; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 9811; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 9812; X86-NEXT: vpmuldq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x00] 9813; X86-NEXT: retl # encoding: [0xc3] 9814; 9815; X64-LABEL: test_mask_mul_epi32_rmkz_256: 9816; X64: # %bb.0: 9817; X64-NEXT: kmovw %esi, 
%k1 # encoding: [0xc5,0xf8,0x92,0xce] 9818; X64-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07] 9819; X64-NEXT: retq # encoding: [0xc3] 9820 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 9821 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 9822 ret < 4 x i64> %res 9823} 9824 9825define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { 9826; X86-LABEL: test_mask_mul_epi32_rmb_256: 9827; X86: # %bb.0: 9828; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9829; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 9830; X86-NEXT: # xmm1 = mem[0],zero 9831; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9] 9832; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1] 9833; X86-NEXT: retl # encoding: [0xc3] 9834; 9835; X64-LABEL: test_mask_mul_epi32_rmb_256: 9836; X64: # %bb.0: 9837; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07] 9838; X64-NEXT: retq # encoding: [0xc3] 9839 %q = load i64, i64* %ptr_b 9840 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 9841 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 9842 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 9843 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 9844 ret < 4 x i64> %res 9845} 9846 9847define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 9848; X86-LABEL: test_mask_mul_epi32_rmbk_256: 9849; X86: # %bb.0: 9850; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9851; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10] 9852; X86-NEXT: # xmm2 = mem[0],zero 9853; X86-NEXT: vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2] 9854; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 9855; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9856; X86-NEXT: vpmuldq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xca] 9857; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 9858; X86-NEXT: retl # encoding: [0xc3] 9859; 9860; X64-LABEL: test_mask_mul_epi32_rmbk_256: 9861; X64: # %bb.0: 9862; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9863; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f] 9864; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 9865; X64-NEXT: retq # encoding: [0xc3] 9866 %q = load i64, i64* %ptr_b 9867 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 9868 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 9869 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 9870 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 9871 ret < 4 x i64> %res 9872} 9873 9874define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { 9875; X86-LABEL: test_mask_mul_epi32_rmbkz_256: 9876; X86: # %bb.0: 9877; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9878; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression 
encoding: [0xc5,0xfa,0x7e,0x08] 9879; X86-NEXT: # xmm1 = mem[0],zero 9880; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9] 9881; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 9882; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9883; X86-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1] 9884; X86-NEXT: retl # encoding: [0xc3] 9885; 9886; X64-LABEL: test_mask_mul_epi32_rmbkz_256: 9887; X64: # %bb.0: 9888; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9889; X64-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07] 9890; X64-NEXT: retq # encoding: [0xc3] 9891 %q = load i64, i64* %ptr_b 9892 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 9893 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 9894 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 9895 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 9896 ret < 4 x i64> %res 9897} 9898 9899declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) 9900 9901define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { 9902; CHECK-LABEL: test_mask_mul_epu32_rr_128: 9903; CHECK: # %bb.0: 9904; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1] 9905; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 9906 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 9907 ret < 2 x i64> %res 9908} 9909 9910define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { 9911; X86-LABEL: test_mask_mul_epu32_rrk_128: 9912; X86: # %bb.0: 9913; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9914; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9915; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] 9916; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9917; X86-NEXT: retl # encoding: [0xc3] 9918; 9919; X64-LABEL: test_mask_mul_epu32_rrk_128: 9920; X64: # %bb.0: 9921; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9922; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] 9923; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] 9924; X64-NEXT: retq # encoding: [0xc3] 9925 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 9926 ret < 2 x i64> %res 9927} 9928 9929define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { 9930; X86-LABEL: test_mask_mul_epu32_rrkz_128: 9931; X86: # %bb.0: 9932; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 9933; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 9934; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1] 9935; X86-NEXT: retl # encoding: [0xc3] 9936; 9937; X64-LABEL: test_mask_mul_epu32_rrkz_128: 9938; X64: # %bb.0: 9939; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 9940; X64-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1] 9941; X64-NEXT: retq # encoding: [0xc3] 9942 %res 
= call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 9943 ret < 2 x i64> %res 9944} 9945 9946define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { 9947; X86-LABEL: test_mask_mul_epu32_rm_128: 9948; X86: # %bb.0: 9949; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9950; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x00] 9951; X86-NEXT: retl # encoding: [0xc3] 9952; 9953; X64-LABEL: test_mask_mul_epu32_rm_128: 9954; X64: # %bb.0: 9955; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x07] 9956; X64-NEXT: retq # encoding: [0xc3] 9957 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 9958 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 9959 ret < 2 x i64> %res 9960} 9961 9962define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 9963; X86-LABEL: test_mask_mul_epu32_rmk_128: 9964; X86: # %bb.0: 9965; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9966; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 9967; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 9968; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x08] 9969; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9970; X86-NEXT: retl # encoding: [0xc3] 9971; 9972; X64-LABEL: test_mask_mul_epu32_rmk_128: 9973; X64: # %bb.0: 9974; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9975; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f] 9976; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 9977; X64-NEXT: retq # encoding: [0xc3] 9978 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 9979 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 9980 ret < 2 x i64> %res 9981} 9982 9983define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { 9984; X86-LABEL: test_mask_mul_epu32_rmkz_128: 9985; X86: # %bb.0: 9986; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 9987; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 9988; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 9989; X86-NEXT: vpmuludq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x00] 9990; X86-NEXT: retl # encoding: [0xc3] 9991; 9992; X64-LABEL: test_mask_mul_epu32_rmkz_128: 9993; X64: # %bb.0: 9994; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 9995; X64-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07] 9996; X64-NEXT: retq # encoding: [0xc3] 9997 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 9998 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 9999 ret < 2 x i64> %res 10000} 10001 10002define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { 10003; X86-LABEL: test_mask_mul_epu32_rmb_128: 10004; X86: # %bb.0: 10005; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10006; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08] 10007; 
X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1] 10008; X86-NEXT: retl # encoding: [0xc3] 10009; 10010; X64-LABEL: test_mask_mul_epu32_rmb_128: 10011; X64: # %bb.0: 10012; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07] 10013; X64-NEXT: retq # encoding: [0xc3] 10014 %q = load i64, i64* %ptr_b 10015 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 10016 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 10017 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 10018 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 10019 ret < 2 x i64> %res 10020} 10021 10022define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 10023; X86-LABEL: test_mask_mul_epu32_rmbk_128: 10024; X86: # %bb.0: 10025; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10026; X86-NEXT: vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10] 10027; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 10028; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10029; X86-NEXT: vpmuludq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xca] 10030; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 10031; X86-NEXT: retl # encoding: [0xc3] 10032; 10033; X64-LABEL: test_mask_mul_epu32_rmbk_128: 10034; X64: # %bb.0: 10035; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 10036; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f] 10037; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] 10038; X64-NEXT: retq # encoding: [0xc3] 10039 %q = load i64, i64* %ptr_b 10040 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 10041 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 10042 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 10043 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 10044 ret < 2 x i64> %res 10045} 10046 10047define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { 10048; X86-LABEL: test_mask_mul_epu32_rmbkz_128: 10049; X86: # %bb.0: 10050; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10051; X86-NEXT: vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08] 10052; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 10053; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10054; X86-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1] 10055; X86-NEXT: retl # encoding: [0xc3] 10056; 10057; X64-LABEL: test_mask_mul_epu32_rmbkz_128: 10058; X64: # %bb.0: 10059; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 10060; X64-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07] 10061; X64-NEXT: retq # encoding: [0xc3] 10062 %q = load i64, i64* %ptr_b 10063 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 10064 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer 10065 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 10066 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x 
i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 10067 ret < 2 x i64> %res 10068} 10069 10070declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) 10071 10072define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { 10073; CHECK-LABEL: test_mask_mul_epu32_rr_256: 10074; CHECK: # %bb.0: 10075; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1] 10076; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 10077 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 10078 ret < 4 x i64> %res 10079} 10080 10081define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { 10082; X86-LABEL: test_mask_mul_epu32_rrk_256: 10083; X86: # %bb.0: 10084; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10085; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10086; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] 10087; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 10088; X86-NEXT: retl # encoding: [0xc3] 10089; 10090; X64-LABEL: test_mask_mul_epu32_rrk_256: 10091; X64: # %bb.0: 10092; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10093; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] 10094; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] 10095; X64-NEXT: retq # encoding: [0xc3] 10096 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 10097 ret < 4 x i64> %res 10098} 10099 10100define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { 10101; X86-LABEL: test_mask_mul_epu32_rrkz_256: 10102; X86: # %bb.0: 10103; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10104; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10105; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1] 10106; X86-NEXT: retl # encoding: [0xc3] 10107; 10108; X64-LABEL: test_mask_mul_epu32_rrkz_256: 10109; X64: # %bb.0: 10110; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10111; X64-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1] 10112; X64-NEXT: retq # encoding: [0xc3] 10113 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 10114 ret < 4 x i64> %res 10115} 10116 10117define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { 10118; X86-LABEL: test_mask_mul_epu32_rm_256: 10119; X86: # %bb.0: 10120; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10121; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x00] 10122; X86-NEXT: retl # encoding: [0xc3] 10123; 10124; X64-LABEL: test_mask_mul_epu32_rm_256: 10125; X64: # %bb.0: 10126; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x07] 10127; X64-NEXT: retq # encoding: [0xc3] 10128 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 10129 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 10130 ret < 4 x i64> %res 10131} 10132 10133define < 4 x i64> 
@test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 10134; X86-LABEL: test_mask_mul_epu32_rmk_256: 10135; X86: # %bb.0: 10136; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10137; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 10138; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 10139; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x08] 10140; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 10141; X86-NEXT: retl # encoding: [0xc3] 10142; 10143; X64-LABEL: test_mask_mul_epu32_rmk_256: 10144; X64: # %bb.0: 10145; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 10146; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f] 10147; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 10148; X64-NEXT: retq # encoding: [0xc3] 10149 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 10150 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 10151 ret < 4 x i64> %res 10152} 10153 10154define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { 10155; X86-LABEL: test_mask_mul_epu32_rmkz_256: 10156; X86: # %bb.0: 10157; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10158; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 10159; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 10160; X86-NEXT: vpmuludq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x00] 10161; X86-NEXT: retl # encoding: [0xc3] 10162; 10163; X64-LABEL: test_mask_mul_epu32_rmkz_256: 10164; X64: # %bb.0: 10165; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 10166; X64-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07] 10167; X64-NEXT: retq # encoding: [0xc3] 10168 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 10169 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 10170 ret < 4 x i64> %res 10171} 10172 10173define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { 10174; X86-LABEL: test_mask_mul_epu32_rmb_256: 10175; X86: # %bb.0: 10176; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10177; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 10178; X86-NEXT: # xmm1 = mem[0],zero 10179; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9] 10180; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1] 10181; X86-NEXT: retl # encoding: [0xc3] 10182; 10183; X64-LABEL: test_mask_mul_epu32_rmb_256: 10184; X64: # %bb.0: 10185; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07] 10186; X64-NEXT: retq # encoding: [0xc3] 10187 %q = load i64, i64* %ptr_b 10188 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 10189 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 10190 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 10191 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 10192 ret < 4 x i64> %res 10193} 10194 10195define < 4 x i64> 
@test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 10196; X86-LABEL: test_mask_mul_epu32_rmbk_256: 10197; X86: # %bb.0: 10198; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10199; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10] 10200; X86-NEXT: # xmm2 = mem[0],zero 10201; X86-NEXT: vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2] 10202; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 10203; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10204; X86-NEXT: vpmuludq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xca] 10205; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 10206; X86-NEXT: retl # encoding: [0xc3] 10207; 10208; X64-LABEL: test_mask_mul_epu32_rmbk_256: 10209; X64: # %bb.0: 10210; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 10211; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f] 10212; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] 10213; X64-NEXT: retq # encoding: [0xc3] 10214 %q = load i64, i64* %ptr_b 10215 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 10216 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 10217 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 10218 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 10219 ret < 4 x i64> %res 10220} 10221 10222define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { 10223; X86-LABEL: test_mask_mul_epu32_rmbkz_256: 10224; X86: # %bb.0: 10225; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 10226; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08] 10227; X86-NEXT: # xmm1 = mem[0],zero 10228; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9] 10229; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] 10230; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10231; X86-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1] 10232; X86-NEXT: retl # encoding: [0xc3] 10233; 10234; X64-LABEL: test_mask_mul_epu32_rmbkz_256: 10235; X64: # %bb.0: 10236; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 10237; X64-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07] 10238; X64-NEXT: retq # encoding: [0xc3] 10239 %q = load i64, i64* %ptr_b 10240 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 10241 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 10242 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 10243 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 10244 ret < 4 x i64> %res 10245} 10246 10247declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) 10248 10249declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8) 10250 10251define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { 10252; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128: 10253; X86: # %bb.0: 10254; X86-NEXT: vcvtdq2ps %xmm0, 
%xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xd0] 10255; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10256; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10257; X86-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8] 10258; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] 10259; X86-NEXT: retl # encoding: [0xc3] 10260; 10261; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128: 10262; X64: # %bb.0: 10263; X64-NEXT: vcvtdq2ps %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xd0] 10264; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10265; X64-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8] 10266; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] 10267; X64-NEXT: retq # encoding: [0xc3] 10268 %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) 10269 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) 10270 %res2 = fadd <4 x float> %res, %res1 10271 ret <4 x float> %res2 10272} 10273 10274declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8) 10275 10276define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { 10277; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256: 10278; X86: # %bb.0: 10279; X86-NEXT: vcvtdq2ps %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xd0] 10280; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10281; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10282; X86-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8] 10283; X86-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2] 10284; X86-NEXT: retl # encoding: [0xc3] 10285; 10286; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256: 10287; X64: # %bb.0: 10288; X64-NEXT: vcvtdq2ps %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xd0] 10289; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10290; X64-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8] 10291; X64-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2] 10292; X64-NEXT: retq # encoding: [0xc3] 10293 %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) 10294 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) 10295 %res2 = fadd <8 x float> %res, %res1 10296 ret <8 x float> %res2 10297} 10298 10299declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8) 10300 10301define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 10302; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256: 10303; X86: # %bb.0: 10304; X86-NEXT: vcvtpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xd0] 10305; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10306; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10307; X86-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8] 10308; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] 10309; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10310; 
X86-NEXT: retl # encoding: [0xc3] 10311; 10312; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256: 10313; X64: # %bb.0: 10314; X64-NEXT: vcvtpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xd0] 10315; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10316; X64-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8] 10317; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] 10318; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10319; X64-NEXT: retq # encoding: [0xc3] 10320 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 10321 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 10322 %res2 = add <4 x i32> %res, %res1 10323 ret <4 x i32> %res2 10324} 10325 10326declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8) 10327 10328define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) { 10329; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256: 10330; X86: # %bb.0: 10331; X86-NEXT: vcvtpd2ps %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xd0] 10332; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10333; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10334; X86-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8] 10335; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] 10336; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10337; X86-NEXT: retl # encoding: [0xc3] 10338; 10339; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256: 10340; X64: # %bb.0: 10341; X64-NEXT: vcvtpd2ps %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xd0] 10342; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10343; X64-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8] 10344; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2] 10345; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10346; X64-NEXT: retq # encoding: [0xc3] 10347 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2) 10348 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1) 10349 %res2 = fadd <4 x float> %res, %res1 10350 ret <4 x float> %res2 10351} 10352 10353declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8) 10354 10355define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) { 10356; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256: 10357; X86: # %bb.0: 10358; X86-NEXT: vcvtps2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xd0] 10359; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10360; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10361; X86-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8] 10362; X86-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2] 10363; X86-NEXT: retl # encoding: [0xc3] 10364; 10365; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256: 10366; X64: # %bb.0: 10367; X64-NEXT: vcvtps2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xd0] 10368; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 
10369; X64-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8] 10370; X64-NEXT: vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2] 10371; X64-NEXT: retq # encoding: [0xc3] 10372 %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2) 10373 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1) 10374 %res2 = fadd <4 x double> %res, %res1 10375 ret <4 x double> %res2 10376} 10377 10378declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8) 10379 10380define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) { 10381; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128: 10382; X86: # %bb.0: 10383; X86-NEXT: vcvtps2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xd0] 10384; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10385; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10386; X86-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8] 10387; X86-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2] 10388; X86-NEXT: retl # encoding: [0xc3] 10389; 10390; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128: 10391; X64: # %bb.0: 10392; X64-NEXT: vcvtps2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xd0] 10393; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10394; X64-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8] 10395; X64-NEXT: vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2] 10396; X64-NEXT: retq # encoding: [0xc3] 10397 %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2) 10398 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1) 10399 %res2 = fadd <2 x double> %res, %res1 10400 ret <2 x double> %res2 10401} 10402 10403declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8) 10404 10405define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 10406; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256: 10407; X86: # %bb.0: 10408; X86-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0] 10409; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10410; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10411; X86-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8] 10412; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] 10413; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10414; X86-NEXT: retl # encoding: [0xc3] 10415; 10416; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256: 10417; X64: # %bb.0: 10418; X64-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0] 10419; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10420; X64-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8] 10421; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] 10422; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 10423; X64-NEXT: retq # encoding: [0xc3] 10424 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> 
%x1, i8 %x2) 10425 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 10426 %res2 = add <4 x i32> %res, %res1 10427 ret <4 x i32> %res2 10428} 10429 10430declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8) 10431 10432define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 10433; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128: 10434; X86: # %bb.0: 10435; X86-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0] 10436; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10437; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10438; X86-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8] 10439; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] 10440; X86-NEXT: retl # encoding: [0xc3] 10441; 10442; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128: 10443; X64: # %bb.0: 10444; X64-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0] 10445; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10446; X64-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8] 10447; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] 10448; X64-NEXT: retq # encoding: [0xc3] 10449 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 10450 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 10451 %res2 = add <4 x i32> %res, %res1 10452 ret <4 x i32> %res2 10453} 10454 10455declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8) 10456 10457define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 10458; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256: 10459; X86: # %bb.0: 10460; X86-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0] 10461; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10462; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10463; X86-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8] 10464; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2] 10465; X86-NEXT: retl # encoding: [0xc3] 10466; 10467; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256: 10468; X64: # %bb.0: 10469; X64-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0] 10470; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10471; X64-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8] 10472; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2] 10473; X64-NEXT: retq # encoding: [0xc3] 10474 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 10475 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 10476 %res2 = add <8 x i32> %res, %res1 10477 ret <8 x i32> %res2 10478} 10479 10480declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) 10481 10482define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 10483; X86-LABEL: 
test_int_x86_avx512_mask_permvar_sf_256: 10484; X86: # %bb.0: 10485; X86-NEXT: vpermps %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8] 10486; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10487; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10488; X86-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0] 10489; X86-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0] 10490; X86-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] 10491; X86-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] 10492; X86-NEXT: retl # encoding: [0xc3] 10493; 10494; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_256: 10495; X64: # %bb.0: 10496; X64-NEXT: vpermps %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8] 10497; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10498; X64-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0] 10499; X64-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0] 10500; X64-NEXT: vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] 10501; X64-NEXT: vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3] 10502; X64-NEXT: retq # encoding: [0xc3] 10503 %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 10504 %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) 10505 %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 10506 %res3 = fadd <8 x float> %res, %res1 10507 %res4 = fadd <8 x float> %res3, %res2 10508 ret <8 x float> %res4 10509} 10510 10511declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 10512 10513define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 10514; X86-LABEL: test_int_x86_avx512_mask_permvar_si_256: 10515; X86: # %bb.0: 10516; X86-NEXT: vpermd %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8] 10517; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10518; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10519; X86-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0] 10520; X86-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0] 10521; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] 10522; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 10523; X86-NEXT: retl # encoding: [0xc3] 10524; 10525; X64-LABEL: test_int_x86_avx512_mask_permvar_si_256: 10526; X64: # %bb.0: 10527; X64-NEXT: vpermd %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8] 10528; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10529; X64-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0] 10530; X64-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0] 10531; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] 10532; X64-NEXT: vpaddd 
%ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] 10533; X64-NEXT: retq # encoding: [0xc3] 10534 %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 10535 %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 10536 %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 10537 %res3 = add <8 x i32> %res, %res1 10538 %res4 = add <8 x i32> %res3, %res2 10539 ret <8 x i32> %res4 10540} 10541 10542declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) 10543 10544define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 10545; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256: 10546; X86: # %bb.0: 10547; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8] 10548; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10549; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10550; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0] 10551; X86-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0] 10552; X86-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] 10553; X86-NEXT: vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3] 10554; X86-NEXT: retl # encoding: [0xc3] 10555; 10556; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256: 10557; X64: # %bb.0: 10558; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8] 10559; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10560; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0] 10561; X64-NEXT: vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0] 10562; X64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] 10563; X64-NEXT: vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3] 10564; X64-NEXT: retq # encoding: [0xc3] 10565 %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 10566 %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) 10567 %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 10568 %res3 = fadd <4 x double> %res, %res1 10569 %res4 = fadd <4 x double> %res3, %res2 10570 ret <4 x double> %res4 10571} 10572 10573declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 10574 10575define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 10576; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256: 10577; X86: # %bb.0: 10578; X86-NEXT: vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8] 10579; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10580; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10581; X86-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0] 10582; X86-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0] 10583; 
X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] 10584; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 10585; X86-NEXT: retl # encoding: [0xc3] 10586; 10587; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256: 10588; X64: # %bb.0: 10589; X64-NEXT: vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8] 10590; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10591; X64-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0] 10592; X64-NEXT: vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0] 10593; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3] 10594; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] 10595; X64-NEXT: retq # encoding: [0xc3] 10596 %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 10597 %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 10598 %res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 10599 %res3 = add <4 x i64> %res, %res1 10600 %res4 = add <4 x i64> %res3, %res2 10601 ret <4 x i64> %res4 10602} 10603 10604declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) 10605 10606define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { 10607; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128: 10608; X86: # %bb.0: 10609; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 10610; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21] 10611; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10612; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10613; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21] 10614; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] 10615; X86-NEXT: retl # encoding: [0xc3] 10616; 10617; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128: 10618; X64: # %bb.0: 10619; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 10620; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21] 10621; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10622; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21] 10623; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] 10624; X64-NEXT: retq # encoding: [0xc3] 10625 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) 10626 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) 10627 %res2 = add <4 x i32> %res, %res1 10628 ret <4 x i32> %res2 10629} 10630 10631declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) 10632 10633define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { 10634; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: 10635; X86: # %bb.0: 
10636; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 10637; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21] 10638; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10639; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10640; X86-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21] 10641; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] 10642; X86-NEXT: retl # encoding: [0xc3] 10643; 10644; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: 10645; X64: # %bb.0: 10646; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 10647; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21] 10648; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10649; X64-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21] 10650; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3] 10651; X64-NEXT: retq # encoding: [0xc3] 10652 %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) 10653 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) 10654 %res2 = add <4 x i32> %res, %res1 10655 ret <4 x i32> %res2 10656} 10657 10658declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) 10659 10660define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { 10661; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256: 10662; X86: # %bb.0: 10663; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 10664; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21] 10665; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10666; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10667; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21] 10668; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] 10669; X86-NEXT: retl # encoding: [0xc3] 10670; 10671; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256: 10672; X64: # %bb.0: 10673; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 10674; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21] 10675; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10676; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21] 10677; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] 10678; X64-NEXT: retq # encoding: [0xc3] 10679 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) 10680 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) 10681 %res2 = add <8 x i32> %res, %res1 10682 ret <8 x i32> %res2 10683} 10684 10685declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) 10686 10687define <8 x 
i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { 10688; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: 10689; X86: # %bb.0: 10690; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 10691; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21] 10692; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10693; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10694; X86-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21] 10695; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] 10696; X86-NEXT: retl # encoding: [0xc3] 10697; 10698; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: 10699; X64: # %bb.0: 10700; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 10701; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21] 10702; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10703; X64-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21] 10704; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3] 10705; X64-NEXT: retq # encoding: [0xc3] 10706 %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) 10707 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) 10708 %res2 = add <8 x i32> %res, %res1 10709 ret <8 x i32> %res2 10710} 10711 10712declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) 10713 10714define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { 10715; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128: 10716; X86: # %bb.0: 10717; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 10718; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21] 10719; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] 10720; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] 10721; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21] 10722; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] 10723; X86-NEXT: retl # encoding: [0xc3] 10724; 10725; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128: 10726; X64: # %bb.0: 10727; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 10728; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21] 10729; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] 10730; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21] 10731; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3] 10732; X64-NEXT: retq # encoding: [0xc3] 10733 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) 10734 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) 10735 %res2 = add <2 x i64> %res, %res1 10736 ret <2 x i64> 
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)

define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
; X86: # %bb.0:
; X86-NEXT: vcvtudq2ps %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
; X86-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
; X64: # %bb.0:
; X64-NEXT: vcvtudq2ps %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
; X64-NEXT: vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}
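; 256-bit variant of the unsigned dword-to-float conversion test; as in the
; 128-bit case above, the masked and unmasked vcvtudq2ps results are summed
; so both encodings appear in the checks.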
declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
; X86: # %bb.0:
; X86-NEXT: vcvtudq2ps %ymm0, %ymm2 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
; X86-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
; X64: # %bb.0:
; X64-NEXT: vcvtudq2ps %ymm0, %ymm2 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
; X64-NEXT: vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
; X86-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
; X64-NEXT: vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
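; 256-bit two-source dword permute. A masked vpermi2d overwrites its index
; operand, so codegen first computes the unmasked result into a scratch
; register with vpermt2d, then performs the masked permute and adds the two.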
define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
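; Zero-masking form of vpermt2d: lanes with a clear mask bit are zeroed
; instead of being taken from the destination.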
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
; X86-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; X86: # %bb.0:
; X86-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
; X86-NEXT: vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; X64: # %bb.0:
; X64-NEXT: vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT: vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
; X64-NEXT: vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
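; Floating-point flavors of the two-source permute (vpermi2pd/vpermt2pd and,
; below, vpermi2ps/vpermt2ps); the results are combined with vaddpd/vaddps
; rather than integer adds.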
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; X86: # %bb.0:
; X86-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
; X86-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
; X86-NEXT: vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; X64: # %bb.0:
; X64-NEXT: vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
; X64-NEXT: vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
; X64-NEXT: vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; X86: # %bb.0:
; X86-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; X86-NEXT: vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; X64-NEXT: vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}
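; Same as the ps_128 test above, except the index vector is passed as
; <2 x i64> and bitcast to <4 x i32>; the bitcast should fold away and the
; same masked vpermi2ps encoding should be emitted.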
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %x1cast = bitcast <2 x i64> %x1 to <4 x i32>
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; X86: # %bb.0:
; X86-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X86-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
; X86-NEXT: vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
; X64-NEXT: vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
; X64-NEXT: vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT: vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT: vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
; X86-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
; X64-NEXT: vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT: vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT: vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
; X86: # %bb.0:
; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
; X86-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
; X64: # %bb.0:
; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
; X64-NEXT: vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)

define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
; X86-LABEL: test_compress_store_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)

define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
; X86-LABEL: test_compress_store_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)

define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
; X86-LABEL: test_compress_store_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)

define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
; X86-LABEL: test_compress_store_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
  ret void
}
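; Expand-load tests: vexpandpd/vexpandps/vpexpandq/vpexpandd read a contiguous
; sequence from memory and scatter it into the lanes selected by the mask.
; For the all-ones-mask wrappers, the mask is materialized with
; kxnorw %k0, %k0, %k1.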
define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
  ret <2 x double> %res
}

define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)

define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
; X86-LABEL: test_expand_load_pd_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
  ret <2 x double> %res
}

define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)

define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
; X86-LABEL: test_expand_load_ps_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
  ret <4 x float> %res
}

define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)

define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
; X86-LABEL: test_expand_load_q_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
  ret <2 x i64> %res
}

define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)

define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
; X86-LABEL: test_expand_load_d_128:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_128:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
  ret <4 x i32> %res
}

define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)

define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_compress_store_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_256:
; X64: # %bb.0:
kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11744; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07] 11745; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11746; X64-NEXT: retq # encoding: [0xc3] 11747 call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask) 11748 ret void 11749} 11750 11751declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask) 11752 11753define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) { 11754; X86-LABEL: test_compress_store_ps_256: 11755; X86: # %bb.0: 11756; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11757; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 11758; X86-NEXT: vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00] 11759; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11760; X86-NEXT: retl # encoding: [0xc3] 11761; 11762; X64-LABEL: test_compress_store_ps_256: 11763; X64: # %bb.0: 11764; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 11765; X64-NEXT: vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07] 11766; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11767; X64-NEXT: retq # encoding: [0xc3] 11768 call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1) 11769 ret void 11770} 11771 11772define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) { 11773; X86-LABEL: test_mask_compress_store_q_256: 11774; X86: # %bb.0: 11775; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11776; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11777; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11778; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00] 11779; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11780; X86-NEXT: retl # encoding: [0xc3] 11781; 11782; X64-LABEL: test_mask_compress_store_q_256: 11783; X64: # %bb.0: 11784; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11785; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07] 11786; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11787; X64-NEXT: retq # encoding: [0xc3] 11788 call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask) 11789 ret void 11790} 11791 11792declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask) 11793 11794define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) { 11795; X86-LABEL: test_compress_store_q_256: 11796; X86: # %bb.0: 11797; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11798; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 11799; X86-NEXT: vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00] 11800; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11801; X86-NEXT: retl # encoding: [0xc3] 11802; 11803; X64-LABEL: test_compress_store_q_256: 11804; X64: # %bb.0: 11805; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 11806; X64-NEXT: vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07] 11807; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11808; X64-NEXT: retq # encoding: [0xc3] 11809 call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1) 11810 ret void 11811} 11812 11813define void 
@test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) { 11814; X86-LABEL: test_mask_compress_store_d_256: 11815; X86: # %bb.0: 11816; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11817; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11818; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11819; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00] 11820; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11821; X86-NEXT: retl # encoding: [0xc3] 11822; 11823; X64-LABEL: test_mask_compress_store_d_256: 11824; X64: # %bb.0: 11825; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11826; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07] 11827; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11828; X64-NEXT: retq # encoding: [0xc3] 11829 call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask) 11830 ret void 11831} 11832 11833declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask) 11834 11835define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) { 11836; X86-LABEL: test_compress_store_d_256: 11837; X86: # %bb.0: 11838; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11839; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 11840; X86-NEXT: vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00] 11841; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11842; X86-NEXT: retl # encoding: [0xc3] 11843; 11844; X64-LABEL: test_compress_store_d_256: 11845; X64: # %bb.0: 11846; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8] 11847; X64-NEXT: vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07] 11848; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] 11849; X64-NEXT: retq # encoding: [0xc3] 11850 call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1) 11851 ret void 11852} 11853 11854define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) { 11855; X86-LABEL: test_mask_expand_load_pd_256: 11856; X86: # %bb.0: 11857; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11858; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11859; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11860; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00] 11861; X86-NEXT: retl # encoding: [0xc3] 11862; 11863; X64-LABEL: test_mask_expand_load_pd_256: 11864; X64: # %bb.0: 11865; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 11866; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07] 11867; X64-NEXT: retq # encoding: [0xc3] 11868 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 11869 ret <4 x double> %res 11870} 11871 11872define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) { 11873; X86-LABEL: test_maskz_expand_load_pd_256: 11874; X86: # %bb.0: 11875; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 11876; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 11877; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 11878; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00] 11879; X86-NEXT: retl # encoding: [0xc3] 
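; NOTE: The expand-load tests check that vexpandpd/vexpandps/vpexpandq/vpexpandd
; pull contiguous elements from memory into the destination lanes selected by %k1
; ({z} zeroes the unselected lanes); the unmasked forms materialize an all-ones
; mask with kxnorw.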
;
; X64-LABEL: test_maskz_expand_load_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)

define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_expand_load_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
  ret <4 x double> %res
}

define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)

define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
; X86-LABEL: test_expand_load_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
  ret <8 x float> %res
}

define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)

define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
; X86-LABEL: test_expand_load_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
  ret <4 x i64> %res
}

define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)

define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
; X86-LABEL: test_expand_load_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_256:
; X64: # %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
  ret <8 x i32> %res
}

define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
; X86-LABEL: test_sqrt_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_sqrt_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
; X86-LABEL: test_sqrt_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_sqrt_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; X86: # %bb.0:
; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; X64: # %bb.0:
; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; X64-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
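; NOTE: Each rotate test calls the intrinsic three times (merge-masked,
; zero-masked, and with an all-ones mask) and sums the results, so a single
; return value exercises all three EVEX masking encodings.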
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; X86: # %bb.0:
; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; X64: # %bb.0:
; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; X64-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; X86: # %bb.0:
; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; X64: # %bb.0:
; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; X64-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; X86: # %bb.0:
; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; X64: # %bb.0:
; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; X64-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_128:
; X86: # %bb.0:
; X86-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
; X86-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_128:
; X64: # %bb.0:
; X64-NEXT: vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
; X64-NEXT: vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_256:
; X86: # %bb.0:
; X86-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
; X86-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_256:
; X64: # %bb.0:
; X64-NEXT: vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
; X64-NEXT: vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_128:
; X86: # %bb.0:
; X86-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
; X86-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_128:
; X64: # %bb.0:
; X64-NEXT: vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
; X64-NEXT: vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_256:
; X86: # %bb.0:
; X86-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
; X86-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_256:
; X64: # %bb.0:
; X64-NEXT: vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
; X64-NEXT: vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; X86: # %bb.0:
; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; X86-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; X64: # %bb.0:
; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; X64-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; X64-NEXT: vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; X86: # %bb.0:
; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; X86-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; X64: # %bb.0:
; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; X64-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; X64-NEXT: vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT: vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; X86: # %bb.0:
; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; X86-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; X64: # %bb.0:
; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; X64-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; X64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; X86: # %bb.0:
; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; X86-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; X64: # %bb.0:
; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; X64-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; X64-NEXT: vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_128:
; X86: # %bb.0:
; X86-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
; X86-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
; X86-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_128:
; X64: # %bb.0:
; X64-NEXT: vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
; X64-NEXT: vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
; X64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_256:
; X86: # %bb.0:
; X86-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
; X86-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_256:
; X64: # %bb.0:
; X64-NEXT: vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
; X64-NEXT: vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_128:
; X86: # %bb.0:
; X86-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
; X86-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_128:
; X64: # %bb.0:
; X64-NEXT: vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
; X64-NEXT: vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
; X64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_256:
; X86: # %bb.0:
; X86-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
; X86-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_256:
; X64: # %bb.0:
; X64-NEXT: vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
; X64-NEXT: vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
; X64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfmadd256_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd256_ps:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
; X86-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
; X64-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfmadd128_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
; X86-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
; X64-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_fmadd256_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 -1)
  ret <4 x double> %res
}

define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
; X86-LABEL: test_mask_fmadd256_pd:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
; X86-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmadd256_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
; X64-NEXT: # ymm0 = (ymm0 * ymm1) + ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_fmadd128_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1)
  ret <2 x double> %res
}

define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; X86-LABEL: test_mask_fmadd128_pd:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
; X86-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmadd128_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
; X64-NEXT: # xmm0 = (xmm0 * xmm1) + xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
; X86-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
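; NOTE: The mask3 FMA variants accumulate into the third operand (here xmm2),
; which is why the result is copied back to xmm0 with vmovapd before returning.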
; X64-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
; X64-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
; X86-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
; X64-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
; X86-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
; X64-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
; X86-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
; X64-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
; X86-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
; X64-NEXT: # ymm2 = (ymm0 * ymm1) + ymm2
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
; X86-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
; X64-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}


declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
; X86-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
; X64-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
; X86-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
; X64-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
; X86-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
; X64-NEXT: # xmm2 = (xmm0 * xmm1) - xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
; X86-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
; X64-NEXT: # ymm2 = (ymm0 * ymm1) - ymm2
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfnmadd256_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd256_ps:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
; X86-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd256_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
; X64-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfnmadd128_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd128_ps:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
; X86-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd128_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
; X64-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfnmadd256_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd256_pd:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
; X86-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd256_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
; X64-NEXT: # ymm0 = -(ymm0 * ymm1) + ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfnmadd128_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd128_pd:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
; X86-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd128_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
; X64-NEXT: # xmm0 = -(xmm0 * xmm1) + xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfnmsub256_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub256_ps:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
; X86-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub256_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
; X64-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfnmsub128_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub128_ps:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
; X86-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub128_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
; X64-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfnmsub256_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
; CHECK-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub256_pd:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
; X86-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub256_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
; X64-NEXT: # ymm0 = -(ymm0 * ymm1) - ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfnmsub128_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
; CHECK-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub128_pd:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
; X86-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub128_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
; X64-NEXT: # xmm0 = -(xmm0 * xmm1) - xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
; X86-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
; X64-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
; X86-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
; X64-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
; X86-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
; X64-NEXT: # xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
; X86-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
; X64-NEXT: # ymm2 = -(ymm0 * ymm1) - ymm2
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_fmaddsub256_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
; X86-LABEL: test_mask_fmaddsub256_ps:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
; X86-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmaddsub256_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
; X64-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_fmaddsub128_ps:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; X86-LABEL: test_mask_fmaddsub128_ps:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
; X86-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmaddsub128_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
; X64-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfmaddsub256_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
; CHECK-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmaddsub256_pd:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
; X86-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmaddsub256_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
; X64-NEXT: # ymm0 = (ymm0 * ymm1) +/- ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfmaddsub128_pd:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
; CHECK-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmaddsub128_pd:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
; X86-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmaddsub128_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
; X64-NEXT: # xmm0 = (xmm0 * xmm1) +/- xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
; X86-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
; X64-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
; X86-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
; X64-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
; X86-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
; X64-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
; X86-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
; X64-NEXT: # xmm2 = (xmm0 * xmm1) +/- xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
; X86-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
; X64-NEXT: # ymm2 = (ymm0 * ymm1) +/- ymm2
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
; X86-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
; X64-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
; X86-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
; X64-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
; X86-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
; X64-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
; X64-NEXT: vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
; X86-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
; X64-NEXT: # xmm2 = (xmm0 * xmm1) -/+ xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
; X86-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
; X64-NEXT: # ymm2 = (ymm0 * ymm1) -/+ ymm2
; X64-NEXT: vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps_rmk:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmk:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %a2 = load <4 x float>, <4 x float>* %ptr_a2
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps_rmka:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmka:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmkz:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmkz:
; X64: # %bb.0:
; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %a2 = load <4 x float>, <4 x float>* %ptr_a2
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmkza:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
encoding: [0xc4,0xe2,0x71,0xa8,0x00] 13799; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem 13800; X86-NEXT: retl # encoding: [0xc3] 13801; 13802; X64-LABEL: test_mask_vfmadd128_ps_rmkza: 13803; X64: # %bb.0: 13804; X64-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07] 13805; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem 13806; X64-NEXT: retq # encoding: [0xc3] 13807 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4 13808 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 13809 ret <4 x float> %res 13810} 13811 13812define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) { 13813; X86-LABEL: test_mask_vfmadd128_ps_rmb: 13814; X86: # %bb.0: 13815; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13816; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13817; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13818; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00] 13819; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem 13820; X86-NEXT: retl # encoding: [0xc3] 13821; 13822; X64-LABEL: test_mask_vfmadd128_ps_rmb: 13823; X64: # %bb.0: 13824; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13825; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] 13826; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem 13827; X64-NEXT: retq # encoding: [0xc3] 13828 %q = load float, float* %ptr_a2 13829 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 13830 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 13831 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 13832 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 13833 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind 13834 ret <4 x float> %res 13835} 13836 13837define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) { 13838; X86-LABEL: test_mask_vfmadd128_ps_rmba: 13839; X86: # %bb.0: 13840; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 13841; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 13842; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 13843; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00] 13844; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem 13845; X86-NEXT: retl # encoding: [0xc3] 13846; 13847; X64-LABEL: test_mask_vfmadd128_ps_rmba: 13848; X64: # %bb.0: 13849; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 13850; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] 13851; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem 13852; X64-NEXT: retq # encoding: [0xc3] 13853 %q = load float, float* %ptr_a2, align 4 13854 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 13855 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 13856 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 13857 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 13858 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind 13859 ret <4 x float> %res 13860} 13861 
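; The unmasked {1to4} broadcast forms below are expected to keep their EVEX
; encoding: embedded broadcast is an EVEX-only feature, so no EVEX-to-VEX
; compression is possible even when the mask operand is all-ones.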
define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmbz:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmbz:
; X64: # %bb.0:
; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_a2
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmbza:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmbza:
; X64: # %bb.0:
; X64-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %q = load float, float* %ptr_a2, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
  ret <4 x float> %res
}

define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_pd_rmk:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_pd_rmk:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %a2 = load <2 x double>, <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_pd_rmkz:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00]
; X86-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_pd_rmkz:
; X64: # %bb.0:
; X64-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
; X64-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %a2 = load <2 x double>, <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd256_pd_rmk:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00]
; X86-NEXT: # ymm0 = (ymm1 * ymm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_pd_rmk:
; X64: # %bb.0:
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
; X64-NEXT: # ymm0 = (ymm1 * ymm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %a2 = load <4 x double>, <4 x double>* %ptr_a2
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd256_pd_rmkz:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00]
; X86-NEXT: # ymm0 = (ymm1 * ymm0) + mem
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_pd_rmkz:
; X64: # %bb.0:
; X64-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
; X64-NEXT: # ymm0 = (ymm1 * ymm0) + mem
; X64-NEXT: retq # encoding: [0xc3]
  %a2 = load <4 x double>, <4 x double>* %ptr_a2
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}