1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s 3 4; 256-bit 5 6define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { 7; CHECK-LABEL: test_cmp_b_256: 8; CHECK: ## BB#0: 9; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x00] 10; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] 11; CHECK-NEXT: vpcmpltb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x01] 12; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] 13; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] 14; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] 15; CHECK-NEXT: vpcmpunordb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x03] 16; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] 17; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] 18; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] 19; CHECK-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05] 20; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] 21; CHECK-NEXT: vpcmpnleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x06] 22; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] 23; CHECK-NEXT: vpcmpordb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x07] 24; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] 25; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] 26; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] 27; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] 28; CHECK-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03] 29; CHECK-NEXT: vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8] 30; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] 31; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] 32; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] 33; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01] 34; CHECK-NEXT: retq ## encoding: [0xc3] 35 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) 36 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 37 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1) 38 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 39 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1) 40 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 41 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1) 42 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 43 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1) 44 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 45 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1) 46 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 47 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1) 48 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 49 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1) 50 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 51 ret <8 x i32> %vec7 52} 53 54define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) { 55; CHECK-LABEL: test_mask_cmp_b_256: 56; CHECK: ## BB#0: 57; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 58; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x00] 59; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] 60; CHECK-NEXT: vpcmpltb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x01] 61; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] 62; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] 63; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] 64; CHECK-NEXT: vpcmpunordb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x03] 65; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] 66; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] 67; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] 68; CHECK-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05] 69; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] 70; CHECK-NEXT: vpcmpnleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x06] 71; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] 72; CHECK-NEXT: vpcmpordb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x07] 73; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] 74; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] 75; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] 76; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] 77; CHECK-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03] 78; CHECK-NEXT: vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8] 79; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] 80; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] 81; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] 82; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01] 83; CHECK-NEXT: retq ## encoding: [0xc3] 84 %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) 85 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 86 %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask) 87 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 88 %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask) 89 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 90 %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask) 91 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 92 %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask) 93 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 94 %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask) 95 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 96 %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask) 97 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 98 %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask) 99 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 100 ret <8 x i32> %vec7 101} 102 103declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone 104 105define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { 106; CHECK-LABEL: test_ucmp_b_256: 107; CHECK: ## BB#0: 108; CHECK-NEXT: vpcmpequb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x00] 109; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] 110; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] 111; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] 112; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] 113; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] 114; CHECK-NEXT: vpcmpunordub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x03] 115; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] 116; CHECK-NEXT: vpcmpnequb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x04] 117; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] 118; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] 119; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] 120; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] 121; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] 122; CHECK-NEXT: vpcmpordub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x07] 123; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] 124; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] 125; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] 126; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] 127; CHECK-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03] 128; CHECK-NEXT: vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8] 129; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] 130; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] 131; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] 132; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01] 133; CHECK-NEXT: retq ## encoding: [0xc3] 134 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) 135 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 136 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1) 137 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 138 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1) 139 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 140 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1) 141 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 142 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1) 143 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 144 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1) 145 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 146 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1) 147 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 148 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1) 149 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 150 ret <8 x i32> %vec7 151} 152 153define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) { 154; CHECK-LABEL: test_mask_ucmp_b_256: 155; CHECK: ## BB#0: 156; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 157; CHECK-NEXT: vpcmpequb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x00] 158; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] 159; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] 160; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] 161; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] 162; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] 163; CHECK-NEXT: vpcmpunordub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x03] 164; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] 165; CHECK-NEXT: vpcmpnequb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x04] 166; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] 167; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] 168; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] 169; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] 170; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] 171; CHECK-NEXT: vpcmpordub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x07] 172; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] 173; CHECK-NEXT: vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7] 174; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] 175; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] 176; CHECK-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03] 177; CHECK-NEXT: vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8] 178; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] 179; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] 180; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] 181; CHECK-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01] 182; CHECK-NEXT: retq ## encoding: [0xc3] 183 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) 184 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 185 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask) 186 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 187 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask) 188 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 189 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask) 190 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 191 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask) 192 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 193 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask) 194 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 195 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask) 196 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 197 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask) 198 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 199 ret <8 x i32> %vec7 200} 201 202declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone 203 204define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { 205; CHECK-LABEL: test_cmp_w_256: 206; CHECK: ## BB#0: 207; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x00] 208; CHECK-NEXT: vpcmpltw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x01] 209; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02] 210; CHECK-NEXT: vpcmpunordw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x03] 211; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x04] 212; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe9,0x05] 213; CHECK-NEXT: vpcmpnlew %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf1,0x06] 214; CHECK-NEXT: vpcmpordw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf9,0x07] 215; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 216; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 217; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] 218; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] 219; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 220; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] 221; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 222; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] 223; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 224; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] 225; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 226; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] 227; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 228; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] 229; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 230; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] 231; CHECK-NEXT: retq ## encoding: [0xc3] 232 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) 233 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 234 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1) 235 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 236 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1) 237 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 238 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1) 239 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 240 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1) 241 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 242 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1) 243 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 244 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1) 245 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 246 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1) 247 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 248 ret <8 x i16> %vec7 249} 250 251define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) { 252; CHECK-LABEL: test_mask_cmp_w_256: 253; CHECK: ## BB#0: 254; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 255; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x00] 256; CHECK-NEXT: vpcmpltw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd1,0x01] 257; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02] 258; CHECK-NEXT: vpcmpunordw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x03] 259; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x04] 260; CHECK-NEXT: vpcmpnltw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xf1,0x05] 261; CHECK-NEXT: vpcmpnlew %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xf9,0x06] 262; CHECK-NEXT: vpcmpordw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc9,0x07] 263; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 264; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 265; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] 266; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] 267; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 268; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] 269; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 270; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] 271; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 272; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] 273; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 274; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] 275; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 276; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] 277; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 278; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] 279; CHECK-NEXT: retq ## encoding: [0xc3] 280 %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) 281 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 282 %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask) 283 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 284 %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask) 285 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 286 %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask) 287 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 288 %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask) 289 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 290 %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask) 291 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 292 %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask) 293 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 294 %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask) 295 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 296 ret <8 x i16> %vec7 297} 298 299declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone 300 301define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { 302; CHECK-LABEL: test_ucmp_w_256: 303; CHECK: ## BB#0: 304; CHECK-NEXT: vpcmpequw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x00] 305; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01] 306; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02] 307; CHECK-NEXT: vpcmpunorduw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd9,0x03] 308; CHECK-NEXT: vpcmpnequw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x04] 309; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x05] 310; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf1,0x06] 311; CHECK-NEXT: vpcmporduw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf9,0x07] 312; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 313; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 314; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] 315; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] 316; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 317; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] 318; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 319; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] 320; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 321; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] 322; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 323; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] 324; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 325; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] 326; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 327; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] 328; CHECK-NEXT: retq ## encoding: [0xc3] 329 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) 330 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 331 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1) 332 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 333 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1) 334 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 335 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1) 336 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 337 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1) 338 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 339 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1) 340 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 341 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1) 342 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 343 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1) 344 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 345 ret <8 x i16> %vec7 346} 347 348define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) { 349; CHECK-LABEL: test_mask_ucmp_w_256: 350; CHECK: ## BB#0: 351; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 352; CHECK-NEXT: vpcmpequw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x00] 353; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01] 354; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02] 355; CHECK-NEXT: vpcmpunorduw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe1,0x03] 356; CHECK-NEXT: vpcmpnequw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x04] 357; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf1,0x05] 358; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf9,0x06] 359; CHECK-NEXT: vpcmporduw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x07] 360; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 361; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 362; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] 363; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] 364; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 365; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] 366; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 367; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] 368; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 369; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] 370; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 371; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] 372; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 373; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] 374; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 375; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] 376; CHECK-NEXT: retq ## encoding: [0xc3] 377 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) 378 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 379 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask) 380 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 381 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask) 382 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 383 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask) 384 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 385 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask) 386 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 387 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask) 388 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 389 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask) 390 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 391 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask) 392 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 393 ret <8 x i16> %vec7 394} 395 396declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone 397 398; 128-bit 399 400define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) { 401; CHECK-LABEL: test_pcmpeq_b_128: 402; CHECK: ## BB#0: 403; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1] 404; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 405; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 406; CHECK-NEXT: retq ## encoding: [0xc3] 407 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1) 408 ret i16 %res 409} 410 411define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 412; CHECK-LABEL: test_mask_pcmpeq_b_128: 413; CHECK: ## BB#0: 414; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 415; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1] 416; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 417; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 418; CHECK-NEXT: retq ## encoding: [0xc3] 419 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask) 420 ret i16 %res 421} 422 423declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16) 424 425define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) { 426; CHECK-LABEL: test_pcmpeq_w_128: 427; CHECK: ## BB#0: 428; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1] 429; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 430; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 431; CHECK-NEXT: retq ## encoding: [0xc3] 432 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1) 433 ret i8 %res 434} 435 436define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 437; CHECK-LABEL: test_mask_pcmpeq_w_128: 438; CHECK: ## BB#0: 439; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 440; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1] 441; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 442; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 443; CHECK-NEXT: retq ## encoding: [0xc3] 444 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask) 445 ret i8 %res 446} 447 448declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8) 449 450define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) { 451; CHECK-LABEL: test_pcmpgt_b_128: 452; CHECK: ## BB#0: 453; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1] 454; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 455; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 456; CHECK-NEXT: retq ## encoding: [0xc3] 457 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1) 458 ret i16 %res 459} 460 461define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 462; CHECK-LABEL: test_mask_pcmpgt_b_128: 463; CHECK: ## BB#0: 464; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 465; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc1] 466; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 467; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 468; CHECK-NEXT: retq ## encoding: [0xc3] 469 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask) 470 ret i16 %res 471} 472 473declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16) 474 475define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) { 476; CHECK-LABEL: test_pcmpgt_w_128: 477; CHECK: ## BB#0: 478; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1] 479; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 480; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 481; CHECK-NEXT: retq ## encoding: [0xc3] 482 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1) 483 ret i8 %res 484} 485 486define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 487; CHECK-LABEL: test_mask_pcmpgt_w_128: 488; CHECK: ## BB#0: 489; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 490; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc1] 491; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 492; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 493; CHECK-NEXT: retq ## encoding: [0xc3] 494 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask) 495 ret i8 %res 496} 497 498declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8) 499 500define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) { 501; CHECK-LABEL: test_cmp_b_128: 502; CHECK: ## BB#0: 503; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x00] 504; CHECK-NEXT: vpcmpltb %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc9,0x01] 505; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02] 506; CHECK-NEXT: vpcmpunordb %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x03] 507; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x04] 508; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe9,0x05] 509; CHECK-NEXT: vpcmpnleb %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf1,0x06] 510; CHECK-NEXT: vpcmpordb %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf9,0x07] 511; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 512; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 513; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] 514; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] 515; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 516; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] 517; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 518; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] 519; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 520; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] 521; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 522; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] 523; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 524; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] 525; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 526; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] 527; CHECK-NEXT: retq ## encoding: [0xc3] 528 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) 529 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 530 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1) 531 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 532 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1) 533 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 534 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1) 535 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 536 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1) 537 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 538 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1) 539 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 540 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1) 541 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 542 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1) 543 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 544 ret <8 x i16> %vec7 545} 546 547define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) { 548; CHECK-LABEL: test_mask_cmp_b_128: 549; CHECK: ## BB#0: 550; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 551; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x00] 552; CHECK-NEXT: vpcmpltb %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd1,0x01] 553; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02] 554; CHECK-NEXT: vpcmpunordb %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x03] 555; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x04] 556; CHECK-NEXT: vpcmpnltb %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xf1,0x05] 557; CHECK-NEXT: vpcmpnleb %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xf9,0x06] 558; CHECK-NEXT: vpcmpordb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc9,0x07] 559; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 560; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 561; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] 562; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] 563; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 564; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] 565; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 566; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] 567; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 568; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] 569; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 570; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] 571; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 572; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] 573; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 574; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] 575; CHECK-NEXT: retq ## encoding: [0xc3] 576 %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) 577 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 578 %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask) 579 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 580 %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask) 581 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 582 %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask) 583 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 584 %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask) 585 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 586 %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask) 587 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 588 %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask) 589 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 590 %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask) 591 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 592 ret <8 x i16> %vec7 593} 594 595declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone 596 597define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) { 598; CHECK-LABEL: test_ucmp_b_128: 599; CHECK: ## BB#0: 600; CHECK-NEXT: vpcmpequb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x00] 601; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01] 602; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02] 603; CHECK-NEXT: vpcmpunordub %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd9,0x03] 604; CHECK-NEXT: vpcmpnequb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x04] 605; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x05] 606; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf1,0x06] 607; CHECK-NEXT: vpcmpordub %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf9,0x07] 608; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 609; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 610; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] 611; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] 612; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 613; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] 614; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 615; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] 616; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 617; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] 618; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 619; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] 620; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 621; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] 622; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 623; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] 624; CHECK-NEXT: retq ## encoding: [0xc3] 625 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1) 626 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 627 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1) 628 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 629 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1) 630 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 631 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1) 632 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 633 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1) 634 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 635 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1) 636 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 637 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1) 638 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 639 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1) 640 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 641 ret <8 x i16> %vec7 642} 643 644define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) { 645; CHECK-LABEL: test_mask_ucmp_b_128: 646; CHECK: ## BB#0: 647; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 648; CHECK-NEXT: vpcmpequb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x00] 649; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01] 650; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02] 651; CHECK-NEXT: vpcmpunordub %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe1,0x03] 652; CHECK-NEXT: vpcmpnequb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x04] 653; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf1,0x05] 654; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf9,0x06] 655; CHECK-NEXT: vpcmpordub %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x07] 656; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 657; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 658; CHECK-NEXT: vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1] 659; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01] 660; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 661; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02] 662; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 663; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03] 664; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 665; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04] 666; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 667; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05] 668; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 669; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06] 670; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 671; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07] 672; CHECK-NEXT: retq ## encoding: [0xc3] 673 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask) 674 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 675 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask) 676 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 677 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask) 678 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 679 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask) 680 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 681 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask) 682 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 683 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask) 684 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 685 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask) 686 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 687 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask) 688 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 689 ret <8 x i16> %vec7 690} 691 692declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone 693 694define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { 695; CHECK-LABEL: test_cmp_w_128: 696; CHECK: ## BB#0: 697; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x00] 698; CHECK-NEXT: vpcmpltw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x01] 699; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x02] 700; CHECK-NEXT: vpcmpunordw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x03] 701; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe1,0x04] 702; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe9,0x05] 703; CHECK-NEXT: vpcmpnlew %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf1,0x06] 704; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf9,0x07] 705; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 706; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00] 707; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 708; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] 709; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 710; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] 711; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 712; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06] 713; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 714; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08] 715; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 716; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a] 717; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 718; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c] 719; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 720; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e] 721; CHECK-NEXT: retq ## encoding: [0xc3] 722 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) 723 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 724 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1) 725 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 726 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1) 727 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 728 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1) 729 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 730 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1) 731 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 732 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1) 733 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 734 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1) 735 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 736 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1) 737 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 738 ret <8 x i8> %vec7 739} 740 741define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { 742; CHECK-LABEL: test_mask_cmp_w_128: 743; CHECK: ## BB#0: 744; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 745; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x00] 746; CHECK-NEXT: vpcmpltw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x01] 747; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02] 748; CHECK-NEXT: vpcmpunordw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x03] 749; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x04] 750; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xf1,0x05] 751; CHECK-NEXT: vpcmpnlew %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xf9,0x06] 752; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc9,0x07] 753; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 754; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00] 755; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 756; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] 757; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 758; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] 759; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 760; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06] 761; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 762; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08] 763; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 764; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a] 765; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 766; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c] 767; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 768; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e] 769; CHECK-NEXT: retq ## encoding: [0xc3] 770 %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) 771 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 772 %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask) 773 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 774 %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask) 775 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 776 %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask) 777 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 778 %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask) 779 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 780 %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask) 781 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 782 %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask) 783 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 784 %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask) 785 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 786 ret <8 x i8> %vec7 787} 788 789declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone 790 791define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { 792; CHECK-LABEL: test_ucmp_w_128: 793; CHECK: ## BB#0: 794; CHECK-NEXT: vpcmpequw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x00] 795; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x01] 796; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x02] 797; CHECK-NEXT: vpcmpunorduw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x03] 798; CHECK-NEXT: vpcmpnequw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x04] 799; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe9,0x05] 800; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf1,0x06] 801; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf9,0x07] 802; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 803; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00] 804; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 805; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] 806; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 807; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] 808; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 809; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06] 810; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 811; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08] 812; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 813; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a] 814; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 815; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c] 816; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 817; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e] 818; CHECK-NEXT: retq ## encoding: [0xc3] 819 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1) 820 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 821 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1) 822 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 823 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1) 824 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 825 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1) 826 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 827 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1) 828 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 829 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1) 830 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 831 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1) 832 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 833 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1) 834 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 835 ret <8 x i8> %vec7 836} 837 838define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { 839; CHECK-LABEL: test_mask_ucmp_w_128: 840; CHECK: ## BB#0: 841; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 842; CHECK-NEXT: vpcmpequw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x00] 843; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01] 844; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02] 845; CHECK-NEXT: vpcmpunorduw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x03] 846; CHECK-NEXT: vpcmpnequw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x04] 847; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf1,0x05] 848; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf9,0x06] 849; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x07] 850; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 851; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00] 852; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 853; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] 854; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 855; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] 856; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 857; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06] 858; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 859; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08] 860; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 861; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a] 862; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 863; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c] 864; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 865; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e] 866; CHECK-NEXT: retq ## encoding: [0xc3] 867 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask) 868 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 869 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask) 870 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 871 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask) 872 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 873 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask) 874 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 875 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask) 876 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 877 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask) 878 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 879 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask) 880 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 881 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask) 882 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 883 ret <8 x i8> %vec7 884} 885 886declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone 887 888declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 889 890define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { 891; CHECK-LABEL: test_mask_vfmadd256_ps: 892; CHECK: ## BB#0: 893; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 894; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2] 895; CHECK-NEXT: retq ## encoding: [0xc3] 896 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind 897 ret <8 x float> %res 898} 899 900declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 901 902define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 903; CHECK-LABEL: test_mask_vfmadd128_ps: 904; CHECK: ## BB#0: 905; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 906; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2] 907; CHECK-NEXT: retq ## encoding: [0xc3] 908 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 909 ret <4 x float> %res 910} 911 912declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 913 914define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) { 915; CHECK-LABEL: test_mask_fmadd256_pd: 916; CHECK: ## BB#0: 917; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 918; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2] 919; CHECK-NEXT: retq ## encoding: [0xc3] 920 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) 921 ret <4 x double> %res 922} 923 924declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 925 926define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) { 927; CHECK-LABEL: test_mask_fmadd128_pd: 928; CHECK: ## BB#0: 929; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 930; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2] 931; CHECK-NEXT: retq ## encoding: [0xc3] 932 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) 933 ret <2 x double> %res 934} 935 936define <2 x double>@test_int_x86_avx512_mask_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 937; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128: 938; CHECK: ## BB#0: 939; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 940; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 941; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xda] 942; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] 943; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 944; CHECK-NEXT: retq ## encoding: [0xc3] 945 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 946 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 947 %res2 = fadd <2 x double> %res, %res1 948 ret <2 x double> %res2 949} 950 951declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 952 953define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 954; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128: 955; CHECK: ## BB#0: 956; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 957; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 958; CHECK-NEXT: vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd9] 959; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] 960; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 961; CHECK-NEXT: retq ## encoding: [0xc3] 962 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 963 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 964 %res2 = fadd <2 x double> %res, %res1 965 ret <2 x double> %res2 966} 967 968declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 969 970define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 971; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128: 972; CHECK: ## BB#0: 973; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 974; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 975; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xda] 976; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] 977; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 978; CHECK-NEXT: retq ## encoding: [0xc3] 979 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 980 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 981 %res2 = fadd <2 x double> %res, %res1 982 ret <2 x double> %res2 983} 984 985define <4 x double>@test_int_x86_avx512_mask_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 986; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256: 987; CHECK: ## BB#0: 988; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 989; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 990; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xda] 991; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] 992; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 993; CHECK-NEXT: retq ## encoding: [0xc3] 994 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 995 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 996 %res2 = fadd <4 x double> %res, %res1 997 ret <4 x double> %res2 998} 999 1000declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1001 1002define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1003; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256: 1004; CHECK: ## BB#0: 1005; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1006; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1007; CHECK-NEXT: vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd9] 1008; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] 1009; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1010; CHECK-NEXT: retq ## encoding: [0xc3] 1011 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1012 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1013 %res2 = fadd <4 x double> %res, %res1 1014 ret <4 x double> %res2 1015} 1016 1017declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1018 1019define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1020; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256: 1021; CHECK: ## BB#0: 1022; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1023; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1024; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xda] 1025; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] 1026; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1027; CHECK-NEXT: retq ## encoding: [0xc3] 1028 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1029 %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1030 %res2 = fadd <4 x double> %res, %res1 1031 ret <4 x double> %res2 1032} 1033 1034define <4 x float>@test_int_x86_avx512_mask_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1035; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128: 1036; CHECK: ## BB#0: 1037; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1038; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1039; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xda] 1040; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] 1041; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1042; CHECK-NEXT: retq ## encoding: [0xc3] 1043 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1044 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1045 %res2 = fadd <4 x float> %res, %res1 1046 ret <4 x float> %res2 1047} 1048 1049declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1050 1051define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1052; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128: 1053; CHECK: ## BB#0: 1054; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1055; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1056; CHECK-NEXT: vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd9] 1057; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] 1058; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1059; CHECK-NEXT: retq ## encoding: [0xc3] 1060 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1061 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1062 %res2 = fadd <4 x float> %res, %res1 1063 ret <4 x float> %res2 1064} 1065 1066declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1067 1068define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1069; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128: 1070; CHECK: ## BB#0: 1071; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1072; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1073; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa8,0xda] 1074; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] 1075; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1076; CHECK-NEXT: retq ## encoding: [0xc3] 1077 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1078 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1079 %res2 = fadd <4 x float> %res, %res1 1080 ret <4 x float> %res2 1081} 1082 1083define <8 x float>@test_int_x86_avx512_mask_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1084; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256: 1085; CHECK: ## BB#0: 1086; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1087; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1088; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xda] 1089; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2] 1090; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1091; CHECK-NEXT: retq ## encoding: [0xc3] 1092 %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1093 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1094 %res2 = fadd <8 x float> %res, %res1 1095 ret <8 x float> %res2 1096} 1097 1098declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1099 1100define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1101; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256: 1102; CHECK: ## BB#0: 1103; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1104; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1105; CHECK-NEXT: vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd9] 1106; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2] 1107; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1108; CHECK-NEXT: retq ## encoding: [0xc3] 1109 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1110 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1111 %res2 = fadd <8 x float> %res, %res1 1112 ret <8 x float> %res2 1113} 1114 1115declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1116 1117define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1118; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256: 1119; CHECK: ## BB#0: 1120; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1121; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1122; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xda] 1123; CHECK-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2] 1124; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1125; CHECK-NEXT: retq ## encoding: [0xc3] 1126 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1127 %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1128 %res2 = fadd <8 x float> %res, %res1 1129 ret <8 x float> %res2 1130} 1131 1132 1133declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1134 1135define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1136; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128: 1137; CHECK: ## BB#0: 1138; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1139; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1140; CHECK-NEXT: vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd9] 1141; CHECK-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xaa,0xc2] 1142; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 1143; CHECK-NEXT: retq ## encoding: [0xc3] 1144 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1145 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1146 %res2 = fadd <2 x double> %res, %res1 1147 ret <2 x double> %res2 1148} 1149 1150 1151declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1152 1153define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1154; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256: 1155; CHECK: ## BB#0: 1156; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1157; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1158; CHECK-NEXT: vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd9] 1159; CHECK-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xaa,0xc2] 1160; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1161; CHECK-NEXT: retq ## encoding: [0xc3] 1162 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1163 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1164 %res2 = fadd <4 x double> %res, %res1 1165 ret <4 x double> %res2 1166} 1167 1168declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1169 1170define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1171; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128: 1172; CHECK: ## BB#0: 1173; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1174; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1175; CHECK-NEXT: vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd9] 1176; CHECK-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xaa,0xc2] 1177; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1178; CHECK-NEXT: retq ## encoding: [0xc3] 1179 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1180 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1181 %res2 = fadd <4 x float> %res, %res1 1182 ret <4 x float> %res2 1183} 1184 1185declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1186 1187define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1188; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256: 1189; CHECK: ## BB#0: 1190; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1191; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1192; CHECK-NEXT: vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd9] 1193; CHECK-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xaa,0xc2] 1194; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1195; CHECK-NEXT: retq ## encoding: [0xc3] 1196 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1197 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1198 %res2 = fadd <8 x float> %res, %res1 1199 ret <8 x float> %res2 1200} 1201 1202declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 1203 1204define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { 1205; CHECK-LABEL: test_mask_vfnmadd256_ps: 1206; CHECK: ## BB#0: 1207; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1208; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2] 1209; CHECK-NEXT: retq ## encoding: [0xc3] 1210 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind 1211 ret <8 x float> %res 1212} 1213 1214declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 1215 1216define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1217; CHECK-LABEL: test_mask_vfnmadd128_ps: 1218; CHECK: ## BB#0: 1219; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1220; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2] 1221; CHECK-NEXT: retq ## encoding: [0xc3] 1222 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 1223 ret <4 x float> %res 1224} 1225 1226declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone 1227 1228define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 1229; CHECK-LABEL: test_mask_vfnmadd256_pd: 1230; CHECK: ## BB#0: 1231; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1232; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2] 1233; CHECK-NEXT: retq ## encoding: [0xc3] 1234 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 1235 ret <4 x double> %res 1236} 1237 1238declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone 1239 1240define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 1241; CHECK-LABEL: test_mask_vfnmadd128_pd: 1242; CHECK: ## BB#0: 1243; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1244; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2] 1245; CHECK-NEXT: retq ## encoding: [0xc3] 1246 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 1247 ret <2 x double> %res 1248} 1249 1250declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 1251 1252define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) { 1253; CHECK-LABEL: test_mask_vfnmsub256_ps: 1254; CHECK: ## BB#0: 1255; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1256; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2] 1257; CHECK-NEXT: retq ## encoding: [0xc3] 1258 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind 1259 ret <8 x float> %res 1260} 1261 1262declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 1263 1264define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1265; CHECK-LABEL: test_mask_vfnmsub128_ps: 1266; CHECK: ## BB#0: 1267; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1268; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2] 1269; CHECK-NEXT: retq ## encoding: [0xc3] 1270 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 1271 ret <4 x float> %res 1272} 1273 1274declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone 1275 1276define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 1277; CHECK-LABEL: test_mask_vfnmsub256_pd: 1278; CHECK: ## BB#0: 1279; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1280; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2] 1281; CHECK-NEXT: retq ## encoding: [0xc3] 1282 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 1283 ret <4 x double> %res 1284} 1285 1286declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone 1287 1288define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 1289; CHECK-LABEL: test_mask_vfnmsub128_pd: 1290; CHECK: ## BB#0: 1291; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1292; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2] 1293; CHECK-NEXT: retq ## encoding: [0xc3] 1294 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 1295 ret <2 x double> %res 1296} 1297 1298 1299define <2 x double>@test_int_x86_avx512_mask_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1300; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128: 1301; CHECK: ## BB#0: 1302; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1303; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1304; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xda] 1305; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xae,0xc2] 1306; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 1307; CHECK-NEXT: retq ## encoding: [0xc3] 1308 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1309 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1310 %res2 = fadd <2 x double> %res, %res1 1311 ret <2 x double> %res2 1312} 1313 1314declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1315 1316define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1317; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128: 1318; CHECK: ## BB#0: 1319; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1320; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1321; CHECK-NEXT: vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd9] 1322; CHECK-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xae,0xc2] 1323; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 1324; CHECK-NEXT: retq ## encoding: [0xc3] 1325 %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1326 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1327 %res2 = fadd <2 x double> %res, %res1 1328 ret <2 x double> %res2 1329} 1330 1331define <4 x double>@test_int_x86_avx512_mask_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1332; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256: 1333; CHECK: ## BB#0: 1334; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1335; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1336; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xda] 1337; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xae,0xc2] 1338; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1339; CHECK-NEXT: retq ## encoding: [0xc3] 1340 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1341 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1342 %res2 = fadd <4 x double> %res, %res1 1343 ret <4 x double> %res2 1344} 1345 1346declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1347 1348define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1349; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256: 1350; CHECK: ## BB#0: 1351; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1352; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1353; CHECK-NEXT: vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd9] 1354; CHECK-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xae,0xc2] 1355; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1356; CHECK-NEXT: retq ## encoding: [0xc3] 1357 %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1358 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1359 %res2 = fadd <4 x double> %res, %res1 1360 ret <4 x double> %res2 1361} 1362 1363define <4 x float>@test_int_x86_avx512_mask_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1364; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128: 1365; CHECK: ## BB#0: 1366; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1367; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1368; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xda] 1369; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xae,0xc2] 1370; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1371; CHECK-NEXT: retq ## encoding: [0xc3] 1372 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1373 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1374 %res2 = fadd <4 x float> %res, %res1 1375 ret <4 x float> %res2 1376} 1377 1378declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1379 1380define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1381; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128: 1382; CHECK: ## BB#0: 1383; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1384; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1385; CHECK-NEXT: vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd9] 1386; CHECK-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xae,0xc2] 1387; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1388; CHECK-NEXT: retq ## encoding: [0xc3] 1389 %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1390 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1391 %res2 = fadd <4 x float> %res, %res1 1392 ret <4 x float> %res2 1393} 1394 1395define <8 x float>@test_int_x86_avx512_mask_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1396; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256: 1397; CHECK: ## BB#0: 1398; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1399; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1400; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xda] 1401; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xae,0xc2] 1402; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1403; CHECK-NEXT: retq ## encoding: [0xc3] 1404 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1405 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1406 %res2 = fadd <8 x float> %res, %res1 1407 ret <8 x float> %res2 1408} 1409 1410declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1411 1412define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1413; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256: 1414; CHECK: ## BB#0: 1415; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1416; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1417; CHECK-NEXT: vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd9] 1418; CHECK-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xae,0xc2] 1419; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1420; CHECK-NEXT: retq ## encoding: [0xc3] 1421 %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1422 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1423 %res2 = fadd <8 x float> %res, %res1 1424 ret <8 x float> %res2 1425} 1426 1427define <2 x double>@test_int_x86_avx512_mask_vfnmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1428; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128: 1429; CHECK: ## BB#0: 1430; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1431; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1432; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xda] 1433; CHECK-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xac,0xc2] 1434; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 1435; CHECK-NEXT: retq ## encoding: [0xc3] 1436 %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1437 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1438 %res2 = fadd <2 x double> %res, %res1 1439 ret <2 x double> %res2 1440} 1441 1442define <4 x double>@test_int_x86_avx512_mask_vfnmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1443; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256: 1444; CHECK: ## BB#0: 1445; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1446; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1447; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xda] 1448; CHECK-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xac,0xc2] 1449; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1450; CHECK-NEXT: retq ## encoding: [0xc3] 1451 %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1452 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1453 %res2 = fadd <4 x double> %res, %res1 1454 ret <4 x double> %res2 1455} 1456 1457define <4 x float>@test_int_x86_avx512_mask_vfnmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1458; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128: 1459; CHECK: ## BB#0: 1460; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1461; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1462; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xda] 1463; CHECK-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xac,0xc2] 1464; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1465; CHECK-NEXT: retq ## encoding: [0xc3] 1466 %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1467 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1468 %res2 = fadd <4 x float> %res, %res1 1469 ret <4 x float> %res2 1470} 1471 1472define <8 x float>@test_int_x86_avx512_mask_vfnmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1473; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256: 1474; CHECK: ## BB#0: 1475; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1476; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1477; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xda] 1478; CHECK-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xac,0xc2] 1479; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1480; CHECK-NEXT: retq ## encoding: [0xc3] 1481 %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1482 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1483 %res2 = fadd <8 x float> %res, %res1 1484 ret <8 x float> %res2 1485} 1486 1487declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone 1488 1489define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) { 1490; CHECK-LABEL: test_mask_fmaddsub256_ps: 1491; CHECK: ## BB#0: 1492; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1493; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2] 1494; CHECK-NEXT: retq ## encoding: [0xc3] 1495 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) 1496 ret <8 x float> %res 1497} 1498 1499declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone 1500 1501define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { 1502; CHECK-LABEL: test_mask_fmaddsub128_ps: 1503; CHECK: ## BB#0: 1504; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1505; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2] 1506; CHECK-NEXT: retq ## encoding: [0xc3] 1507 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) 1508 ret <4 x float> %res 1509} 1510 1511declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone 1512 1513define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 1514; CHECK-LABEL: test_mask_vfmaddsub256_pd: 1515; CHECK: ## BB#0: 1516; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1517; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2] 1518; CHECK-NEXT: retq ## encoding: [0xc3] 1519 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 1520 ret <4 x double> %res 1521} 1522 1523declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone 1524 1525define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 1526; CHECK-LABEL: test_mask_vfmaddsub128_pd: 1527; CHECK: ## BB#0: 1528; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1529; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2] 1530; CHECK-NEXT: retq ## encoding: [0xc3] 1531 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 1532 ret <2 x double> %res 1533} 1534 1535define <2 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1536; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128: 1537; CHECK: ## BB#0: 1538; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1539; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1540; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xda] 1541; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2] 1542; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 1543; CHECK-NEXT: retq ## encoding: [0xc3] 1544 %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1545 %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1546 %res2 = fadd <2 x double> %res, %res1 1547 ret <2 x double> %res2 1548} 1549 1550declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1551 1552define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1553; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128: 1554; CHECK: ## BB#0: 1555; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1556; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1557; CHECK-NEXT: vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd9] 1558; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2] 1559; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 1560; CHECK-NEXT: retq ## encoding: [0xc3] 1561 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1562 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1563 %res2 = fadd <2 x double> %res, %res1 1564 ret <2 x double> %res2 1565} 1566 1567declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1568 1569define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1570; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128: 1571; CHECK: ## BB#0: 1572; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1573; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1574; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xda] 1575; CHECK-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2] 1576; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 1577; CHECK-NEXT: retq ## encoding: [0xc3] 1578 %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1579 %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1580 %res2 = fadd <2 x double> %res, %res1 1581 ret <2 x double> %res2 1582} 1583 1584define <4 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1585; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256: 1586; CHECK: ## BB#0: 1587; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1588; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1589; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xda] 1590; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2] 1591; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1592; CHECK-NEXT: retq ## encoding: [0xc3] 1593 %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1594 %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1595 %res2 = fadd <4 x double> %res, %res1 1596 ret <4 x double> %res2 1597} 1598 1599declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1600 1601define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1602; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256: 1603; CHECK: ## BB#0: 1604; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1605; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1606; CHECK-NEXT: vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd9] 1607; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2] 1608; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1609; CHECK-NEXT: retq ## encoding: [0xc3] 1610 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1611 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1612 %res2 = fadd <4 x double> %res, %res1 1613 ret <4 x double> %res2 1614} 1615 1616declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1617 1618define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1619; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256: 1620; CHECK: ## BB#0: 1621; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1622; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1623; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xda] 1624; CHECK-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2] 1625; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1626; CHECK-NEXT: retq ## encoding: [0xc3] 1627 %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1628 %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1629 %res2 = fadd <4 x double> %res, %res1 1630 ret <4 x double> %res2 1631} 1632 1633define <4 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1634; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128: 1635; CHECK: ## BB#0: 1636; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1637; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1638; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xda] 1639; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2] 1640; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1641; CHECK-NEXT: retq ## encoding: [0xc3] 1642 %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1643 %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1644 %res2 = fadd <4 x float> %res, %res1 1645 ret <4 x float> %res2 1646} 1647 1648declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1649 1650define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1651; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128: 1652; CHECK: ## BB#0: 1653; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1654; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1655; CHECK-NEXT: vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd9] 1656; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2] 1657; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1658; CHECK-NEXT: retq ## encoding: [0xc3] 1659 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1660 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1661 %res2 = fadd <4 x float> %res, %res1 1662 ret <4 x float> %res2 1663} 1664 1665declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1666 1667define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1668; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128: 1669; CHECK: ## BB#0: 1670; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1671; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1672; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa6,0xda] 1673; CHECK-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2] 1674; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1675; CHECK-NEXT: retq ## encoding: [0xc3] 1676 %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1677 %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1678 %res2 = fadd <4 x float> %res, %res1 1679 ret <4 x float> %res2 1680} 1681 1682define <8 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1683; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256: 1684; CHECK: ## BB#0: 1685; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1686; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1687; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xda] 1688; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2] 1689; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1690; CHECK-NEXT: retq ## encoding: [0xc3] 1691 %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1692 %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1693 %res2 = fadd <8 x float> %res, %res1 1694 ret <8 x float> %res2 1695} 1696 1697declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1698 1699define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1700; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256: 1701; CHECK: ## BB#0: 1702; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1703; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1704; CHECK-NEXT: vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd9] 1705; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2] 1706; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1707; CHECK-NEXT: retq ## encoding: [0xc3] 1708 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1709 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1710 %res2 = fadd <8 x float> %res, %res1 1711 ret <8 x float> %res2 1712} 1713 1714declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1715 1716define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1717; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256: 1718; CHECK: ## BB#0: 1719; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1720; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 1721; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xda] 1722; CHECK-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2] 1723; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1724; CHECK-NEXT: retq ## encoding: [0xc3] 1725 %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1726 %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1727 %res2 = fadd <8 x float> %res, %res1 1728 ret <8 x float> %res2 1729} 1730 1731declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 1732 1733define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 1734; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128: 1735; CHECK: ## BB#0: 1736; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1737; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1738; CHECK-NEXT: vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd9] 1739; CHECK-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa7,0xc2] 1740; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 1741; CHECK-NEXT: retq ## encoding: [0xc3] 1742 %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 1743 %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 1744 %res2=fadd <2 x double> %res, %res1 1745 ret <2 x double> %res2 1746} 1747 1748declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 1749 1750define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 1751; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256: 1752; CHECK: ## BB#0: 1753; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1754; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1755; CHECK-NEXT: vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd9] 1756; CHECK-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa7,0xc2] 1757; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 1758; CHECK-NEXT: retq ## encoding: [0xc3] 1759 %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 1760 %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 1761 %res2=fadd <4 x double> %res, %res1 1762 ret <4 x double> %res2 1763} 1764 1765declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1766 1767define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 1768; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128: 1769; CHECK: ## BB#0: 1770; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1771; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1772; CHECK-NEXT: vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd9] 1773; CHECK-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa7,0xc2] 1774; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 1775; CHECK-NEXT: retq ## encoding: [0xc3] 1776 %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 1777 %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 1778 %res2=fadd <4 x float> %res, %res1 1779 ret <4 x float> %res2 1780} 1781 1782declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1783 1784define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 1785; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256: 1786; CHECK: ## BB#0: 1787; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1788; CHECK-NEXT: vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda] 1789; CHECK-NEXT: vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd9] 1790; CHECK-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa7,0xc2] 1791; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 1792; CHECK-NEXT: retq ## encoding: [0xc3] 1793 %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 1794 %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 1795 %res2=fadd <8 x float> %res, %res1 1796 ret <8 x float> %res2 1797} 1798 1799 1800define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { 1801; CHECK-LABEL: test_mask_vfmadd128_ps_r: 1802; CHECK: ## BB#0: 1803; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1804; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2] 1805; CHECK-NEXT: retq ## encoding: [0xc3] 1806 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 1807 ret <4 x float> %res 1808} 1809 1810define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { 1811; CHECK-LABEL: test_mask_vfmadd128_ps_rz: 1812; CHECK: ## BB#0: 1813; CHECK-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2] 1814; CHECK-NEXT: retq ## encoding: [0xc3] 1815 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 1816 ret <4 x float> %res 1817} 1818 1819define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) { 1820; CHECK-LABEL: test_mask_vfmadd128_ps_rmk: 1821; CHECK: ## BB#0: 1822; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1823; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07] 1824; CHECK-NEXT: retq ## encoding: [0xc3] 1825 %a2 = load <4 x float>, <4 x float>* %ptr_a2 1826 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 1827 ret <4 x float> %res 1828} 1829 1830define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) { 1831; CHECK-LABEL: test_mask_vfmadd128_ps_rmka: 1832; CHECK: ## BB#0: 1833; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1834; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07] 1835; CHECK-NEXT: retq ## encoding: [0xc3] 1836 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8 1837 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind 1838 ret <4 x float> %res 1839} 1840 1841define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) { 1842; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz: 1843; CHECK: ## BB#0: 1844; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07] 1845; CHECK-NEXT: retq ## encoding: [0xc3] 1846 %a2 = load <4 x float>, <4 x float>* %ptr_a2 1847 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 1848 ret <4 x float> %res 1849} 1850 1851define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) { 1852; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza: 1853; CHECK: ## BB#0: 1854; CHECK-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07] 1855; CHECK-NEXT: retq ## encoding: [0xc3] 1856 %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4 1857 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind 1858 ret <4 x float> %res 1859} 1860 1861define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) { 1862; CHECK-LABEL: test_mask_vfmadd128_ps_rmb: 1863; CHECK: ## BB#0: 1864; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1865; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] 1866; CHECK-NEXT: retq ## encoding: [0xc3] 1867 %q = load float, float* %ptr_a2 1868 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1869 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 1870 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 1871 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 1872 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind 1873 ret <4 x float> %res 1874} 1875 1876define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) { 1877; CHECK-LABEL: test_mask_vfmadd128_ps_rmba: 1878; CHECK: ## BB#0: 1879; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1880; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07] 1881; CHECK-NEXT: retq ## encoding: [0xc3] 1882 %q = load float, float* %ptr_a2, align 4 1883 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1884 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 1885 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 1886 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 1887 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind 1888 ret <4 x float> %res 1889} 1890 1891define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) { 1892; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz: 1893; CHECK: ## BB#0: 1894; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07] 1895; CHECK-NEXT: retq ## encoding: [0xc3] 1896 %q = load float, float* %ptr_a2 1897 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1898 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 1899 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 1900 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 1901 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind 1902 ret <4 x float> %res 1903} 1904 1905define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) { 1906; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza: 1907; CHECK: ## BB#0: 1908; CHECK-NEXT: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07] 1909; CHECK-NEXT: retq ## encoding: [0xc3] 1910 %q = load float, float* %ptr_a2, align 4 1911 %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 1912 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 1913 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 1914 %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 1915 %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind 1916 ret <4 x float> %res 1917} 1918 1919define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) { 1920; CHECK-LABEL: test_mask_vfmadd128_pd_r: 1921; CHECK: ## BB#0: 1922; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1923; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2] 1924; CHECK-NEXT: retq ## encoding: [0xc3] 1925 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 1926 ret <2 x double> %res 1927} 1928 1929define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { 1930; CHECK-LABEL: test_mask_vfmadd128_pd_rz: 1931; CHECK: ## BB#0: 1932; CHECK-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2] 1933; CHECK-NEXT: retq ## encoding: [0xc3] 1934 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind 1935 ret <2 x double> %res 1936} 1937 1938define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) { 1939; CHECK-LABEL: test_mask_vfmadd128_pd_rmk: 1940; CHECK: ## BB#0: 1941; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1942; CHECK-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07] 1943; CHECK-NEXT: retq ## encoding: [0xc3] 1944 %a2 = load <2 x double>, <2 x double>* %ptr_a2 1945 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind 1946 ret <2 x double> %res 1947} 1948 1949define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) { 1950; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz: 1951; CHECK: ## BB#0: 1952; CHECK-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0x07] 1953; CHECK-NEXT: retq ## encoding: [0xc3] 1954 %a2 = load <2 x double>, <2 x double>* %ptr_a2 1955 %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind 1956 ret <2 x double> %res 1957} 1958 1959define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) { 1960; CHECK-LABEL: test_mask_vfmadd256_pd_r: 1961; CHECK: ## BB#0: 1962; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1963; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2] 1964; CHECK-NEXT: retq ## encoding: [0xc3] 1965 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 1966 ret <4 x double> %res 1967} 1968 1969define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { 1970; CHECK-LABEL: test_mask_vfmadd256_pd_rz: 1971; CHECK: ## BB#0: 1972; CHECK-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2] 1973; CHECK-NEXT: retq ## encoding: [0xc3] 1974 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind 1975 ret <4 x double> %res 1976} 1977 1978define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) { 1979; CHECK-LABEL: test_mask_vfmadd256_pd_rmk: 1980; CHECK: ## BB#0: 1981; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1982; CHECK-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07] 1983; CHECK-NEXT: retq ## encoding: [0xc3] 1984 %a2 = load <4 x double>, <4 x double>* %ptr_a2 1985 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind 1986 ret <4 x double> %res 1987} 1988 1989define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) { 1990; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz: 1991; CHECK: ## BB#0: 1992; CHECK-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0x07] 1993; CHECK-NEXT: retq ## encoding: [0xc3] 1994 %a2 = load <4 x double>, <4 x double>* %ptr_a2 1995 %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind 1996 ret <4 x double> %res 1997} 1998define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 1999; CHECK-LABEL: test_mask_add_epi16_rr_128: 2000; CHECK: ## BB#0: 2001; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] 2002; CHECK-NEXT: retq ## encoding: [0xc3] 2003 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2004 ret <8 x i16> %res 2005} 2006 2007define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 2008; CHECK-LABEL: test_mask_add_epi16_rrk_128: 2009; CHECK: ## BB#0: 2010; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2011; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1] 2012; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2013; CHECK-NEXT: retq ## encoding: [0xc3] 2014 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2015 ret <8 x i16> %res 2016} 2017 2018define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 2019; CHECK-LABEL: test_mask_add_epi16_rrkz_128: 2020; CHECK: ## BB#0: 2021; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2022; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1] 2023; CHECK-NEXT: retq ## encoding: [0xc3] 2024 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2025 ret <8 x i16> %res 2026} 2027 2028define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2029; CHECK-LABEL: test_mask_add_epi16_rm_128: 2030; CHECK: ## BB#0: 2031; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07] 2032; CHECK-NEXT: retq ## encoding: [0xc3] 2033 %b = load <8 x i16>, <8 x i16>* %ptr_b 2034 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2035 ret <8 x i16> %res 2036} 2037 2038define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2039; CHECK-LABEL: test_mask_add_epi16_rmk_128: 2040; CHECK: ## BB#0: 2041; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2042; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f] 2043; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2044; CHECK-NEXT: retq ## encoding: [0xc3] 2045 %b = load <8 x i16>, <8 x i16>* %ptr_b 2046 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2047 ret <8 x i16> %res 2048} 2049 2050define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 2051; CHECK-LABEL: test_mask_add_epi16_rmkz_128: 2052; CHECK: ## BB#0: 2053; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2054; CHECK-NEXT: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07] 2055; CHECK-NEXT: retq ## encoding: [0xc3] 2056 %b = load <8 x i16>, <8 x i16>* %ptr_b 2057 %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2058 ret <8 x i16> %res 2059} 2060 2061declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2062 2063define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2064; CHECK-LABEL: test_mask_add_epi16_rr_256: 2065; CHECK: ## BB#0: 2066; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1] 2067; CHECK-NEXT: retq ## encoding: [0xc3] 2068 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2069 ret <16 x i16> %res 2070} 2071 2072define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 2073; CHECK-LABEL: test_mask_add_epi16_rrk_256: 2074; CHECK: ## BB#0: 2075; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2076; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1] 2077; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2078; CHECK-NEXT: retq ## encoding: [0xc3] 2079 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2080 ret <16 x i16> %res 2081} 2082 2083define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 2084; CHECK-LABEL: test_mask_add_epi16_rrkz_256: 2085; CHECK: ## BB#0: 2086; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2087; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1] 2088; CHECK-NEXT: retq ## encoding: [0xc3] 2089 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2090 ret <16 x i16> %res 2091} 2092 2093define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 2094; CHECK-LABEL: test_mask_add_epi16_rm_256: 2095; CHECK: ## BB#0: 2096; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07] 2097; CHECK-NEXT: retq ## encoding: [0xc3] 2098 %b = load <16 x i16>, <16 x i16>* %ptr_b 2099 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2100 ret <16 x i16> %res 2101} 2102 2103define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2104; CHECK-LABEL: test_mask_add_epi16_rmk_256: 2105; CHECK: ## BB#0: 2106; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2107; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f] 2108; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2109; CHECK-NEXT: retq ## encoding: [0xc3] 2110 %b = load <16 x i16>, <16 x i16>* %ptr_b 2111 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2112 ret <16 x i16> %res 2113} 2114 2115define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 2116; CHECK-LABEL: test_mask_add_epi16_rmkz_256: 2117; CHECK: ## BB#0: 2118; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2119; CHECK-NEXT: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07] 2120; CHECK-NEXT: retq ## encoding: [0xc3] 2121 %b = load <16 x i16>, <16 x i16>* %ptr_b 2122 %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2123 ret <16 x i16> %res 2124} 2125 2126declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2127 2128define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 2129; CHECK-LABEL: test_mask_sub_epi16_rr_128: 2130; CHECK: ## BB#0: 2131; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1] 2132; CHECK-NEXT: retq ## encoding: [0xc3] 2133 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2134 ret <8 x i16> %res 2135} 2136 2137define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 2138; CHECK-LABEL: test_mask_sub_epi16_rrk_128: 2139; CHECK: ## BB#0: 2140; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2141; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1] 2142; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2143; CHECK-NEXT: retq ## encoding: [0xc3] 2144 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2145 ret <8 x i16> %res 2146} 2147 2148define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 2149; CHECK-LABEL: test_mask_sub_epi16_rrkz_128: 2150; CHECK: ## BB#0: 2151; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2152; CHECK-NEXT: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1] 2153; CHECK-NEXT: retq ## encoding: [0xc3] 2154 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2155 ret <8 x i16> %res 2156} 2157 2158define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2159; CHECK-LABEL: test_mask_sub_epi16_rm_128: 2160; CHECK: ## BB#0: 2161; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07] 2162; CHECK-NEXT: retq ## encoding: [0xc3] 2163 %b = load <8 x i16>, <8 x i16>* %ptr_b 2164 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2165 ret <8 x i16> %res 2166} 2167 2168define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2169; CHECK-LABEL: test_mask_sub_epi16_rmk_128: 2170; CHECK: ## BB#0: 2171; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2172; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f] 2173; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2174; CHECK-NEXT: retq ## encoding: [0xc3] 2175 %b = load <8 x i16>, <8 x i16>* %ptr_b 2176 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2177 ret <8 x i16> %res 2178} 2179 2180define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 2181; CHECK-LABEL: test_mask_sub_epi16_rmkz_128: 2182; CHECK: ## BB#0: 2183; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2184; CHECK-NEXT: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07] 2185; CHECK-NEXT: retq ## encoding: [0xc3] 2186 %b = load <8 x i16>, <8 x i16>* %ptr_b 2187 %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2188 ret <8 x i16> %res 2189} 2190 2191declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2192 2193define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2194; CHECK-LABEL: test_mask_sub_epi16_rr_256: 2195; CHECK: ## BB#0: 2196; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1] 2197; CHECK-NEXT: retq ## encoding: [0xc3] 2198 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2199 ret <16 x i16> %res 2200} 2201 2202define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 2203; CHECK-LABEL: test_mask_sub_epi16_rrk_256: 2204; CHECK: ## BB#0: 2205; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2206; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1] 2207; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2208; CHECK-NEXT: retq ## encoding: [0xc3] 2209 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2210 ret <16 x i16> %res 2211} 2212 2213define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 2214; CHECK-LABEL: test_mask_sub_epi16_rrkz_256: 2215; CHECK: ## BB#0: 2216; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2217; CHECK-NEXT: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1] 2218; CHECK-NEXT: retq ## encoding: [0xc3] 2219 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2220 ret <16 x i16> %res 2221} 2222 2223define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 2224; CHECK-LABEL: test_mask_sub_epi16_rm_256: 2225; CHECK: ## BB#0: 2226; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07] 2227; CHECK-NEXT: retq ## encoding: [0xc3] 2228 %b = load <16 x i16>, <16 x i16>* %ptr_b 2229 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2230 ret <16 x i16> %res 2231} 2232 2233define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2234; CHECK-LABEL: test_mask_sub_epi16_rmk_256: 2235; CHECK: ## BB#0: 2236; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2237; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f] 2238; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2239; CHECK-NEXT: retq ## encoding: [0xc3] 2240 %b = load <16 x i16>, <16 x i16>* %ptr_b 2241 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2242 ret <16 x i16> %res 2243} 2244 2245define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 2246; CHECK-LABEL: test_mask_sub_epi16_rmkz_256: 2247; CHECK: ## BB#0: 2248; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2249; CHECK-NEXT: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07] 2250; CHECK-NEXT: retq ## encoding: [0xc3] 2251 %b = load <16 x i16>, <16 x i16>* %ptr_b 2252 %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2253 ret <16 x i16> %res 2254} 2255 2256declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2257 2258define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 2259; CHECK-LABEL: test_mask_add_epi16_rr_512: 2260; CHECK: ## BB#0: 2261; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] 2262; CHECK-NEXT: retq ## encoding: [0xc3] 2263 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2264 ret <32 x i16> %res 2265} 2266 2267define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 2268; CHECK-LABEL: test_mask_add_epi16_rrk_512: 2269; CHECK: ## BB#0: 2270; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 2271; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1] 2272; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2273; CHECK-NEXT: retq ## encoding: [0xc3] 2274 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2275 ret <32 x i16> %res 2276} 2277 2278define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 2279; CHECK-LABEL: test_mask_add_epi16_rrkz_512: 2280; CHECK: ## BB#0: 2281; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 2282; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1] 2283; CHECK-NEXT: retq ## encoding: [0xc3] 2284 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2285 ret <32 x i16> %res 2286} 2287 2288define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 2289; CHECK-LABEL: test_mask_add_epi16_rm_512: 2290; CHECK: ## BB#0: 2291; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07] 2292; CHECK-NEXT: retq ## encoding: [0xc3] 2293 %b = load <32 x i16>, <32 x i16>* %ptr_b 2294 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2295 ret <32 x i16> %res 2296} 2297 2298define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 2299; CHECK-LABEL: test_mask_add_epi16_rmk_512: 2300; CHECK: ## BB#0: 2301; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 2302; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f] 2303; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2304; CHECK-NEXT: retq ## encoding: [0xc3] 2305 %b = load <32 x i16>, <32 x i16>* %ptr_b 2306 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2307 ret <32 x i16> %res 2308} 2309 2310define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 2311; CHECK-LABEL: test_mask_add_epi16_rmkz_512: 2312; CHECK: ## BB#0: 2313; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 2314; CHECK-NEXT: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07] 2315; CHECK-NEXT: retq ## encoding: [0xc3] 2316 %b = load <32 x i16>, <32 x i16>* %ptr_b 2317 %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2318 ret <32 x i16> %res 2319} 2320 2321declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2322 2323define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 2324; CHECK-LABEL: test_mask_sub_epi16_rr_512: 2325; CHECK: ## BB#0: 2326; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1] 2327; CHECK-NEXT: retq ## encoding: [0xc3] 2328 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2329 ret <32 x i16> %res 2330} 2331 2332define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 2333; CHECK-LABEL: test_mask_sub_epi16_rrk_512: 2334; CHECK: ## BB#0: 2335; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 2336; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1] 2337; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2338; CHECK-NEXT: retq ## encoding: [0xc3] 2339 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2340 ret <32 x i16> %res 2341} 2342 2343define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 2344; CHECK-LABEL: test_mask_sub_epi16_rrkz_512: 2345; CHECK: ## BB#0: 2346; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 2347; CHECK-NEXT: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1] 2348; CHECK-NEXT: retq ## encoding: [0xc3] 2349 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2350 ret <32 x i16> %res 2351} 2352 2353define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 2354; CHECK-LABEL: test_mask_sub_epi16_rm_512: 2355; CHECK: ## BB#0: 2356; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07] 2357; CHECK-NEXT: retq ## encoding: [0xc3] 2358 %b = load <32 x i16>, <32 x i16>* %ptr_b 2359 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2360 ret <32 x i16> %res 2361} 2362 2363define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 2364; CHECK-LABEL: test_mask_sub_epi16_rmk_512: 2365; CHECK: ## BB#0: 2366; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 2367; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f] 2368; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2369; CHECK-NEXT: retq ## encoding: [0xc3] 2370 %b = load <32 x i16>, <32 x i16>* %ptr_b 2371 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2372 ret <32 x i16> %res 2373} 2374 2375define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 2376; CHECK-LABEL: test_mask_sub_epi16_rmkz_512: 2377; CHECK: ## BB#0: 2378; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 2379; CHECK-NEXT: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07] 2380; CHECK-NEXT: retq ## encoding: [0xc3] 2381 %b = load <32 x i16>, <32 x i16>* %ptr_b 2382 %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2383 ret <32 x i16> %res 2384} 2385 2386declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2387 2388define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 2389; CHECK-LABEL: test_mask_mullo_epi16_rr_512: 2390; CHECK: ## BB#0: 2391; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1] 2392; CHECK-NEXT: retq ## encoding: [0xc3] 2393 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2394 ret <32 x i16> %res 2395} 2396 2397define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 2398; CHECK-LABEL: test_mask_mullo_epi16_rrk_512: 2399; CHECK: ## BB#0: 2400; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 2401; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1] 2402; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2403; CHECK-NEXT: retq ## encoding: [0xc3] 2404 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2405 ret <32 x i16> %res 2406} 2407 2408define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 2409; CHECK-LABEL: test_mask_mullo_epi16_rrkz_512: 2410; CHECK: ## BB#0: 2411; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 2412; CHECK-NEXT: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1] 2413; CHECK-NEXT: retq ## encoding: [0xc3] 2414 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2415 ret <32 x i16> %res 2416} 2417 2418define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 2419; CHECK-LABEL: test_mask_mullo_epi16_rm_512: 2420; CHECK: ## BB#0: 2421; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07] 2422; CHECK-NEXT: retq ## encoding: [0xc3] 2423 %b = load <32 x i16>, <32 x i16>* %ptr_b 2424 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 2425 ret <32 x i16> %res 2426} 2427 2428define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 2429; CHECK-LABEL: test_mask_mullo_epi16_rmk_512: 2430; CHECK: ## BB#0: 2431; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 2432; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f] 2433; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2434; CHECK-NEXT: retq ## encoding: [0xc3] 2435 %b = load <32 x i16>, <32 x i16>* %ptr_b 2436 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 2437 ret <32 x i16> %res 2438} 2439 2440define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 2441; CHECK-LABEL: test_mask_mullo_epi16_rmkz_512: 2442; CHECK: ## BB#0: 2443; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 2444; CHECK-NEXT: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07] 2445; CHECK-NEXT: retq ## encoding: [0xc3] 2446 %b = load <32 x i16>, <32 x i16>* %ptr_b 2447 %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 2448 ret <32 x i16> %res 2449} 2450 2451declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2452 2453define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 2454; CHECK-LABEL: test_mask_mullo_epi16_rr_128: 2455; CHECK: ## BB#0: 2456; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1] 2457; CHECK-NEXT: retq ## encoding: [0xc3] 2458 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2459 ret <8 x i16> %res 2460} 2461 2462define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 2463; CHECK-LABEL: test_mask_mullo_epi16_rrk_128: 2464; CHECK: ## BB#0: 2465; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2466; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1] 2467; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2468; CHECK-NEXT: retq ## encoding: [0xc3] 2469 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2470 ret <8 x i16> %res 2471} 2472 2473define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 2474; CHECK-LABEL: test_mask_mullo_epi16_rrkz_128: 2475; CHECK: ## BB#0: 2476; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2477; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1] 2478; CHECK-NEXT: retq ## encoding: [0xc3] 2479 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2480 ret <8 x i16> %res 2481} 2482 2483define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2484; CHECK-LABEL: test_mask_mullo_epi16_rm_128: 2485; CHECK: ## BB#0: 2486; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07] 2487; CHECK-NEXT: retq ## encoding: [0xc3] 2488 %b = load <8 x i16>, <8 x i16>* %ptr_b 2489 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 2490 ret <8 x i16> %res 2491} 2492 2493define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2494; CHECK-LABEL: test_mask_mullo_epi16_rmk_128: 2495; CHECK: ## BB#0: 2496; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2497; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f] 2498; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2499; CHECK-NEXT: retq ## encoding: [0xc3] 2500 %b = load <8 x i16>, <8 x i16>* %ptr_b 2501 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 2502 ret <8 x i16> %res 2503} 2504 2505define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 2506; CHECK-LABEL: test_mask_mullo_epi16_rmkz_128: 2507; CHECK: ## BB#0: 2508; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2509; CHECK-NEXT: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07] 2510; CHECK-NEXT: retq ## encoding: [0xc3] 2511 %b = load <8 x i16>, <8 x i16>* %ptr_b 2512 %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 2513 ret <8 x i16> %res 2514} 2515 2516declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 2517 2518define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2519; CHECK-LABEL: test_mask_mullo_epi16_rr_256: 2520; CHECK: ## BB#0: 2521; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1] 2522; CHECK-NEXT: retq ## encoding: [0xc3] 2523 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2524 ret <16 x i16> %res 2525} 2526 2527define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 2528; CHECK-LABEL: test_mask_mullo_epi16_rrk_256: 2529; CHECK: ## BB#0: 2530; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2531; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1] 2532; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2533; CHECK-NEXT: retq ## encoding: [0xc3] 2534 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2535 ret <16 x i16> %res 2536} 2537 2538define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 2539; CHECK-LABEL: test_mask_mullo_epi16_rrkz_256: 2540; CHECK: ## BB#0: 2541; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2542; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1] 2543; CHECK-NEXT: retq ## encoding: [0xc3] 2544 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2545 ret <16 x i16> %res 2546} 2547 2548define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 2549; CHECK-LABEL: test_mask_mullo_epi16_rm_256: 2550; CHECK: ## BB#0: 2551; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07] 2552; CHECK-NEXT: retq ## encoding: [0xc3] 2553 %b = load <16 x i16>, <16 x i16>* %ptr_b 2554 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 2555 ret <16 x i16> %res 2556} 2557 2558define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2559; CHECK-LABEL: test_mask_mullo_epi16_rmk_256: 2560; CHECK: ## BB#0: 2561; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2562; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f] 2563; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2564; CHECK-NEXT: retq ## encoding: [0xc3] 2565 %b = load <16 x i16>, <16 x i16>* %ptr_b 2566 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 2567 ret <16 x i16> %res 2568} 2569 2570define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 2571; CHECK-LABEL: test_mask_mullo_epi16_rmkz_256: 2572; CHECK: ## BB#0: 2573; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2574; CHECK-NEXT: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07] 2575; CHECK-NEXT: retq ## encoding: [0xc3] 2576 %b = load <16 x i16>, <16 x i16>* %ptr_b 2577 %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 2578 ret <16 x i16> %res 2579} 2580 2581declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 2582 2583 2584define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 2585; CHECK-LABEL: test_mask_packs_epi32_rr_128: 2586; CHECK: ## BB#0: 2587; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1] 2588; CHECK-NEXT: retq ## encoding: [0xc3] 2589 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2590 ret <8 x i16> %res 2591} 2592 2593define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 2594; CHECK-LABEL: test_mask_packs_epi32_rrk_128: 2595; CHECK: ## BB#0: 2596; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2597; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1] 2598; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2599; CHECK-NEXT: retq ## encoding: [0xc3] 2600 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2601 ret <8 x i16> %res 2602} 2603 2604define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 2605; CHECK-LABEL: test_mask_packs_epi32_rrkz_128: 2606; CHECK: ## BB#0: 2607; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2608; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1] 2609; CHECK-NEXT: retq ## encoding: [0xc3] 2610 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2611 ret <8 x i16> %res 2612} 2613 2614define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 2615; CHECK-LABEL: test_mask_packs_epi32_rm_128: 2616; CHECK: ## BB#0: 2617; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0x07] 2618; CHECK-NEXT: retq ## encoding: [0xc3] 2619 %b = load <4 x i32>, <4 x i32>* %ptr_b 2620 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2621 ret <8 x i16> %res 2622} 2623 2624define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2625; CHECK-LABEL: test_mask_packs_epi32_rmk_128: 2626; CHECK: ## BB#0: 2627; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2628; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f] 2629; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2630; CHECK-NEXT: retq ## encoding: [0xc3] 2631 %b = load <4 x i32>, <4 x i32>* %ptr_b 2632 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2633 ret <8 x i16> %res 2634} 2635 2636define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 2637; CHECK-LABEL: test_mask_packs_epi32_rmkz_128: 2638; CHECK: ## BB#0: 2639; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2640; CHECK-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07] 2641; CHECK-NEXT: retq ## encoding: [0xc3] 2642 %b = load <4 x i32>, <4 x i32>* %ptr_b 2643 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2644 ret <8 x i16> %res 2645} 2646 2647define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 2648; CHECK-LABEL: test_mask_packs_epi32_rmb_128: 2649; CHECK: ## BB#0: 2650; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07] 2651; CHECK-NEXT: retq ## encoding: [0xc3] 2652 %q = load i32, i32* %ptr_b 2653 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2654 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2655 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2656 ret <8 x i16> %res 2657} 2658 2659define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2660; CHECK-LABEL: test_mask_packs_epi32_rmbk_128: 2661; CHECK: ## BB#0: 2662; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2663; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f] 2664; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2665; CHECK-NEXT: retq ## encoding: [0xc3] 2666 %q = load i32, i32* %ptr_b 2667 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2668 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2669 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2670 ret <8 x i16> %res 2671} 2672 2673define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 2674; CHECK-LABEL: test_mask_packs_epi32_rmbkz_128: 2675; CHECK: ## BB#0: 2676; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2677; CHECK-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07] 2678; CHECK-NEXT: retq ## encoding: [0xc3] 2679 %q = load i32, i32* %ptr_b 2680 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2681 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2682 %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2683 ret <8 x i16> %res 2684} 2685 2686declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) 2687 2688define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 2689; CHECK-LABEL: test_mask_packs_epi32_rr_256: 2690; CHECK: ## BB#0: 2691; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0xc1] 2692; CHECK-NEXT: retq ## encoding: [0xc3] 2693 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2694 ret <16 x i16> %res 2695} 2696 2697define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 2698; CHECK-LABEL: test_mask_packs_epi32_rrk_256: 2699; CHECK: ## BB#0: 2700; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2701; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1] 2702; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2703; CHECK-NEXT: retq ## encoding: [0xc3] 2704 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2705 ret <16 x i16> %res 2706} 2707 2708define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 2709; CHECK-LABEL: test_mask_packs_epi32_rrkz_256: 2710; CHECK: ## BB#0: 2711; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2712; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1] 2713; CHECK-NEXT: retq ## encoding: [0xc3] 2714 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2715 ret <16 x i16> %res 2716} 2717 2718define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 2719; CHECK-LABEL: test_mask_packs_epi32_rm_256: 2720; CHECK: ## BB#0: 2721; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0x07] 2722; CHECK-NEXT: retq ## encoding: [0xc3] 2723 %b = load <8 x i32>, <8 x i32>* %ptr_b 2724 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2725 ret <16 x i16> %res 2726} 2727 2728define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2729; CHECK-LABEL: test_mask_packs_epi32_rmk_256: 2730; CHECK: ## BB#0: 2731; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2732; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f] 2733; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2734; CHECK-NEXT: retq ## encoding: [0xc3] 2735 %b = load <8 x i32>, <8 x i32>* %ptr_b 2736 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2737 ret <16 x i16> %res 2738} 2739 2740define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 2741; CHECK-LABEL: test_mask_packs_epi32_rmkz_256: 2742; CHECK: ## BB#0: 2743; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2744; CHECK-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07] 2745; CHECK-NEXT: retq ## encoding: [0xc3] 2746 %b = load <8 x i32>, <8 x i32>* %ptr_b 2747 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2748 ret <16 x i16> %res 2749} 2750 2751define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 2752; CHECK-LABEL: test_mask_packs_epi32_rmb_256: 2753; CHECK: ## BB#0: 2754; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07] 2755; CHECK-NEXT: retq ## encoding: [0xc3] 2756 %q = load i32, i32* %ptr_b 2757 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2758 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2759 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 2760 ret <16 x i16> %res 2761} 2762 2763define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 2764; CHECK-LABEL: test_mask_packs_epi32_rmbk_256: 2765; CHECK: ## BB#0: 2766; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2767; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f] 2768; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2769; CHECK-NEXT: retq ## encoding: [0xc3] 2770 %q = load i32, i32* %ptr_b 2771 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2772 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2773 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 2774 ret <16 x i16> %res 2775} 2776 2777define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 2778; CHECK-LABEL: test_mask_packs_epi32_rmbkz_256: 2779; CHECK: ## BB#0: 2780; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2781; CHECK-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07] 2782; CHECK-NEXT: retq ## encoding: [0xc3] 2783 %q = load i32, i32* %ptr_b 2784 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 2785 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 2786 %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 2787 ret <16 x i16> %res 2788} 2789 2790declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) 2791 2792define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 2793; CHECK-LABEL: test_mask_packs_epi16_rr_128: 2794; CHECK: ## BB#0: 2795; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x63,0xc1] 2796; CHECK-NEXT: retq ## encoding: [0xc3] 2797 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 2798 ret <16 x i8> %res 2799} 2800 2801define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 2802; CHECK-LABEL: test_mask_packs_epi16_rrk_128: 2803; CHECK: ## BB#0: 2804; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2805; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1] 2806; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2807; CHECK-NEXT: retq ## encoding: [0xc3] 2808 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 2809 ret <16 x i8> %res 2810} 2811 2812define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 2813; CHECK-LABEL: test_mask_packs_epi16_rrkz_128: 2814; CHECK: ## BB#0: 2815; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2816; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1] 2817; CHECK-NEXT: retq ## encoding: [0xc3] 2818 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 2819 ret <16 x i8> %res 2820} 2821 2822define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 2823; CHECK-LABEL: test_mask_packs_epi16_rm_128: 2824; CHECK: ## BB#0: 2825; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x63,0x07] 2826; CHECK-NEXT: retq ## encoding: [0xc3] 2827 %b = load <8 x i16>, <8 x i16>* %ptr_b 2828 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 2829 ret <16 x i8> %res 2830} 2831 2832define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 2833; CHECK-LABEL: test_mask_packs_epi16_rmk_128: 2834; CHECK: ## BB#0: 2835; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2836; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f] 2837; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2838; CHECK-NEXT: retq ## encoding: [0xc3] 2839 %b = load <8 x i16>, <8 x i16>* %ptr_b 2840 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 2841 ret <16 x i8> %res 2842} 2843 2844define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 2845; CHECK-LABEL: test_mask_packs_epi16_rmkz_128: 2846; CHECK: ## BB#0: 2847; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2848; CHECK-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07] 2849; CHECK-NEXT: retq ## encoding: [0xc3] 2850 %b = load <8 x i16>, <8 x i16>* %ptr_b 2851 %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 2852 ret <16 x i8> %res 2853} 2854 2855declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) 2856 2857define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 2858; CHECK-LABEL: test_mask_packs_epi16_rr_256: 2859; CHECK: ## BB#0: 2860; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x63,0xc1] 2861; CHECK-NEXT: retq ## encoding: [0xc3] 2862 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 2863 ret <32 x i8> %res 2864} 2865 2866define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 2867; CHECK-LABEL: test_mask_packs_epi16_rrk_256: 2868; CHECK: ## BB#0: 2869; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 2870; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1] 2871; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2872; CHECK-NEXT: retq ## encoding: [0xc3] 2873 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 2874 ret <32 x i8> %res 2875} 2876 2877define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 2878; CHECK-LABEL: test_mask_packs_epi16_rrkz_256: 2879; CHECK: ## BB#0: 2880; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 2881; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1] 2882; CHECK-NEXT: retq ## encoding: [0xc3] 2883 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 2884 ret <32 x i8> %res 2885} 2886 2887define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 2888; CHECK-LABEL: test_mask_packs_epi16_rm_256: 2889; CHECK: ## BB#0: 2890; CHECK-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x63,0x07] 2891; CHECK-NEXT: retq ## encoding: [0xc3] 2892 %b = load <16 x i16>, <16 x i16>* %ptr_b 2893 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 2894 ret <32 x i8> %res 2895} 2896 2897define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 2898; CHECK-LABEL: test_mask_packs_epi16_rmk_256: 2899; CHECK: ## BB#0: 2900; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 2901; CHECK-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f] 2902; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2903; CHECK-NEXT: retq ## encoding: [0xc3] 2904 %b = load <16 x i16>, <16 x i16>* %ptr_b 2905 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 2906 ret <32 x i8> %res 2907} 2908 2909define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 2910; CHECK-LABEL: test_mask_packs_epi16_rmkz_256: 2911; CHECK: ## BB#0: 2912; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 2913; CHECK-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07] 2914; CHECK-NEXT: retq ## encoding: [0xc3] 2915 %b = load <16 x i16>, <16 x i16>* %ptr_b 2916 %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 2917 ret <32 x i8> %res 2918} 2919 2920declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) 2921 2922 2923define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 2924; CHECK-LABEL: test_mask_packus_epi32_rr_128: 2925; CHECK: ## BB#0: 2926; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0xc1] 2927; CHECK-NEXT: retq ## encoding: [0xc3] 2928 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2929 ret <8 x i16> %res 2930} 2931 2932define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) { 2933; CHECK-LABEL: test_mask_packus_epi32_rrk_128: 2934; CHECK: ## BB#0: 2935; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2936; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1] 2937; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2938; CHECK-NEXT: retq ## encoding: [0xc3] 2939 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2940 ret <8 x i16> %res 2941} 2942 2943define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 2944; CHECK-LABEL: test_mask_packus_epi32_rrkz_128: 2945; CHECK: ## BB#0: 2946; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2947; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1] 2948; CHECK-NEXT: retq ## encoding: [0xc3] 2949 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2950 ret <8 x i16> %res 2951} 2952 2953define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 2954; CHECK-LABEL: test_mask_packus_epi32_rm_128: 2955; CHECK: ## BB#0: 2956; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0x07] 2957; CHECK-NEXT: retq ## encoding: [0xc3] 2958 %b = load <4 x i32>, <4 x i32>* %ptr_b 2959 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2960 ret <8 x i16> %res 2961} 2962 2963define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2964; CHECK-LABEL: test_mask_packus_epi32_rmk_128: 2965; CHECK: ## BB#0: 2966; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2967; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f] 2968; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 2969; CHECK-NEXT: retq ## encoding: [0xc3] 2970 %b = load <4 x i32>, <4 x i32>* %ptr_b 2971 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 2972 ret <8 x i16> %res 2973} 2974 2975define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 2976; CHECK-LABEL: test_mask_packus_epi32_rmkz_128: 2977; CHECK: ## BB#0: 2978; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2979; CHECK-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07] 2980; CHECK-NEXT: retq ## encoding: [0xc3] 2981 %b = load <4 x i32>, <4 x i32>* %ptr_b 2982 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 2983 ret <8 x i16> %res 2984} 2985 2986define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 2987; CHECK-LABEL: test_mask_packus_epi32_rmb_128: 2988; CHECK: ## BB#0: 2989; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07] 2990; CHECK-NEXT: retq ## encoding: [0xc3] 2991 %q = load i32, i32* %ptr_b 2992 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 2993 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 2994 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1) 2995 ret <8 x i16> %res 2996} 2997 2998define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) { 2999; CHECK-LABEL: test_mask_packus_epi32_rmbk_128: 3000; CHECK: ## BB#0: 3001; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3002; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f] 3003; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3004; CHECK-NEXT: retq ## encoding: [0xc3] 3005 %q = load i32, i32* %ptr_b 3006 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3007 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3008 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) 3009 ret <8 x i16> %res 3010} 3011 3012define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 3013; CHECK-LABEL: test_mask_packus_epi32_rmbkz_128: 3014; CHECK: ## BB#0: 3015; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3016; CHECK-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07] 3017; CHECK-NEXT: retq ## encoding: [0xc3] 3018 %q = load i32, i32* %ptr_b 3019 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 3020 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 3021 %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask) 3022 ret <8 x i16> %res 3023} 3024 3025declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8) 3026 3027define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 3028; CHECK-LABEL: test_mask_packus_epi32_rr_256: 3029; CHECK: ## BB#0: 3030; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2b,0xc1] 3031; CHECK-NEXT: retq ## encoding: [0xc3] 3032 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 3033 ret <16 x i16> %res 3034} 3035 3036define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) { 3037; CHECK-LABEL: test_mask_packus_epi32_rrk_256: 3038; CHECK: ## BB#0: 3039; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3040; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1] 3041; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3042; CHECK-NEXT: retq ## encoding: [0xc3] 3043 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3044 ret <16 x i16> %res 3045} 3046 3047define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) { 3048; CHECK-LABEL: test_mask_packus_epi32_rrkz_256: 3049; CHECK: ## BB#0: 3050; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3051; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1] 3052; CHECK-NEXT: retq ## encoding: [0xc3] 3053 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 3054 ret <16 x i16> %res 3055} 3056 3057define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 3058; CHECK-LABEL: test_mask_packus_epi32_rm_256: 3059; CHECK: ## BB#0: 3060; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2b,0x07] 3061; CHECK-NEXT: retq ## encoding: [0xc3] 3062 %b = load <8 x i32>, <8 x i32>* %ptr_b 3063 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 3064 ret <16 x i16> %res 3065} 3066 3067define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3068; CHECK-LABEL: test_mask_packus_epi32_rmk_256: 3069; CHECK: ## BB#0: 3070; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3071; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f] 3072; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3073; CHECK-NEXT: retq ## encoding: [0xc3] 3074 %b = load <8 x i32>, <8 x i32>* %ptr_b 3075 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3076 ret <16 x i16> %res 3077} 3078 3079define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) { 3080; CHECK-LABEL: test_mask_packus_epi32_rmkz_256: 3081; CHECK: ## BB#0: 3082; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3083; CHECK-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07] 3084; CHECK-NEXT: retq ## encoding: [0xc3] 3085 %b = load <8 x i32>, <8 x i32>* %ptr_b 3086 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 3087 ret <16 x i16> %res 3088} 3089 3090define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 3091; CHECK-LABEL: test_mask_packus_epi32_rmb_256: 3092; CHECK: ## BB#0: 3093; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07] 3094; CHECK-NEXT: retq ## encoding: [0xc3] 3095 %q = load i32, i32* %ptr_b 3096 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3097 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3098 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1) 3099 ret <16 x i16> %res 3100} 3101 3102define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3103; CHECK-LABEL: test_mask_packus_epi32_rmbk_256: 3104; CHECK: ## BB#0: 3105; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3106; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f] 3107; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3108; CHECK-NEXT: retq ## encoding: [0xc3] 3109 %q = load i32, i32* %ptr_b 3110 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3111 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3112 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) 3113 ret <16 x i16> %res 3114} 3115 3116define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) { 3117; CHECK-LABEL: test_mask_packus_epi32_rmbkz_256: 3118; CHECK: ## BB#0: 3119; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3120; CHECK-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07] 3121; CHECK-NEXT: retq ## encoding: [0xc3] 3122 %q = load i32, i32* %ptr_b 3123 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 3124 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 3125 %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask) 3126 ret <16 x i16> %res 3127} 3128 3129declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16) 3130 3131define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 3132; CHECK-LABEL: test_mask_packus_epi16_rr_128: 3133; CHECK: ## BB#0: 3134; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x67,0xc1] 3135; CHECK-NEXT: retq ## encoding: [0xc3] 3136 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 3137 ret <16 x i8> %res 3138} 3139 3140define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) { 3141; CHECK-LABEL: test_mask_packus_epi16_rrk_128: 3142; CHECK: ## BB#0: 3143; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3144; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1] 3145; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3146; CHECK-NEXT: retq ## encoding: [0xc3] 3147 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 3148 ret <16 x i8> %res 3149} 3150 3151define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) { 3152; CHECK-LABEL: test_mask_packus_epi16_rrkz_128: 3153; CHECK: ## BB#0: 3154; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3155; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1] 3156; CHECK-NEXT: retq ## encoding: [0xc3] 3157 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 3158 ret <16 x i8> %res 3159} 3160 3161define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 3162; CHECK-LABEL: test_mask_packus_epi16_rm_128: 3163; CHECK: ## BB#0: 3164; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x67,0x07] 3165; CHECK-NEXT: retq ## encoding: [0xc3] 3166 %b = load <8 x i16>, <8 x i16>* %ptr_b 3167 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1) 3168 ret <16 x i8> %res 3169} 3170 3171define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 3172; CHECK-LABEL: test_mask_packus_epi16_rmk_128: 3173; CHECK: ## BB#0: 3174; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3175; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f] 3176; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3177; CHECK-NEXT: retq ## encoding: [0xc3] 3178 %b = load <8 x i16>, <8 x i16>* %ptr_b 3179 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) 3180 ret <16 x i8> %res 3181} 3182 3183define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) { 3184; CHECK-LABEL: test_mask_packus_epi16_rmkz_128: 3185; CHECK: ## BB#0: 3186; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3187; CHECK-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07] 3188; CHECK-NEXT: retq ## encoding: [0xc3] 3189 %b = load <8 x i16>, <8 x i16>* %ptr_b 3190 %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask) 3191 ret <16 x i8> %res 3192} 3193 3194declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16) 3195 3196define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 3197; CHECK-LABEL: test_mask_packus_epi16_rr_256: 3198; CHECK: ## BB#0: 3199; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x67,0xc1] 3200; CHECK-NEXT: retq ## encoding: [0xc3] 3201 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 3202 ret <32 x i8> %res 3203} 3204 3205define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) { 3206; CHECK-LABEL: test_mask_packus_epi16_rrk_256: 3207; CHECK: ## BB#0: 3208; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 3209; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1] 3210; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3211; CHECK-NEXT: retq ## encoding: [0xc3] 3212 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 3213 ret <32 x i8> %res 3214} 3215 3216define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) { 3217; CHECK-LABEL: test_mask_packus_epi16_rrkz_256: 3218; CHECK: ## BB#0: 3219; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 3220; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1] 3221; CHECK-NEXT: retq ## encoding: [0xc3] 3222 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 3223 ret <32 x i8> %res 3224} 3225 3226define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 3227; CHECK-LABEL: test_mask_packus_epi16_rm_256: 3228; CHECK: ## BB#0: 3229; CHECK-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x67,0x07] 3230; CHECK-NEXT: retq ## encoding: [0xc3] 3231 %b = load <16 x i16>, <16 x i16>* %ptr_b 3232 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1) 3233 ret <32 x i8> %res 3234} 3235 3236define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 3237; CHECK-LABEL: test_mask_packus_epi16_rmk_256: 3238; CHECK: ## BB#0: 3239; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 3240; CHECK-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f] 3241; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3242; CHECK-NEXT: retq ## encoding: [0xc3] 3243 %b = load <16 x i16>, <16 x i16>* %ptr_b 3244 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) 3245 ret <32 x i8> %res 3246} 3247 3248define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) { 3249; CHECK-LABEL: test_mask_packus_epi16_rmkz_256: 3250; CHECK: ## BB#0: 3251; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 3252; CHECK-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07] 3253; CHECK-NEXT: retq ## encoding: [0xc3] 3254 %b = load <16 x i16>, <16 x i16>* %ptr_b 3255 %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask) 3256 ret <32 x i8> %res 3257} 3258 3259declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32) 3260 3261define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 3262; CHECK-LABEL: test_mask_adds_epi16_rr_128: 3263; CHECK: ## BB#0: 3264; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0xc1] 3265; CHECK-NEXT: retq ## encoding: [0xc3] 3266 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 3267 ret <8 x i16> %res 3268} 3269 3270define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 3271; CHECK-LABEL: test_mask_adds_epi16_rrk_128: 3272; CHECK: ## BB#0: 3273; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3274; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] 3275; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3276; CHECK-NEXT: retq ## encoding: [0xc3] 3277 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 3278 ret <8 x i16> %res 3279} 3280 3281define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 3282; CHECK-LABEL: test_mask_adds_epi16_rrkz_128: 3283; CHECK: ## BB#0: 3284; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3285; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] 3286; CHECK-NEXT: retq ## encoding: [0xc3] 3287 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 3288 ret <8 x i16> %res 3289} 3290 3291define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 3292; CHECK-LABEL: test_mask_adds_epi16_rm_128: 3293; CHECK: ## BB#0: 3294; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0x07] 3295; CHECK-NEXT: retq ## encoding: [0xc3] 3296 %b = load <8 x i16>, <8 x i16>* %ptr_b 3297 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 3298 ret <8 x i16> %res 3299} 3300 3301define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3302; CHECK-LABEL: test_mask_adds_epi16_rmk_128: 3303; CHECK: ## BB#0: 3304; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3305; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f] 3306; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3307; CHECK-NEXT: retq ## encoding: [0xc3] 3308 %b = load <8 x i16>, <8 x i16>* %ptr_b 3309 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 3310 ret <8 x i16> %res 3311} 3312 3313define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 3314; CHECK-LABEL: test_mask_adds_epi16_rmkz_128: 3315; CHECK: ## BB#0: 3316; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3317; CHECK-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07] 3318; CHECK-NEXT: retq ## encoding: [0xc3] 3319 %b = load <8 x i16>, <8 x i16>* %ptr_b 3320 %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 3321 ret <8 x i16> %res 3322} 3323 3324declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3325 3326define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 3327; CHECK-LABEL: test_mask_adds_epi16_rr_256: 3328; CHECK: ## BB#0: 3329; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xed,0xc1] 3330; CHECK-NEXT: retq ## encoding: [0xc3] 3331 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 3332 ret <16 x i16> %res 3333} 3334 3335define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 3336; CHECK-LABEL: test_mask_adds_epi16_rrk_256: 3337; CHECK: ## BB#0: 3338; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3339; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] 3340; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3341; CHECK-NEXT: retq ## encoding: [0xc3] 3342 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 3343 ret <16 x i16> %res 3344} 3345 3346define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 3347; CHECK-LABEL: test_mask_adds_epi16_rrkz_256: 3348; CHECK: ## BB#0: 3349; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3350; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] 3351; CHECK-NEXT: retq ## encoding: [0xc3] 3352 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 3353 ret <16 x i16> %res 3354} 3355 3356define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 3357; CHECK-LABEL: test_mask_adds_epi16_rm_256: 3358; CHECK: ## BB#0: 3359; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xed,0x07] 3360; CHECK-NEXT: retq ## encoding: [0xc3] 3361 %b = load <16 x i16>, <16 x i16>* %ptr_b 3362 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 3363 ret <16 x i16> %res 3364} 3365 3366define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3367; CHECK-LABEL: test_mask_adds_epi16_rmk_256: 3368; CHECK: ## BB#0: 3369; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3370; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f] 3371; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3372; CHECK-NEXT: retq ## encoding: [0xc3] 3373 %b = load <16 x i16>, <16 x i16>* %ptr_b 3374 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 3375 ret <16 x i16> %res 3376} 3377 3378define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 3379; CHECK-LABEL: test_mask_adds_epi16_rmkz_256: 3380; CHECK: ## BB#0: 3381; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3382; CHECK-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07] 3383; CHECK-NEXT: retq ## encoding: [0xc3] 3384 %b = load <16 x i16>, <16 x i16>* %ptr_b 3385 %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 3386 ret <16 x i16> %res 3387} 3388 3389declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3390 3391define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) { 3392; CHECK-LABEL: test_mask_subs_epi16_rr_128: 3393; CHECK: ## BB#0: 3394; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe9,0xc1] 3395; CHECK-NEXT: retq ## encoding: [0xc3] 3396 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 3397 ret <8 x i16> %res 3398} 3399 3400define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 3401; CHECK-LABEL: test_mask_subs_epi16_rrk_128: 3402; CHECK: ## BB#0: 3403; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3404; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] 3405; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3406; CHECK-NEXT: retq ## encoding: [0xc3] 3407 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 3408 ret <8 x i16> %res 3409} 3410 3411define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 3412; CHECK-LABEL: test_mask_subs_epi16_rrkz_128: 3413; CHECK: ## BB#0: 3414; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3415; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] 3416; CHECK-NEXT: retq ## encoding: [0xc3] 3417 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 3418 ret <8 x i16> %res 3419} 3420 3421define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 3422; CHECK-LABEL: test_mask_subs_epi16_rm_128: 3423; CHECK: ## BB#0: 3424; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe9,0x07] 3425; CHECK-NEXT: retq ## encoding: [0xc3] 3426 %b = load <8 x i16>, <8 x i16>* %ptr_b 3427 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 3428 ret <8 x i16> %res 3429} 3430 3431define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3432; CHECK-LABEL: test_mask_subs_epi16_rmk_128: 3433; CHECK: ## BB#0: 3434; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3435; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f] 3436; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3437; CHECK-NEXT: retq ## encoding: [0xc3] 3438 %b = load <8 x i16>, <8 x i16>* %ptr_b 3439 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 3440 ret <8 x i16> %res 3441} 3442 3443define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 3444; CHECK-LABEL: test_mask_subs_epi16_rmkz_128: 3445; CHECK: ## BB#0: 3446; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3447; CHECK-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07] 3448; CHECK-NEXT: retq ## encoding: [0xc3] 3449 %b = load <8 x i16>, <8 x i16>* %ptr_b 3450 %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 3451 ret <8 x i16> %res 3452} 3453 3454declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3455 3456define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) { 3457; CHECK-LABEL: test_mask_subs_epi16_rr_256: 3458; CHECK: ## BB#0: 3459; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe9,0xc1] 3460; CHECK-NEXT: retq ## encoding: [0xc3] 3461 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 3462 ret <16 x i16> %res 3463} 3464 3465define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 3466; CHECK-LABEL: test_mask_subs_epi16_rrk_256: 3467; CHECK: ## BB#0: 3468; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3469; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] 3470; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3471; CHECK-NEXT: retq ## encoding: [0xc3] 3472 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 3473 ret <16 x i16> %res 3474} 3475 3476define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 3477; CHECK-LABEL: test_mask_subs_epi16_rrkz_256: 3478; CHECK: ## BB#0: 3479; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3480; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] 3481; CHECK-NEXT: retq ## encoding: [0xc3] 3482 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 3483 ret <16 x i16> %res 3484} 3485 3486define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 3487; CHECK-LABEL: test_mask_subs_epi16_rm_256: 3488; CHECK: ## BB#0: 3489; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe9,0x07] 3490; CHECK-NEXT: retq ## encoding: [0xc3] 3491 %b = load <16 x i16>, <16 x i16>* %ptr_b 3492 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 3493 ret <16 x i16> %res 3494} 3495 3496define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3497; CHECK-LABEL: test_mask_subs_epi16_rmk_256: 3498; CHECK: ## BB#0: 3499; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3500; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f] 3501; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3502; CHECK-NEXT: retq ## encoding: [0xc3] 3503 %b = load <16 x i16>, <16 x i16>* %ptr_b 3504 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 3505 ret <16 x i16> %res 3506} 3507 3508define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 3509; CHECK-LABEL: test_mask_subs_epi16_rmkz_256: 3510; CHECK: ## BB#0: 3511; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3512; CHECK-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07] 3513; CHECK-NEXT: retq ## encoding: [0xc3] 3514 %b = load <16 x i16>, <16 x i16>* %ptr_b 3515 %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 3516 ret <16 x i16> %res 3517} 3518 3519declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3520 3521define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { 3522; CHECK-LABEL: test_mask_adds_epu16_rr_128: 3523; CHECK: ## BB#0: 3524; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0xc1] 3525; CHECK-NEXT: retq ## encoding: [0xc3] 3526 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 3527 ret <8 x i16> %res 3528} 3529 3530define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 3531; CHECK-LABEL: test_mask_adds_epu16_rrk_128: 3532; CHECK: ## BB#0: 3533; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3534; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1] 3535; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3536; CHECK-NEXT: retq ## encoding: [0xc3] 3537 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 3538 ret <8 x i16> %res 3539} 3540 3541define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 3542; CHECK-LABEL: test_mask_adds_epu16_rrkz_128: 3543; CHECK: ## BB#0: 3544; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3545; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1] 3546; CHECK-NEXT: retq ## encoding: [0xc3] 3547 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 3548 ret <8 x i16> %res 3549} 3550 3551define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 3552; CHECK-LABEL: test_mask_adds_epu16_rm_128: 3553; CHECK: ## BB#0: 3554; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0x07] 3555; CHECK-NEXT: retq ## encoding: [0xc3] 3556 %b = load <8 x i16>, <8 x i16>* %ptr_b 3557 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 3558 ret <8 x i16> %res 3559} 3560 3561define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3562; CHECK-LABEL: test_mask_adds_epu16_rmk_128: 3563; CHECK: ## BB#0: 3564; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3565; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f] 3566; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3567; CHECK-NEXT: retq ## encoding: [0xc3] 3568 %b = load <8 x i16>, <8 x i16>* %ptr_b 3569 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 3570 ret <8 x i16> %res 3571} 3572 3573define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 3574; CHECK-LABEL: test_mask_adds_epu16_rmkz_128: 3575; CHECK: ## BB#0: 3576; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3577; CHECK-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07] 3578; CHECK-NEXT: retq ## encoding: [0xc3] 3579 %b = load <8 x i16>, <8 x i16>* %ptr_b 3580 %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 3581 ret <8 x i16> %res 3582} 3583 3584declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3585 3586define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { 3587; CHECK-LABEL: test_mask_adds_epu16_rr_256: 3588; CHECK: ## BB#0: 3589; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdd,0xc1] 3590; CHECK-NEXT: retq ## encoding: [0xc3] 3591 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 3592 ret <16 x i16> %res 3593} 3594 3595define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 3596; CHECK-LABEL: test_mask_adds_epu16_rrk_256: 3597; CHECK: ## BB#0: 3598; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3599; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1] 3600; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3601; CHECK-NEXT: retq ## encoding: [0xc3] 3602 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 3603 ret <16 x i16> %res 3604} 3605 3606define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 3607; CHECK-LABEL: test_mask_adds_epu16_rrkz_256: 3608; CHECK: ## BB#0: 3609; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3610; CHECK-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1] 3611; CHECK-NEXT: retq ## encoding: [0xc3] 3612 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 3613 ret <16 x i16> %res 3614} 3615 3616define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 3617; CHECK-LABEL: test_mask_adds_epu16_rm_256: 3618; CHECK: ## BB#0: 3619; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdd,0x07] 3620; CHECK-NEXT: retq ## encoding: [0xc3] 3621 %b = load <16 x i16>, <16 x i16>* %ptr_b 3622 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 3623 ret <16 x i16> %res 3624} 3625 3626define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3627; CHECK-LABEL: test_mask_adds_epu16_rmk_256: 3628; CHECK: ## BB#0: 3629; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3630; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f] 3631; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3632; CHECK-NEXT: retq ## encoding: [0xc3] 3633 %b = load <16 x i16>, <16 x i16>* %ptr_b 3634 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 3635 ret <16 x i16> %res 3636} 3637 3638define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 3639; CHECK-LABEL: test_mask_adds_epu16_rmkz_256: 3640; CHECK: ## BB#0: 3641; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3642; CHECK-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07] 3643; CHECK-NEXT: retq ## encoding: [0xc3] 3644 %b = load <16 x i16>, <16 x i16>* %ptr_b 3645 %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 3646 ret <16 x i16> %res 3647} 3648 3649declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3650 3651define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { 3652; CHECK-LABEL: test_mask_subs_epu16_rr_128: 3653; CHECK: ## BB#0: 3654; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0xc1] 3655; CHECK-NEXT: retq ## encoding: [0xc3] 3656 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 3657 ret <8 x i16> %res 3658} 3659 3660define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { 3661; CHECK-LABEL: test_mask_subs_epu16_rrk_128: 3662; CHECK: ## BB#0: 3663; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3664; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1] 3665; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3666; CHECK-NEXT: retq ## encoding: [0xc3] 3667 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 3668 ret <8 x i16> %res 3669} 3670 3671define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { 3672; CHECK-LABEL: test_mask_subs_epu16_rrkz_128: 3673; CHECK: ## BB#0: 3674; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3675; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1] 3676; CHECK-NEXT: retq ## encoding: [0xc3] 3677 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 3678 ret <8 x i16> %res 3679} 3680 3681define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) { 3682; CHECK-LABEL: test_mask_subs_epu16_rm_128: 3683; CHECK: ## BB#0: 3684; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0x07] 3685; CHECK-NEXT: retq ## encoding: [0xc3] 3686 %b = load <8 x i16>, <8 x i16>* %ptr_b 3687 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1) 3688 ret <8 x i16> %res 3689} 3690 3691define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { 3692; CHECK-LABEL: test_mask_subs_epu16_rmk_128: 3693; CHECK: ## BB#0: 3694; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3695; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f] 3696; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3697; CHECK-NEXT: retq ## encoding: [0xc3] 3698 %b = load <8 x i16>, <8 x i16>* %ptr_b 3699 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) 3700 ret <8 x i16> %res 3701} 3702 3703define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { 3704; CHECK-LABEL: test_mask_subs_epu16_rmkz_128: 3705; CHECK: ## BB#0: 3706; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3707; CHECK-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07] 3708; CHECK-NEXT: retq ## encoding: [0xc3] 3709 %b = load <8 x i16>, <8 x i16>* %ptr_b 3710 %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask) 3711 ret <8 x i16> %res 3712} 3713 3714declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 3715 3716define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) { 3717; CHECK-LABEL: test_mask_subs_epu16_rr_256: 3718; CHECK: ## BB#0: 3719; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd9,0xc1] 3720; CHECK-NEXT: retq ## encoding: [0xc3] 3721 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 3722 ret <16 x i16> %res 3723} 3724 3725define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { 3726; CHECK-LABEL: test_mask_subs_epu16_rrk_256: 3727; CHECK: ## BB#0: 3728; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3729; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1] 3730; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3731; CHECK-NEXT: retq ## encoding: [0xc3] 3732 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 3733 ret <16 x i16> %res 3734} 3735 3736define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { 3737; CHECK-LABEL: test_mask_subs_epu16_rrkz_256: 3738; CHECK: ## BB#0: 3739; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3740; CHECK-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1] 3741; CHECK-NEXT: retq ## encoding: [0xc3] 3742 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 3743 ret <16 x i16> %res 3744} 3745 3746define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) { 3747; CHECK-LABEL: test_mask_subs_epu16_rm_256: 3748; CHECK: ## BB#0: 3749; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd9,0x07] 3750; CHECK-NEXT: retq ## encoding: [0xc3] 3751 %b = load <16 x i16>, <16 x i16>* %ptr_b 3752 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1) 3753 ret <16 x i16> %res 3754} 3755 3756define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { 3757; CHECK-LABEL: test_mask_subs_epu16_rmk_256: 3758; CHECK: ## BB#0: 3759; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3760; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f] 3761; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3762; CHECK-NEXT: retq ## encoding: [0xc3] 3763 %b = load <16 x i16>, <16 x i16>* %ptr_b 3764 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) 3765 ret <16 x i16> %res 3766} 3767 3768define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { 3769; CHECK-LABEL: test_mask_subs_epu16_rmkz_256: 3770; CHECK: ## BB#0: 3771; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3772; CHECK-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07] 3773; CHECK-NEXT: retq ## encoding: [0xc3] 3774 %b = load <16 x i16>, <16 x i16>* %ptr_b 3775 %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask) 3776 ret <16 x i16> %res 3777} 3778 3779declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 3780 3781define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { 3782; CHECK-LABEL: test_mask_adds_epi8_rr_128: 3783; CHECK: ## BB#0: 3784; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0xc1] 3785; CHECK-NEXT: retq ## encoding: [0xc3] 3786 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 3787 ret <16 x i8> %res 3788} 3789 3790define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 3791; CHECK-LABEL: test_mask_adds_epi8_rrk_128: 3792; CHECK: ## BB#0: 3793; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3794; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] 3795; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3796; CHECK-NEXT: retq ## encoding: [0xc3] 3797 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 3798 ret <16 x i8> %res 3799} 3800 3801define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 3802; CHECK-LABEL: test_mask_adds_epi8_rrkz_128: 3803; CHECK: ## BB#0: 3804; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3805; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] 3806; CHECK-NEXT: retq ## encoding: [0xc3] 3807 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 3808 ret <16 x i8> %res 3809} 3810 3811define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 3812; CHECK-LABEL: test_mask_adds_epi8_rm_128: 3813; CHECK: ## BB#0: 3814; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0x07] 3815; CHECK-NEXT: retq ## encoding: [0xc3] 3816 %b = load <16 x i8>, <16 x i8>* %ptr_b 3817 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 3818 ret <16 x i8> %res 3819} 3820 3821define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 3822; CHECK-LABEL: test_mask_adds_epi8_rmk_128: 3823; CHECK: ## BB#0: 3824; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3825; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] 3826; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3827; CHECK-NEXT: retq ## encoding: [0xc3] 3828 %b = load <16 x i8>, <16 x i8>* %ptr_b 3829 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 3830 ret <16 x i8> %res 3831} 3832 3833define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 3834; CHECK-LABEL: test_mask_adds_epi8_rmkz_128: 3835; CHECK: ## BB#0: 3836; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3837; CHECK-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07] 3838; CHECK-NEXT: retq ## encoding: [0xc3] 3839 %b = load <16 x i8>, <16 x i8>* %ptr_b 3840 %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 3841 ret <16 x i8> %res 3842} 3843 3844declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 3845 3846define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { 3847; CHECK-LABEL: test_mask_adds_epi8_rr_256: 3848; CHECK: ## BB#0: 3849; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xec,0xc1] 3850; CHECK-NEXT: retq ## encoding: [0xc3] 3851 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 3852 ret <32 x i8> %res 3853} 3854 3855define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 3856; CHECK-LABEL: test_mask_adds_epi8_rrk_256: 3857; CHECK: ## BB#0: 3858; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 3859; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] 3860; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3861; CHECK-NEXT: retq ## encoding: [0xc3] 3862 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 3863 ret <32 x i8> %res 3864} 3865 3866define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 3867; CHECK-LABEL: test_mask_adds_epi8_rrkz_256: 3868; CHECK: ## BB#0: 3869; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 3870; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] 3871; CHECK-NEXT: retq ## encoding: [0xc3] 3872 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 3873 ret <32 x i8> %res 3874} 3875 3876define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 3877; CHECK-LABEL: test_mask_adds_epi8_rm_256: 3878; CHECK: ## BB#0: 3879; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xec,0x07] 3880; CHECK-NEXT: retq ## encoding: [0xc3] 3881 %b = load <32 x i8>, <32 x i8>* %ptr_b 3882 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 3883 ret <32 x i8> %res 3884} 3885 3886define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 3887; CHECK-LABEL: test_mask_adds_epi8_rmk_256: 3888; CHECK: ## BB#0: 3889; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 3890; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f] 3891; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3892; CHECK-NEXT: retq ## encoding: [0xc3] 3893 %b = load <32 x i8>, <32 x i8>* %ptr_b 3894 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 3895 ret <32 x i8> %res 3896} 3897 3898define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 3899; CHECK-LABEL: test_mask_adds_epi8_rmkz_256: 3900; CHECK: ## BB#0: 3901; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 3902; CHECK-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07] 3903; CHECK-NEXT: retq ## encoding: [0xc3] 3904 %b = load <32 x i8>, <32 x i8>* %ptr_b 3905 %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 3906 ret <32 x i8> %res 3907} 3908 3909declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 3910 3911define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { 3912; CHECK-LABEL: test_mask_subs_epi8_rr_128: 3913; CHECK: ## BB#0: 3914; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0xc1] 3915; CHECK-NEXT: retq ## encoding: [0xc3] 3916 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 3917 ret <16 x i8> %res 3918} 3919 3920define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 3921; CHECK-LABEL: test_mask_subs_epi8_rrk_128: 3922; CHECK: ## BB#0: 3923; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3924; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] 3925; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3926; CHECK-NEXT: retq ## encoding: [0xc3] 3927 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 3928 ret <16 x i8> %res 3929} 3930 3931define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 3932; CHECK-LABEL: test_mask_subs_epi8_rrkz_128: 3933; CHECK: ## BB#0: 3934; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3935; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] 3936; CHECK-NEXT: retq ## encoding: [0xc3] 3937 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 3938 ret <16 x i8> %res 3939} 3940 3941define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 3942; CHECK-LABEL: test_mask_subs_epi8_rm_128: 3943; CHECK: ## BB#0: 3944; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0x07] 3945; CHECK-NEXT: retq ## encoding: [0xc3] 3946 %b = load <16 x i8>, <16 x i8>* %ptr_b 3947 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 3948 ret <16 x i8> %res 3949} 3950 3951define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 3952; CHECK-LABEL: test_mask_subs_epi8_rmk_128: 3953; CHECK: ## BB#0: 3954; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3955; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f] 3956; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 3957; CHECK-NEXT: retq ## encoding: [0xc3] 3958 %b = load <16 x i8>, <16 x i8>* %ptr_b 3959 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 3960 ret <16 x i8> %res 3961} 3962 3963define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 3964; CHECK-LABEL: test_mask_subs_epi8_rmkz_128: 3965; CHECK: ## BB#0: 3966; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3967; CHECK-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07] 3968; CHECK-NEXT: retq ## encoding: [0xc3] 3969 %b = load <16 x i8>, <16 x i8>* %ptr_b 3970 %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 3971 ret <16 x i8> %res 3972} 3973 3974declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 3975 3976define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) { 3977; CHECK-LABEL: test_mask_subs_epi8_rr_256: 3978; CHECK: ## BB#0: 3979; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe8,0xc1] 3980; CHECK-NEXT: retq ## encoding: [0xc3] 3981 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 3982 ret <32 x i8> %res 3983} 3984 3985define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 3986; CHECK-LABEL: test_mask_subs_epi8_rrk_256: 3987; CHECK: ## BB#0: 3988; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 3989; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] 3990; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 3991; CHECK-NEXT: retq ## encoding: [0xc3] 3992 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 3993 ret <32 x i8> %res 3994} 3995 3996define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 3997; CHECK-LABEL: test_mask_subs_epi8_rrkz_256: 3998; CHECK: ## BB#0: 3999; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4000; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] 4001; CHECK-NEXT: retq ## encoding: [0xc3] 4002 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 4003 ret <32 x i8> %res 4004} 4005 4006define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 4007; CHECK-LABEL: test_mask_subs_epi8_rm_256: 4008; CHECK: ## BB#0: 4009; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe8,0x07] 4010; CHECK-NEXT: retq ## encoding: [0xc3] 4011 %b = load <32 x i8>, <32 x i8>* %ptr_b 4012 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 4013 ret <32 x i8> %res 4014} 4015 4016define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 4017; CHECK-LABEL: test_mask_subs_epi8_rmk_256: 4018; CHECK: ## BB#0: 4019; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 4020; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f] 4021; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 4022; CHECK-NEXT: retq ## encoding: [0xc3] 4023 %b = load <32 x i8>, <32 x i8>* %ptr_b 4024 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 4025 ret <32 x i8> %res 4026} 4027 4028define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 4029; CHECK-LABEL: test_mask_subs_epi8_rmkz_256: 4030; CHECK: ## BB#0: 4031; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 4032; CHECK-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07] 4033; CHECK-NEXT: retq ## encoding: [0xc3] 4034 %b = load <32 x i8>, <32 x i8>* %ptr_b 4035 %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 4036 ret <32 x i8> %res 4037} 4038 4039declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4040 4041define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { 4042; CHECK-LABEL: test_mask_adds_epu8_rr_128: 4043; CHECK: ## BB#0: 4044; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0xc1] 4045; CHECK-NEXT: retq ## encoding: [0xc3] 4046 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 4047 ret <16 x i8> %res 4048} 4049 4050define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 4051; CHECK-LABEL: test_mask_adds_epu8_rrk_128: 4052; CHECK: ## BB#0: 4053; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4054; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1] 4055; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 4056; CHECK-NEXT: retq ## encoding: [0xc3] 4057 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 4058 ret <16 x i8> %res 4059} 4060 4061define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 4062; CHECK-LABEL: test_mask_adds_epu8_rrkz_128: 4063; CHECK: ## BB#0: 4064; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4065; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1] 4066; CHECK-NEXT: retq ## encoding: [0xc3] 4067 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 4068 ret <16 x i8> %res 4069} 4070 4071define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 4072; CHECK-LABEL: test_mask_adds_epu8_rm_128: 4073; CHECK: ## BB#0: 4074; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0x07] 4075; CHECK-NEXT: retq ## encoding: [0xc3] 4076 %b = load <16 x i8>, <16 x i8>* %ptr_b 4077 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 4078 ret <16 x i8> %res 4079} 4080 4081define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 4082; CHECK-LABEL: test_mask_adds_epu8_rmk_128: 4083; CHECK: ## BB#0: 4084; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 4085; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f] 4086; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 4087; CHECK-NEXT: retq ## encoding: [0xc3] 4088 %b = load <16 x i8>, <16 x i8>* %ptr_b 4089 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 4090 ret <16 x i8> %res 4091} 4092 4093define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 4094; CHECK-LABEL: test_mask_adds_epu8_rmkz_128: 4095; CHECK: ## BB#0: 4096; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 4097; CHECK-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07] 4098; CHECK-NEXT: retq ## encoding: [0xc3] 4099 %b = load <16 x i8>, <16 x i8>* %ptr_b 4100 %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 4101 ret <16 x i8> %res 4102} 4103 4104declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 4105 4106define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { 4107; CHECK-LABEL: test_mask_adds_epu8_rr_256: 4108; CHECK: ## BB#0: 4109; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdc,0xc1] 4110; CHECK-NEXT: retq ## encoding: [0xc3] 4111 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 4112 ret <32 x i8> %res 4113} 4114 4115define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 4116; CHECK-LABEL: test_mask_adds_epu8_rrk_256: 4117; CHECK: ## BB#0: 4118; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4119; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1] 4120; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 4121; CHECK-NEXT: retq ## encoding: [0xc3] 4122 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 4123 ret <32 x i8> %res 4124} 4125 4126define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 4127; CHECK-LABEL: test_mask_adds_epu8_rrkz_256: 4128; CHECK: ## BB#0: 4129; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4130; CHECK-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1] 4131; CHECK-NEXT: retq ## encoding: [0xc3] 4132 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 4133 ret <32 x i8> %res 4134} 4135 4136define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 4137; CHECK-LABEL: test_mask_adds_epu8_rm_256: 4138; CHECK: ## BB#0: 4139; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdc,0x07] 4140; CHECK-NEXT: retq ## encoding: [0xc3] 4141 %b = load <32 x i8>, <32 x i8>* %ptr_b 4142 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 4143 ret <32 x i8> %res 4144} 4145 4146define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 4147; CHECK-LABEL: test_mask_adds_epu8_rmk_256: 4148; CHECK: ## BB#0: 4149; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 4150; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f] 4151; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 4152; CHECK-NEXT: retq ## encoding: [0xc3] 4153 %b = load <32 x i8>, <32 x i8>* %ptr_b 4154 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 4155 ret <32 x i8> %res 4156} 4157 4158define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 4159; CHECK-LABEL: test_mask_adds_epu8_rmkz_256: 4160; CHECK: ## BB#0: 4161; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 4162; CHECK-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07] 4163; CHECK-NEXT: retq ## encoding: [0xc3] 4164 %b = load <32 x i8>, <32 x i8>* %ptr_b 4165 %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 4166 ret <32 x i8> %res 4167} 4168 4169declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4170 4171define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) { 4172; CHECK-LABEL: test_mask_subs_epu8_rr_128: 4173; CHECK: ## BB#0: 4174; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0xc1] 4175; CHECK-NEXT: retq ## encoding: [0xc3] 4176 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 4177 ret <16 x i8> %res 4178} 4179 4180define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { 4181; CHECK-LABEL: test_mask_subs_epu8_rrk_128: 4182; CHECK: ## BB#0: 4183; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4184; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1] 4185; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 4186; CHECK-NEXT: retq ## encoding: [0xc3] 4187 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 4188 ret <16 x i8> %res 4189} 4190 4191define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { 4192; CHECK-LABEL: test_mask_subs_epu8_rrkz_128: 4193; CHECK: ## BB#0: 4194; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4195; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1] 4196; CHECK-NEXT: retq ## encoding: [0xc3] 4197 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 4198 ret <16 x i8> %res 4199} 4200 4201define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { 4202; CHECK-LABEL: test_mask_subs_epu8_rm_128: 4203; CHECK: ## BB#0: 4204; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0x07] 4205; CHECK-NEXT: retq ## encoding: [0xc3] 4206 %b = load <16 x i8>, <16 x i8>* %ptr_b 4207 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1) 4208 ret <16 x i8> %res 4209} 4210 4211define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { 4212; CHECK-LABEL: test_mask_subs_epu8_rmk_128: 4213; CHECK: ## BB#0: 4214; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 4215; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f] 4216; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 4217; CHECK-NEXT: retq ## encoding: [0xc3] 4218 %b = load <16 x i8>, <16 x i8>* %ptr_b 4219 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) 4220 ret <16 x i8> %res 4221} 4222 4223define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { 4224; CHECK-LABEL: test_mask_subs_epu8_rmkz_128: 4225; CHECK: ## BB#0: 4226; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 4227; CHECK-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07] 4228; CHECK-NEXT: retq ## encoding: [0xc3] 4229 %b = load <16 x i8>, <16 x i8>* %ptr_b 4230 %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask) 4231 ret <16 x i8> %res 4232} 4233 4234declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 4235 4236define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) { 4237; CHECK-LABEL: test_mask_subs_epu8_rr_256: 4238; CHECK: ## BB#0: 4239; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd8,0xc1] 4240; CHECK-NEXT: retq ## encoding: [0xc3] 4241 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 4242 ret <32 x i8> %res 4243} 4244 4245define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { 4246; CHECK-LABEL: test_mask_subs_epu8_rrk_256: 4247; CHECK: ## BB#0: 4248; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4249; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1] 4250; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 4251; CHECK-NEXT: retq ## encoding: [0xc3] 4252 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 4253 ret <32 x i8> %res 4254} 4255 4256define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { 4257; CHECK-LABEL: test_mask_subs_epu8_rrkz_256: 4258; CHECK: ## BB#0: 4259; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4260; CHECK-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1] 4261; CHECK-NEXT: retq ## encoding: [0xc3] 4262 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 4263 ret <32 x i8> %res 4264} 4265 4266define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) { 4267; CHECK-LABEL: test_mask_subs_epu8_rm_256: 4268; CHECK: ## BB#0: 4269; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd8,0x07] 4270; CHECK-NEXT: retq ## encoding: [0xc3] 4271 %b = load <32 x i8>, <32 x i8>* %ptr_b 4272 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1) 4273 ret <32 x i8> %res 4274} 4275 4276define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { 4277; CHECK-LABEL: test_mask_subs_epu8_rmk_256: 4278; CHECK: ## BB#0: 4279; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 4280; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f] 4281; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 4282; CHECK-NEXT: retq ## encoding: [0xc3] 4283 %b = load <32 x i8>, <32 x i8>* %ptr_b 4284 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) 4285 ret <32 x i8> %res 4286} 4287 4288define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { 4289; CHECK-LABEL: test_mask_subs_epu8_rmkz_256: 4290; CHECK: ## BB#0: 4291; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 4292; CHECK-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07] 4293; CHECK-NEXT: retq ## encoding: [0xc3] 4294 %b = load <32 x i8>, <32 x i8>* %ptr_b 4295 %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask) 4296 ret <32 x i8> %res 4297} 4298 4299declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4300 4301declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 4302 4303define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 4304; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_b_128: 4305; CHECK: ## BB#0: 4306; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4307; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1] 4308; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1] 4309; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] 4310; CHECK-NEXT: retq ## encoding: [0xc3] 4311 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask) 4312 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 4313 %res2 = add <16 x i8> %res, %res1 4314 ret <16 x i8> %res2 4315} 4316 4317declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4318 4319define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 4320; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_b_256: 4321; CHECK: ## BB#0: 4322; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4323; CHECK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1] 4324; CHECK-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3c,0xc1] 4325; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] 4326; CHECK-NEXT: retq ## encoding: [0xc3] 4327 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 4328 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 4329 %res2 = add <32 x i8> %res, %res1 4330 ret <32 x i8> %res2 4331} 4332 4333declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4334 4335define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4336; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_w_128: 4337; CHECK: ## BB#0: 4338; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4339; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1] 4340; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xee,0xc1] 4341; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 4342; CHECK-NEXT: retq ## encoding: [0xc3] 4343 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4344 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4345 %res2 = add <8 x i16> %res, %res1 4346 ret <8 x i16> %res2 4347} 4348 4349declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4350 4351define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 4352; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_w_256: 4353; CHECK: ## BB#0: 4354; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4355; CHECK-NEXT: vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1] 4356; CHECK-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1] 4357; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 4358; CHECK-NEXT: retq ## encoding: [0xc3] 4359 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 4360 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 4361 %res2 = add <16 x i16> %res, %res1 4362 ret <16 x i16> %res2 4363} 4364 4365declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 4366 4367define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) { 4368; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_b_128: 4369; CHECK: ## BB#0: 4370; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4371; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1] 4372; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1] 4373; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] 4374; CHECK-NEXT: retq ## encoding: [0xc3] 4375 %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 4376 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 4377 %res2 = add <16 x i8> %res, %res1 4378 ret <16 x i8> %res2 4379} 4380 4381declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4382 4383define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 4384; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_b_256: 4385; CHECK: ## BB#0: 4386; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4387; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1] 4388; CHECK-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xde,0xc1] 4389; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] 4390; CHECK-NEXT: retq ## encoding: [0xc3] 4391 %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 4392 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 4393 %res2 = add <32 x i8> %res, %res1 4394 ret <32 x i8> %res2 4395} 4396 4397declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4398 4399define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4400; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_w_128: 4401; CHECK: ## BB#0: 4402; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4403; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1] 4404; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3e,0xc1] 4405; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 4406; CHECK-NEXT: retq ## encoding: [0xc3] 4407 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4408 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4409 %res2 = add <8 x i16> %res, %res1 4410 ret <8 x i16> %res2 4411} 4412 4413declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4414 4415define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 4416; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_w_256: 4417; CHECK: ## BB#0: 4418; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4419; CHECK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1] 4420; CHECK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1] 4421; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 4422; CHECK-NEXT: retq ## encoding: [0xc3] 4423 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 4424 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 4425 %res2 = add <16 x i16> %res, %res1 4426 ret <16 x i16> %res2 4427} 4428 4429declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 4430 4431define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 4432; CHECK-LABEL: test_int_x86_avx512_mask_pmins_b_128: 4433; CHECK: ## BB#0: 4434; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4435; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1] 4436; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1] 4437; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] 4438; CHECK-NEXT: retq ## encoding: [0xc3] 4439 %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 4440 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 4441 %res2 = add <16 x i8> %res, %res1 4442 ret <16 x i8> %res2 4443} 4444 4445declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4446 4447define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 4448; CHECK-LABEL: test_int_x86_avx512_mask_pmins_b_256: 4449; CHECK: ## BB#0: 4450; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4451; CHECK-NEXT: vpminsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1] 4452; CHECK-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x38,0xc1] 4453; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] 4454; CHECK-NEXT: retq ## encoding: [0xc3] 4455 %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 4456 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 4457 %res2 = add <32 x i8> %res, %res1 4458 ret <32 x i8> %res2 4459} 4460 4461declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4462 4463define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4464; CHECK-LABEL: test_int_x86_avx512_mask_pmins_w_128: 4465; CHECK: ## BB#0: 4466; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4467; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1] 4468; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xea,0xc1] 4469; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 4470; CHECK-NEXT: retq ## encoding: [0xc3] 4471 %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4472 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4473 %res2 = add <8 x i16> %res, %res1 4474 ret <8 x i16> %res2 4475} 4476 4477declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4478 4479define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 4480; CHECK-LABEL: test_int_x86_avx512_mask_pmins_w_256: 4481; CHECK: ## BB#0: 4482; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4483; CHECK-NEXT: vpminsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1] 4484; CHECK-NEXT: vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1] 4485; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 4486; CHECK-NEXT: retq ## encoding: [0xc3] 4487 %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 4488 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 4489 %res2 = add <16 x i16> %res, %res1 4490 ret <16 x i16> %res2 4491} 4492 4493declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 4494 4495define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) { 4496; CHECK-LABEL: test_int_x86_avx512_mask_pminu_b_128: 4497; CHECK: ## BB#0: 4498; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4499; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1] 4500; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1] 4501; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] 4502; CHECK-NEXT: retq ## encoding: [0xc3] 4503 %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) 4504 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask) 4505 %res2 = add <16 x i8> %res, %res1 4506 ret <16 x i8> %res2 4507} 4508 4509declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4510 4511define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 4512; CHECK-LABEL: test_int_x86_avx512_mask_pminu_b_256: 4513; CHECK: ## BB#0: 4514; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4515; CHECK-NEXT: vpminub %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1] 4516; CHECK-NEXT: vpminub %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xda,0xc1] 4517; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] 4518; CHECK-NEXT: retq ## encoding: [0xc3] 4519 %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 4520 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 4521 %res2 = add <32 x i8> %res, %res1 4522 ret <32 x i8> %res2 4523} 4524 4525declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4526 4527define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4528; CHECK-LABEL: test_int_x86_avx512_mask_pminu_w_128: 4529; CHECK: ## BB#0: 4530; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4531; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1] 4532; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3a,0xc1] 4533; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 4534; CHECK-NEXT: retq ## encoding: [0xc3] 4535 %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4536 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4537 %res2 = add <8 x i16> %res, %res1 4538 ret <8 x i16> %res2 4539} 4540 4541declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4542 4543define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) { 4544; CHECK-LABEL: test_int_x86_avx512_mask_pminu_w_256: 4545; CHECK: ## BB#0: 4546; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4547; CHECK-NEXT: vpminuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1] 4548; CHECK-NEXT: vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1] 4549; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 4550; CHECK-NEXT: retq ## encoding: [0xc3] 4551 %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) 4552 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask) 4553 %res2 = add <16 x i16> %res, %res1 4554 ret <16 x i16> %res2 4555} 4556 4557declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4558 4559define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4560; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: 4561; CHECK: ## BB#0: 4562; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4563; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 4564; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xda] 4565; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca] 4566; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] 4567; CHECK-NEXT: retq ## encoding: [0xc3] 4568 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4569 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4570 %res2 = add <8 x i16> %res, %res1 4571 ret <8 x i16> %res2 4572} 4573 4574declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4575 4576define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4577; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128: 4578; CHECK: ## BB#0: 4579; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4580; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 4581; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xda] 4582; CHECK-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca] 4583; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] 4584; CHECK-NEXT: retq ## encoding: [0xc3] 4585 %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4586 %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4587 %res2 = add <8 x i16> %res, %res1 4588 ret <8 x i16> %res2 4589} 4590 4591declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4592 4593define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 4594; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256: 4595; CHECK: ## BB#0: 4596; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4597; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 4598; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xda] 4599; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca] 4600; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] 4601; CHECK-NEXT: retq ## encoding: [0xc3] 4602 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 4603 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 4604 %res2 = add <16 x i16> %res, %res1 4605 ret <16 x i16> %res2 4606} 4607 4608declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4609 4610define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 4611; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256: 4612; CHECK: ## BB#0: 4613; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4614; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 4615; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xda] 4616; CHECK-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca] 4617; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] 4618; CHECK-NEXT: retq ## encoding: [0xc3] 4619 %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 4620 %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 4621 %res2 = add <16 x i16> %res, %res1 4622 ret <16 x i16> %res2 4623} 4624 4625declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4626 4627define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4628; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128: 4629; CHECK: ## BB#0: 4630; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4631; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 4632; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xda] 4633; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xca] 4634; CHECK-NEXT: vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1] 4635; CHECK-NEXT: retq ## encoding: [0xc3] 4636 %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4637 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4638 %res2 = add <8 x i16> %res, %res1 4639 ret <8 x i16> %res2 4640} 4641 4642declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4643 4644define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 4645; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256: 4646; CHECK: ## BB#0: 4647; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4648; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 4649; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xda] 4650; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xca] 4651; CHECK-NEXT: vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1] 4652; CHECK-NEXT: retq ## encoding: [0xc3] 4653 %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 4654 %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 4655 %res2 = add <16 x i16> %res, %res1 4656 ret <16 x i16> %res2 4657} 4658 4659declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 4660 4661define <16 x i8>@test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 4662; CHECK-LABEL: test_int_x86_avx512_mask_pavg_b_128: 4663; CHECK: ## BB#0: 4664; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4665; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1] 4666; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe0,0xc1] 4667; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] 4668; CHECK-NEXT: retq ## encoding: [0xc3] 4669 %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 4670 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 4671 %res2 = add <16 x i8> %res, %res1 4672 ret <16 x i8> %res2 4673} 4674 4675declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4676 4677define <32 x i8>@test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 4678; CHECK-LABEL: test_int_x86_avx512_mask_pavg_b_256: 4679; CHECK: ## BB#0: 4680; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4681; CHECK-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1] 4682; CHECK-NEXT: vpavgb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe0,0xc1] 4683; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] 4684; CHECK-NEXT: retq ## encoding: [0xc3] 4685 %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 4686 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 4687 %res2 = add <32 x i8> %res, %res1 4688 ret <32 x i8> %res2 4689} 4690 4691declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4692 4693define <8 x i16>@test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4694; CHECK-LABEL: test_int_x86_avx512_mask_pavg_w_128: 4695; CHECK: ## BB#0: 4696; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4697; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1] 4698; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe3,0xc1] 4699; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 4700; CHECK-NEXT: retq ## encoding: [0xc3] 4701 %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4702 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4703 %res2 = add <8 x i16> %res, %res1 4704 ret <8 x i16> %res2 4705} 4706 4707declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4708 4709define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 4710; CHECK-LABEL: test_int_x86_avx512_mask_pavg_w_256: 4711; CHECK: ## BB#0: 4712; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4713; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1] 4714; CHECK-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe3,0xc1] 4715; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 4716; CHECK-NEXT: retq ## encoding: [0xc3] 4717 %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 4718 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 4719 %res2 = add <16 x i16> %res, %res1 4720 ret <16 x i16> %res2 4721} 4722 4723declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 4724 4725define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 4726; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_128: 4727; CHECK: ## BB#0: 4728; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4729; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1] 4730; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x00,0xc1] 4731; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] 4732; CHECK-NEXT: retq ## encoding: [0xc3] 4733 %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 4734 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 4735 %res2 = add <16 x i8> %res, %res1 4736 ret <16 x i8> %res2 4737} 4738 4739declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 4740 4741define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 4742; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_256: 4743; CHECK: ## BB#0: 4744; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4745; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1] 4746; CHECK-NEXT: vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x00,0xc1] 4747; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] 4748; CHECK-NEXT: retq ## encoding: [0xc3] 4749 %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 4750 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 4751 %res2 = add <32 x i8> %res, %res1 4752 ret <32 x i8> %res2 4753} 4754 4755declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16) 4756 4757define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 4758; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_128: 4759; CHECK: ## BB#0: 4760; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4761; CHECK-NEXT: vpabsb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] 4762; CHECK-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1c,0xc0] 4763; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0] 4764; CHECK-NEXT: retq ## encoding: [0xc3] 4765 %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 4766 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) 4767 %res2 = add <16 x i8> %res, %res1 4768 ret <16 x i8> %res2 4769} 4770 4771declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32) 4772 4773define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 4774; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_256: 4775; CHECK: ## BB#0: 4776; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 4777; CHECK-NEXT: vpabsb %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8] 4778; CHECK-NEXT: vpabsb %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1c,0xc0] 4779; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0] 4780; CHECK-NEXT: retq ## encoding: [0xc3] 4781 %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 4782 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1) 4783 %res2 = add <32 x i8> %res, %res1 4784 ret <32 x i8> %res2 4785} 4786 4787declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8) 4788 4789define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 4790; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_128: 4791; CHECK: ## BB#0: 4792; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4793; CHECK-NEXT: vpabsw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] 4794; CHECK-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1d,0xc0] 4795; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 4796; CHECK-NEXT: retq ## encoding: [0xc3] 4797 %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 4798 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) 4799 %res2 = add <8 x i16> %res, %res1 4800 ret <8 x i16> %res2 4801} 4802 4803declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16) 4804 4805define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 4806; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_256: 4807; CHECK: ## BB#0: 4808; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4809; CHECK-NEXT: vpabsw %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] 4810; CHECK-NEXT: vpabsw %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1d,0xc0] 4811; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 4812; CHECK-NEXT: retq ## encoding: [0xc3] 4813 %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 4814 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1) 4815 %res2 = add <16 x i16> %res, %res1 4816 ret <16 x i16> %res2 4817} 4818 4819declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4820 4821define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4822; CHECK-LABEL: test_int_x86_avx512_mask_pmulhu_w_128: 4823; CHECK: ## BB#0: 4824; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4825; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1] 4826; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe4,0xc1] 4827; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 4828; CHECK-NEXT: retq ## encoding: [0xc3] 4829 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4830 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4831 %res2 = add <8 x i16> %res, %res1 4832 ret <8 x i16> %res2 4833} 4834 4835declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4836 4837define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 4838; CHECK-LABEL: test_int_x86_avx512_mask_pmulhu_w_256: 4839; CHECK: ## BB#0: 4840; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4841; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1] 4842; CHECK-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe4,0xc1] 4843; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 4844; CHECK-NEXT: retq ## encoding: [0xc3] 4845 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 4846 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 4847 %res2 = add <16 x i16> %res, %res1 4848 ret <16 x i16> %res2 4849} 4850 4851declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4852 4853define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4854; CHECK-LABEL: test_int_x86_avx512_mask_pmulh_w_128: 4855; CHECK: ## BB#0: 4856; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4857; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1] 4858; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe5,0xc1] 4859; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 4860; CHECK-NEXT: retq ## encoding: [0xc3] 4861 %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4862 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4863 %res2 = add <8 x i16> %res, %res1 4864 ret <8 x i16> %res2 4865} 4866 4867declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4868 4869define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 4870; CHECK-LABEL: test_int_x86_avx512_mask_pmulh_w_256: 4871; CHECK: ## BB#0: 4872; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4873; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1] 4874; CHECK-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe5,0xc1] 4875; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 4876; CHECK-NEXT: retq ## encoding: [0xc3] 4877 %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 4878 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 4879 %res2 = add <16 x i16> %res, %res1 4880 ret <16 x i16> %res2 4881} 4882 4883declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 4884 4885define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 4886; CHECK-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128: 4887; CHECK: ## BB#0: 4888; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4889; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1] 4890; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0b,0xc1] 4891; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 4892; CHECK-NEXT: retq ## encoding: [0xc3] 4893 %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 4894 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 4895 %res2 = add <8 x i16> %res, %res1 4896 ret <8 x i16> %res2 4897} 4898 4899declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 4900 4901define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 4902; CHECK-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256: 4903; CHECK: ## BB#0: 4904; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4905; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1] 4906; CHECK-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x0b,0xc1] 4907; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 4908; CHECK-NEXT: retq ## encoding: [0xc3] 4909 %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 4910 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 4911 %res2 = add <16 x i16> %res, %res1 4912 ret <16 x i16> %res2 4913} 4914 4915declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) 4916 4917define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 4918; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128: 4919; CHECK: ## BB#0: 4920; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4921; CHECK-NEXT: vpmovwb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] 4922; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] 4923; CHECK-NEXT: vpmovwb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0] 4924; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] 4925; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] 4926; CHECK-NEXT: retq ## encoding: [0xc3] 4927 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 4928 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 4929 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 4930 %res3 = add <16 x i8> %res0, %res1 4931 %res4 = add <16 x i8> %res3, %res2 4932 ret <16 x i8> %res4 4933} 4934 4935declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8) 4936 4937define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 4938; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128: 4939; CHECK: ## BB#0: 4940; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 4941; CHECK-NEXT: vpmovwb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07] 4942; CHECK-NEXT: vpmovwb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07] 4943; CHECK-NEXT: retq ## encoding: [0xc3] 4944 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 4945 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) 4946 ret void 4947} 4948 4949declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8) 4950 4951define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 4952; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: 4953; CHECK: ## BB#0: 4954; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4955; CHECK-NEXT: vpmovswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] 4956; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] 4957; CHECK-NEXT: vpmovswb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0] 4958; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] 4959; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] 4960; CHECK-NEXT: retq ## encoding: [0xc3] 4961 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 4962 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 4963 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 4964 %res3 = add <16 x i8> %res0, %res1 4965 %res4 = add <16 x i8> %res3, %res2 4966 ret <16 x i8> %res4 4967} 4968 4969declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8) 4970 4971define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 4972; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: 4973; CHECK: ## BB#0: 4974; CHECK-NEXT: vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07] 4975; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 4976; CHECK-NEXT: vpmovswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07] 4977; CHECK-NEXT: retq ## encoding: [0xc3] 4978 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 4979 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) 4980 ret void 4981} 4982 4983declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) 4984 4985define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { 4986; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: 4987; CHECK: ## BB#0: 4988; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4989; CHECK-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] 4990; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] 4991; CHECK-NEXT: vpmovuswb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0] 4992; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] 4993; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] 4994; CHECK-NEXT: retq ## encoding: [0xc3] 4995 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) 4996 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) 4997 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) 4998 %res3 = add <16 x i8> %res0, %res1 4999 %res4 = add <16 x i8> %res3, %res2 5000 ret <16 x i8> %res4 5001} 5002 5003declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8) 5004 5005define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { 5006; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: 5007; CHECK: ## BB#0: 5008; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07] 5009; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5010; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07] 5011; CHECK-NEXT: retq ## encoding: [0xc3] 5012 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) 5013 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2) 5014 ret void 5015} 5016 5017declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16) 5018 5019define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 5020; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256: 5021; CHECK: ## BB#0: 5022; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5023; CHECK-NEXT: vpmovwb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1] 5024; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc2] 5025; CHECK-NEXT: vpmovwb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0] 5026; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] 5027; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] 5028; CHECK-NEXT: retq ## encoding: [0xc3] 5029 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 5030 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 5031 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 5032 %res3 = add <16 x i8> %res0, %res1 5033 %res4 = add <16 x i8> %res3, %res2 5034 ret <16 x i8> %res4 5035} 5036 5037declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16) 5038 5039define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { 5040; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256: 5041; CHECK: ## BB#0: 5042; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5043; CHECK-NEXT: vpmovwb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07] 5044; CHECK-NEXT: vpmovwb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07] 5045; CHECK-NEXT: retq ## encoding: [0xc3] 5046 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) 5047 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) 5048 ret void 5049} 5050 5051declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16) 5052 5053define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 5054; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256: 5055; CHECK: ## BB#0: 5056; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5057; CHECK-NEXT: vpmovswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1] 5058; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc2] 5059; CHECK-NEXT: vpmovswb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0] 5060; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] 5061; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] 5062; CHECK-NEXT: retq ## encoding: [0xc3] 5063 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 5064 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 5065 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 5066 %res3 = add <16 x i8> %res0, %res1 5067 %res4 = add <16 x i8> %res3, %res2 5068 ret <16 x i8> %res4 5069} 5070 5071declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16) 5072 5073define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { 5074; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: 5075; CHECK: ## BB#0: 5076; CHECK-NEXT: vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07] 5077; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5078; CHECK-NEXT: vpmovswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07] 5079; CHECK-NEXT: retq ## encoding: [0xc3] 5080 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) 5081 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) 5082 ret void 5083} 5084 5085declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16) 5086 5087define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) { 5088; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256: 5089; CHECK: ## BB#0: 5090; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5091; CHECK-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1] 5092; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc2] 5093; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0] 5094; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1] 5095; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2] 5096; CHECK-NEXT: retq ## encoding: [0xc3] 5097 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1) 5098 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) 5099 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2) 5100 %res3 = add <16 x i8> %res0, %res1 5101 %res4 = add <16 x i8> %res3, %res2 5102 ret <16 x i8> %res4 5103} 5104 5105declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16) 5106 5107define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { 5108; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: 5109; CHECK: ## BB#0: 5110; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07] 5111; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5112; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07] 5113; CHECK-NEXT: retq ## encoding: [0xc3] 5114 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) 5115 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2) 5116 ret void 5117} 5118 5119declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8) 5120 5121define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) { 5122; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128: 5123; CHECK: ## BB#0: 5124; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5125; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1] 5126; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf5,0xc1] 5127; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 5128; CHECK-NEXT: retq ## encoding: [0xc3] 5129 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) 5130 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1) 5131 %res2 = add <4 x i32> %res, %res1 5132 ret <4 x i32> %res2 5133} 5134 5135declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8) 5136 5137define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) { 5138; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256: 5139; CHECK: ## BB#0: 5140; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5141; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1] 5142; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf5,0xc1] 5143; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 5144; CHECK-NEXT: retq ## encoding: [0xc3] 5145 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) 5146 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1) 5147 %res2 = add <8 x i32> %res, %res1 5148 ret <8 x i32> %res2 5149} 5150 5151declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8) 5152 5153define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) { 5154; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128: 5155; CHECK: ## BB#0: 5156; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5157; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1] 5158; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x04,0xc1] 5159; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 5160; CHECK-NEXT: retq ## encoding: [0xc3] 5161 %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) 5162 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1) 5163 %res2 = add <8 x i16> %res, %res1 5164 ret <8 x i16> %res2 5165} 5166 5167declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16) 5168 5169define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) { 5170; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256: 5171; CHECK: ## BB#0: 5172; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5173; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1] 5174; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x04,0xc1] 5175; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 5176; CHECK-NEXT: retq ## encoding: [0xc3] 5177 %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) 5178 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1) 5179 %res2 = add <16 x i16> %res, %res1 5180 ret <16 x i16> %res2 5181} 5182 5183declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8) 5184 5185define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { 5186; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: 5187; CHECK: ## BB#0: 5188; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5189; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] 5190; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x02] 5191; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x02] 5192; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] 5193; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1] 5194; CHECK-NEXT: retq ## encoding: [0xc3] 5195 %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) 5196 %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4) 5197 %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1) 5198 %res3 = add <8 x i16> %res, %res1 5199 %res4 = add <8 x i16> %res2, %res3 5200 ret <8 x i16> %res4 5201} 5202 5203declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16) 5204 5205define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { 5206; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: 5207; CHECK: ## BB#0: 5208; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5209; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] 5210; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x02] 5211; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x02] 5212; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] 5213; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5214; CHECK-NEXT: retq ## encoding: [0xc3] 5215 %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) 5216 %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4) 5217 %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1) 5218 %res3 = add <16 x i16> %res, %res1 5219 %res4 = add <16 x i16> %res3, %res2 5220 ret <16 x i16> %res4 5221} 5222 5223declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>) 5224 5225define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) { 5226; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128: 5227; CHECK: ## BB#0: 5228; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0] 5229; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5230; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 5231; CHECK-NEXT: retq ## encoding: [0xc3] 5232 %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0) 5233 ret i16 %res 5234} 5235 5236declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>) 5237 5238define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) { 5239; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256: 5240; CHECK: ## BB#0: 5241; CHECK-NEXT: vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0] 5242; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] 5243; CHECK-NEXT: retq ## encoding: [0xc3] 5244 %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0) 5245 ret i32 %res 5246} 5247 5248declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>) 5249 5250define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) { 5251; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128: 5252; CHECK: ## BB#0: 5253; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0] 5254; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5255; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 5256; CHECK-NEXT: retq ## encoding: [0xc3] 5257 %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0) 5258 ret i8 %res 5259} 5260 5261declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>) 5262 5263define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) { 5264; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256: 5265; CHECK: ## BB#0: 5266; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0] 5267; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5268; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 5269; CHECK-NEXT: retq ## encoding: [0xc3] 5270 %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0) 5271 ret i16 %res 5272} 5273 5274declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16) 5275 5276define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) { 5277; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_128: 5278; CHECK: ## BB#0: 5279; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7] 5280; CHECK-NEXT: vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0] 5281; CHECK-NEXT: retq ## encoding: [0xc3] 5282 %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0) 5283 ret <16 x i8> %res 5284} 5285 5286declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32) 5287 5288define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) { 5289; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_256: 5290; CHECK: ## BB#0: 5291; CHECK-NEXT: kmovd %edi, %k0 ## encoding: [0xc5,0xfb,0x92,0xc7] 5292; CHECK-NEXT: vpmovm2b %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0] 5293; CHECK-NEXT: retq ## encoding: [0xc3] 5294 %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0) 5295 ret <32 x i8> %res 5296} 5297 5298declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8) 5299 5300define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) { 5301; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_128: 5302; CHECK: ## BB#0: 5303; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7] 5304; CHECK-NEXT: vpmovm2w %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0] 5305; CHECK-NEXT: retq ## encoding: [0xc3] 5306 %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0) 5307 ret <8 x i16> %res 5308} 5309 5310declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16) 5311 5312define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) { 5313; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_256: 5314; CHECK: ## BB#0: 5315; CHECK-NEXT: kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7] 5316; CHECK-NEXT: vpmovm2w %k0, %ymm0 ## encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0] 5317; CHECK-NEXT: retq ## encoding: [0xc3] 5318 %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0) 5319 ret <16 x i16> %res 5320} 5321 5322declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5323 5324define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5325; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_128: 5326; CHECK: ## BB#0: 5327; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5328; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] 5329; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xd9] 5330; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd1,0xc1] 5331; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 5332; CHECK-NEXT: vpaddw %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc0] 5333; CHECK-NEXT: retq ## encoding: [0xc3] 5334 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5335 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5336 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 5337 %res3 = add <8 x i16> %res, %res1 5338 %res4 = add <8 x i16> %res2, %res3 5339 ret <8 x i16> %res4 5340} 5341 5342declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 5343 5344define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5345; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_256: 5346; CHECK: ## BB#0: 5347; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5348; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] 5349; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xd9] 5350; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd1,0xc1] 5351; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 5352; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3] 5353; CHECK-NEXT: retq ## encoding: [0xc3] 5354 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 5355 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 5356 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 5357 %res3 = add <16 x i16> %res, %res1 5358 %res4 = add <16 x i16> %res3, %res2 5359 ret <16 x i16> %res4 5360} 5361 5362declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8) 5363 5364define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 5365; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_128: 5366; CHECK: ## BB#0: 5367; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5368; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] 5369; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x03] 5370; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xd0,0x03] 5371; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5372; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 5373; CHECK-NEXT: retq ## encoding: [0xc3] 5374 %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 5375 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 5376 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 5377 %res3 = add <8 x i16> %res, %res1 5378 %res4 = add <8 x i16> %res2, %res3 5379 ret <8 x i16> %res4 5380} 5381 5382declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16) 5383 5384define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 5385; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_256: 5386; CHECK: ## BB#0: 5387; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5388; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] 5389; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x03] 5390; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xd0,0x03] 5391; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5392; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc2] 5393; CHECK-NEXT: retq ## encoding: [0xc3] 5394 %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 5395 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 5396 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 5397 %res3 = add <16 x i16> %res, %res1 5398 %res4 = add <16 x i16> %res3, %res2 5399 ret <16 x i16> %res4 5400} 5401 5402declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) 5403 5404define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5405; CHECK-LABEL: test_int_x86_avx512_mask_psrlv16_hi: 5406; CHECK: ## BB#0: 5407; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5408; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1] 5409; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xd9] 5410; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1] 5411; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] 5412; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5413; CHECK-NEXT: retq ## encoding: [0xc3] 5414 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 5415 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 5416 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 5417 %res3 = add <16 x i16> %res, %res1 5418 %res4 = add <16 x i16> %res3, %res2 5419 ret <16 x i16> %res4 5420} 5421 5422declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5423 5424define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5425; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_hi: 5426; CHECK: ## BB#0: 5427; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5428; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1] 5429; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x10,0xd9] 5430; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1] 5431; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] 5432; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5433; CHECK-NEXT: retq ## encoding: [0xc3] 5434 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5435 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 5436 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5437 %res3 = add <8 x i16> %res, %res1 5438 %res4 = add <8 x i16> %res3, %res2 5439 ret <8 x i16> %res4 5440} 5441 5442declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5443 5444define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5445; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_128: 5446; CHECK: ## BB#0: 5447; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5448; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] 5449; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xd9] 5450; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe1,0xc1] 5451; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] 5452; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5453; CHECK-NEXT: retq ## encoding: [0xc3] 5454 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5455 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 5456 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5457 %res3 = add <8 x i16> %res, %res1 5458 %res4 = add <8 x i16> %res3, %res2 5459 ret <8 x i16> %res4 5460} 5461 5462declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8) 5463 5464define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 5465; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_128: 5466; CHECK: ## BB#0: 5467; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5468; CHECK-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] 5469; CHECK-NEXT: vpsraw $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x03] 5470; CHECK-NEXT: vpsraw $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xe0,0x03] 5471; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca] 5472; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5473; CHECK-NEXT: retq ## encoding: [0xc3] 5474 %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 5475 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 5476 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 5477 %res3 = add <8 x i16> %res, %res1 5478 %res4 = add <8 x i16> %res3, %res2 5479 ret <8 x i16> %res4 5480} 5481 5482declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 5483 5484define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5485; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_256: 5486; CHECK: ## BB#0: 5487; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5488; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] 5489; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xd9] 5490; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe1,0xc1] 5491; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] 5492; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5493; CHECK-NEXT: retq ## encoding: [0xc3] 5494 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 5495 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 5496 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 5497 %res3 = add <16 x i16> %res, %res1 5498 %res4 = add <16 x i16> %res3, %res2 5499 ret <16 x i16> %res4 5500} 5501 5502declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16) 5503 5504define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 5505; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_256: 5506; CHECK: ## BB#0: 5507; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5508; CHECK-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] 5509; CHECK-NEXT: vpsraw $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x03] 5510; CHECK-NEXT: vpsraw $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xe0,0x03] 5511; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca] 5512; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5513; CHECK-NEXT: retq ## encoding: [0xc3] 5514 %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 5515 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 5516 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 5517 %res3 = add <16 x i16> %res, %res1 5518 %res4 = add <16 x i16> %res3, %res2 5519 ret <16 x i16> %res4 5520} 5521 5522declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) 5523 5524define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5525; CHECK-LABEL: test_int_x86_avx512_mask_psrav16_hi: 5526; CHECK: ## BB#0: 5527; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5528; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1] 5529; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xd9] 5530; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1] 5531; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] 5532; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5533; CHECK-NEXT: retq ## encoding: [0xc3] 5534 %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 5535 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 5536 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 5537 %res3 = add <16 x i16> %res, %res1 5538 %res4 = add <16 x i16> %res3, %res2 5539 ret <16 x i16> %res4 5540} 5541 5542declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5543 5544define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5545; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_hi: 5546; CHECK: ## BB#0: 5547; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5548; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1] 5549; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x11,0xd9] 5550; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1] 5551; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] 5552; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5553; CHECK-NEXT: retq ## encoding: [0xc3] 5554 %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5555 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 5556 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5557 %res3 = add <8 x i16> %res, %res1 5558 %res4 = add <8 x i16> %res3, %res2 5559 ret <8 x i16> %res4 5560} 5561 5562 5563declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5564 5565define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5566; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_128: 5567; CHECK: ## BB#0: 5568; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5569; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] 5570; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xd9] 5571; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf1,0xc1] 5572; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] 5573; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5574; CHECK-NEXT: retq ## encoding: [0xc3] 5575 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5576 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 5577 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5578 %res3 = add <8 x i16> %res, %res1 5579 %res4 = add <8 x i16> %res3, %res2 5580 ret <8 x i16> %res4 5581} 5582 5583declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16) 5584 5585define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5586; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_256: 5587; CHECK: ## BB#0: 5588; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5589; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] 5590; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xd9] 5591; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf1,0xc1] 5592; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] 5593; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5594; CHECK-NEXT: retq ## encoding: [0xc3] 5595 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) 5596 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 5597 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1) 5598 %res3 = add <16 x i16> %res, %res1 5599 %res4 = add <16 x i16> %res3, %res2 5600 ret <16 x i16> %res4 5601} 5602 5603declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8) 5604 5605define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { 5606; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_128: 5607; CHECK: ## BB#0: 5608; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5609; CHECK-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] 5610; CHECK-NEXT: vpsllw $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x03] 5611; CHECK-NEXT: vpsllw $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xf0,0x03] 5612; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca] 5613; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5614; CHECK-NEXT: retq ## encoding: [0xc3] 5615 %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) 5616 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) 5617 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) 5618 %res3 = add <8 x i16> %res, %res1 5619 %res4 = add <8 x i16> %res3, %res2 5620 ret <8 x i16> %res4 5621} 5622 5623declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16) 5624 5625define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { 5626; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_256: 5627; CHECK: ## BB#0: 5628; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5629; CHECK-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] 5630; CHECK-NEXT: vpsllw $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x03] 5631; CHECK-NEXT: vpsllw $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xf0,0x03] 5632; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca] 5633; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5634; CHECK-NEXT: retq ## encoding: [0xc3] 5635 %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) 5636 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) 5637 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1) 5638 %res3 = add <16 x i16> %res, %res1 5639 %res4 = add <16 x i16> %res3, %res2 5640 ret <16 x i16> %res4 5641} 5642 5643declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) 5644 5645define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5646; CHECK-LABEL: test_int_x86_avx512_mask_psllv16_hi: 5647; CHECK: ## BB#0: 5648; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5649; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1] 5650; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xd9] 5651; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1] 5652; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] 5653; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5654; CHECK-NEXT: retq ## encoding: [0xc3] 5655 %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 5656 %res1 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 5657 %res2 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 5658 %res3 = add <16 x i16> %res, %res1 5659 %res4 = add <16 x i16> %res3, %res2 5660 ret <16 x i16> %res4 5661} 5662 5663declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5664 5665define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5666; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_hi: 5667; CHECK: ## BB#0: 5668; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5669; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1] 5670; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x12,0xd9] 5671; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1] 5672; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] 5673; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5674; CHECK-NEXT: retq ## encoding: [0xc3] 5675 %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5676 %res1 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 5677 %res2 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5678 %res3 = add <8 x i16> %res, %res1 5679 %res4 = add <8 x i16> %res3, %res2 5680 ret <8 x i16> %res4 5681} 5682 5683declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8) 5684 5685define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) { 5686; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128: 5687; CHECK: ## BB#0: 5688; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5689; CHECK-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8] 5690; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5691; CHECK-NEXT: vpmovzxbw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x30,0xd0] 5692; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5693; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x30,0xc0] 5694; CHECK-NEXT: ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 5695; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca] 5696; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5697; CHECK-NEXT: retq ## encoding: [0xc3] 5698 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) 5699 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) 5700 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1) 5701 %res3 = add <8 x i16> %res, %res1 5702 %res4 = add <8 x i16> %res3, %res2 5703 ret <8 x i16> %res4 5704} 5705 5706declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16) 5707 5708define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) { 5709; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256: 5710; CHECK: ## BB#0: 5711; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5712; CHECK-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8] 5713; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 5714; CHECK-NEXT: vpmovzxbw %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xd0] 5715; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 5716; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x30,0xc0] 5717; CHECK-NEXT: ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 5718; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca] 5719; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5720; CHECK-NEXT: retq ## encoding: [0xc3] 5721 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) 5722 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) 5723 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1) 5724 %res3 = add <16 x i16> %res, %res1 5725 %res4 = add <16 x i16> %res3, %res2 5726 ret <16 x i16> %res4 5727} 5728 5729 5730declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8) 5731 5732define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) { 5733; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128: 5734; CHECK: ## BB#0: 5735; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5736; CHECK-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8] 5737; CHECK-NEXT: vpmovsxbw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x20,0xd0] 5738; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x20,0xc0] 5739; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca] 5740; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5741; CHECK-NEXT: retq ## encoding: [0xc3] 5742 %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) 5743 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) 5744 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1) 5745 %res3 = add <8 x i16> %res, %res1 5746 %res4 = add <8 x i16> %res3, %res2 5747 ret <8 x i16> %res4 5748} 5749 5750declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16) 5751 5752define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) { 5753; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256: 5754; CHECK: ## BB#0: 5755; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5756; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8] 5757; CHECK-NEXT: vpmovsxbw %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xd0] 5758; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x20,0xc0] 5759; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca] 5760; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5761; CHECK-NEXT: retq ## encoding: [0xc3] 5762 %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) 5763 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) 5764 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1) 5765 %res3 = add <16 x i16> %res, %res1 5766 %res4 = add <16 x i16> %res3, %res2 5767 ret <16 x i16> %res4 5768} 5769 5770declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8) 5771 5772define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) { 5773; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128: 5774; CHECK: ## BB#0: 5775; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5776; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8] 5777; CHECK-NEXT: vpmovsxdq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x25,0xd0] 5778; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x25,0xc0] 5779; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 5780; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 5781; CHECK-NEXT: retq ## encoding: [0xc3] 5782 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) 5783 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2) 5784 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1) 5785 %res3 = add <2 x i64> %res, %res1 5786 %res4 = add <2 x i64> %res3, %res2 5787 ret <2 x i64> %res4 5788} 5789 5790declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8) 5791 5792define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) { 5793; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256: 5794; CHECK: ## BB#0: 5795; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5796; CHECK-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8] 5797; CHECK-NEXT: vpmovsxdq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xd0] 5798; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x25,0xc0] 5799; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 5800; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 5801; CHECK-NEXT: retq ## encoding: [0xc3] 5802 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) 5803 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2) 5804 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1) 5805 %res3 = add <4 x i64> %res, %res1 5806 %res4 = add <4 x i64> %res3, %res2 5807 ret <4 x i64> %res4 5808} 5809 5810declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) 5811 5812define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { 5813; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_128: 5814; CHECK: ## BB#0: 5815; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5816; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0] 5817; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xd8] 5818; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0] 5819; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb] 5820; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 5821; CHECK-NEXT: retq ## encoding: [0xc3] 5822 %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) 5823 %res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) 5824 %res2 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) 5825 %res3 = add <8 x i16> %res, %res1 5826 %res4 = add <8 x i16> %res3, %res2 5827 ret <8 x i16> %res4 5828} 5829 5830declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) 5831 5832define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { 5833; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_256: 5834; CHECK: ## BB#0: 5835; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5836; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0] 5837; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xd8] 5838; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0] 5839; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb] 5840; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 5841; CHECK-NEXT: retq ## encoding: [0xc3] 5842 %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) 5843 %res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) 5844 %res2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) 5845 %res3 = add <16 x i16> %res, %res1 5846 %res4 = add <16 x i16> %res3, %res2 5847 ret <16 x i16> %res4 5848} 5849 5850declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16) 5851 5852define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 5853; CHECK-LABEL: test_int_x86_avx512_ptestm_b_128: 5854; CHECK: ## BB#0: 5855; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5856; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc1] 5857; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 5858; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1] 5859; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5860; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] 5861; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 5862; CHECK-NEXT: retq ## encoding: [0xc3] 5863 %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 5864 %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) 5865 %res2 = add i16 %res, %res1 5866 ret i16 %res2 5867} 5868 5869declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32) 5870 5871define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 5872; CHECK-LABEL: test_int_x86_avx512_ptestm_b_256: 5873; CHECK: ## BB#0: 5874; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 5875; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x26,0xc1] 5876; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] 5877; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1] 5878; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] 5879; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] 5880; CHECK-NEXT: retq ## encoding: [0xc3] 5881 %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 5882 %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) 5883 %res2 = add i32 %res, %res1 5884 ret i32 %res2 5885} 5886 5887declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8) 5888 5889define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 5890; CHECK-LABEL: test_int_x86_avx512_ptestm_w_128: 5891; CHECK: ## BB#0: 5892; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5893; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc1] 5894; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 5895; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] 5896; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5897; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 5898; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 5899; CHECK-NEXT: retq ## encoding: [0xc3] 5900 %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 5901 %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) 5902 %res2 = add i8 %res, %res1 5903 ret i8 %res2 5904} 5905 5906declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16) 5907 5908define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 5909; CHECK-LABEL: test_int_x86_avx512_ptestm_w_256: 5910; CHECK: ## BB#0: 5911; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5912; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc1] 5913; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 5914; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1] 5915; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5916; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] 5917; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 5918; CHECK-NEXT: retq ## encoding: [0xc3] 5919 %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 5920 %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) 5921 %res2 = add i16 %res, %res1 5922 ret i16 %res2 5923} 5924 5925declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16) 5926 5927define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { 5928; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_128: 5929; CHECK: ## BB#0: 5930; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5931; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc1] 5932; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 5933; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1] 5934; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5935; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] 5936; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 5937; CHECK-NEXT: retq ## encoding: [0xc3] 5938 %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) 5939 %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1) 5940 %res2 = add i16 %res, %res1 5941 ret i16 %res2 5942} 5943 5944declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32) 5945 5946define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { 5947; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_256: 5948; CHECK: ## BB#0: 5949; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] 5950; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x26,0xc1] 5951; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] 5952; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1] 5953; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] 5954; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] 5955; CHECK-NEXT: retq ## encoding: [0xc3] 5956 %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) 5957 %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1) 5958 %res2 = add i32 %res, %res1 5959 ret i32 %res2 5960} 5961 5962declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2) 5963 5964define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { 5965; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_128: 5966; CHECK: ## BB#0: 5967; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5968; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc1] 5969; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 5970; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] 5971; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5972; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 5973; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 5974; CHECK-NEXT: retq ## encoding: [0xc3] 5975 %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) 5976 %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1) 5977 %res2 = add i8 %res, %res1 5978 ret i8 %res2 5979} 5980 5981declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2) 5982 5983define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { 5984; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_256: 5985; CHECK: ## BB#0: 5986; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5987; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc1] 5988; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 5989; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1] 5990; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 5991; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] 5992; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> 5993; CHECK-NEXT: retq ## encoding: [0xc3] 5994 %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) 5995 %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1) 5996 %res2 = add i16 %res, %res1 5997 ret i16 %res2 5998} 5999 6000declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32) 6001 6002define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) { 6003; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: 6004; CHECK: ## BB#0: 6005; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] 6006; CHECK-NEXT: vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] 6007; CHECK-NEXT: vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] 6008; CHECK-NEXT: vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7] 6009; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0] 6010; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0] 6011; CHECK-NEXT: retq ## encoding: [0xc3] 6012 %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) 6013 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask) 6014 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask) 6015 %res3 = add <32 x i8> %res, %res1 6016 %res4 = add <32 x i8> %res2, %res3 6017 ret <32 x i8> %res4 6018} 6019 6020declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16) 6021 6022define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) { 6023; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: 6024; CHECK: ## BB#0: 6025; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6026; CHECK-NEXT: vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] 6027; CHECK-NEXT: vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] 6028; CHECK-NEXT: vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7] 6029; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0] 6030; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0] 6031; CHECK-NEXT: retq ## encoding: [0xc3] 6032 %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) 6033 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask) 6034 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask) 6035 %res3 = add <16 x i8> %res, %res1 6036 %res4 = add <16 x i8> %res2, %res3 6037 ret <16 x i8> %res4 6038} 6039 6040declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16) 6041 6042define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) { 6043; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: 6044; CHECK: ## BB#0: 6045; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6046; CHECK-NEXT: vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] 6047; CHECK-NEXT: vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] 6048; CHECK-NEXT: vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7] 6049; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0] 6050; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0] 6051; CHECK-NEXT: retq ## encoding: [0xc3] 6052 %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1) 6053 %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask) 6054 %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask) 6055 %res3 = add <16 x i16> %res, %res1 6056 %res4 = add <16 x i16> %res2, %res3 6057 ret <16 x i16> %res4 6058} 6059 6060declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8) 6061 6062define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) { 6063; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: 6064; CHECK: ## BB#0: 6065; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6066; CHECK-NEXT: vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] 6067; CHECK-NEXT: vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] 6068; CHECK-NEXT: vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7] 6069; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0] 6070; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0] 6071; CHECK-NEXT: retq ## encoding: [0xc3] 6072 %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) 6073 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask) 6074 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask) 6075 %res3 = add <8 x i16> %res, %res1 6076 %res4 = add <8 x i16> %res2, %res3 6077 ret <8 x i16> %res4 6078} 6079