1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s 3 4; 256-bit 5 6define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { 7; CHECK-LABEL: test_cmp_d_256: 8; CHECK: ## BB#0: 9; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x00] 10; CHECK-NEXT: vpcmpltd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x01] 11; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02] 12; CHECK-NEXT: vpcmpunordd %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x03] 13; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x04] 14; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe9,0x05] 15; CHECK-NEXT: vpcmpnled %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf1,0x06] 16; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf9,0x07] 17; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 18; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 19; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 20; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 21; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 22; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 23; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 24; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 25; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 26; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 27; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 28; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 29; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 30; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 31; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 32; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 33; CHECK-NEXT: retq ## encoding: [0xc3] 34 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) 35 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 36 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) 37 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 38 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1) 39 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 40 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1) 41 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 42 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1) 43 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 44 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1) 45 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 46 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1) 47 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 48 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1) 49 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 50 ret <8 x i8> %vec7 51} 52 53define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { 54; CHECK-LABEL: test_mask_cmp_d_256: 55; CHECK: ## BB#0: 56; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 57; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x00] 58; CHECK-NEXT: vpcmpltd %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x01] 59; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02] 60; CHECK-NEXT: vpcmpunordd %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x03] 61; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x04] 62; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xf1,0x05] 63; CHECK-NEXT: vpcmpnled %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xf9,0x06] 64; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc9,0x07] 65; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 66; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 67; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 68; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 69; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 70; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 71; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 72; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 73; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 74; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 75; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 76; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 77; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 78; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 79; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 80; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 81; CHECK-NEXT: retq ## encoding: [0xc3] 82 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask) 83 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 84 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask) 85 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 86 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask) 87 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 88 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask) 89 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 90 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask) 91 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 92 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask) 93 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 94 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask) 95 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 96 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask) 97 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 98 ret <8 x i8> %vec7 99} 100 101declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone 102 103define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { 104; CHECK-LABEL: test_ucmp_d_256: 105; CHECK: ## BB#0: 106; CHECK-NEXT: vpcmpequd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x00] 107; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01] 108; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02] 109; CHECK-NEXT: vpcmpunordud %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x03] 110; CHECK-NEXT: vpcmpnequd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x04] 111; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x05] 112; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf1,0x06] 113; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf9,0x07] 114; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 115; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 116; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 117; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 118; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 119; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 120; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 121; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 122; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 123; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 124; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 125; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 126; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 127; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 128; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 129; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 130; CHECK-NEXT: retq ## encoding: [0xc3] 131 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1) 132 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 133 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1) 134 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 135 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1) 136 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 137 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1) 138 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 139 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1) 140 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 141 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1) 142 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 143 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1) 144 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 145 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1) 146 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 147 ret <8 x i8> %vec7 148} 149 150define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { 151; CHECK-LABEL: test_mask_ucmp_d_256: 152; CHECK: ## BB#0: 153; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 154; CHECK-NEXT: vpcmpequd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x00] 155; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01] 156; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02] 157; CHECK-NEXT: vpcmpunordud %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x03] 158; CHECK-NEXT: vpcmpnequd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x04] 159; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf1,0x05] 160; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf9,0x06] 161; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x07] 162; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 163; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 164; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 165; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 166; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 167; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 168; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 169; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 170; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 171; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 172; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 173; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 174; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 175; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 176; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 177; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 178; CHECK-NEXT: retq ## encoding: [0xc3] 179 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask) 180 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 181 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask) 182 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 183 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask) 184 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 185 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask) 186 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 187 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask) 188 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 189 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask) 190 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 191 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask) 192 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 193 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask) 194 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 195 ret <8 x i8> %vec7 196} 197 198declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone 199 200define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) { 201; CHECK-LABEL: test_cmp_q_256: 202; CHECK: ## BB#0: 203; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x00] 204; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x01] 205; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02] 206; CHECK-NEXT: vpcmpunordq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x03] 207; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x04] 208; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe9,0x05] 209; CHECK-NEXT: vpcmpnleq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf1,0x06] 210; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf9,0x07] 211; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 212; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 213; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 214; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 215; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 216; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 217; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 218; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 219; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 220; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 221; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 222; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 223; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 224; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 225; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 226; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 227; CHECK-NEXT: retq ## encoding: [0xc3] 228 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1) 229 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 230 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1) 231 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 232 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1) 233 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 234 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1) 235 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 236 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1) 237 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 238 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1) 239 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 240 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1) 241 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 242 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1) 243 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 244 ret <8 x i8> %vec7 245} 246 247define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { 248; CHECK-LABEL: test_mask_cmp_q_256: 249; CHECK: ## BB#0: 250; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 251; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x00] 252; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd1,0x01] 253; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02] 254; CHECK-NEXT: vpcmpunordq %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x03] 255; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04] 256; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xf1,0x05] 257; CHECK-NEXT: vpcmpnleq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xf9,0x06] 258; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc9,0x07] 259; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 260; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 261; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 262; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 263; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 264; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 265; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 266; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 267; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 268; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 269; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 270; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 271; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 272; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 273; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 274; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 275; CHECK-NEXT: retq ## encoding: [0xc3] 276 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask) 277 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 278 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask) 279 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 280 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask) 281 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 282 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask) 283 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 284 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask) 285 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 286 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask) 287 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 288 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask) 289 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 290 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask) 291 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 292 ret <8 x i8> %vec7 293} 294 295declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone 296 297define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) { 298; CHECK-LABEL: test_ucmp_q_256: 299; CHECK: ## BB#0: 300; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x00] 301; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01] 302; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02] 303; CHECK-NEXT: vpcmpunorduq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd9,0x03] 304; CHECK-NEXT: vpcmpnequq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x04] 305; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x05] 306; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf1,0x06] 307; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf9,0x07] 308; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 309; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 310; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 311; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 312; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 313; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 314; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 315; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 316; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 317; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 318; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 319; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 320; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 321; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 322; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 323; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 324; CHECK-NEXT: retq ## encoding: [0xc3] 325 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1) 326 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 327 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1) 328 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 329 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1) 330 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 331 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1) 332 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 333 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1) 334 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 335 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1) 336 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 337 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1) 338 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 339 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1) 340 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 341 ret <8 x i8> %vec7 342} 343 344define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { 345; CHECK-LABEL: test_mask_ucmp_q_256: 346; CHECK: ## BB#0: 347; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 348; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x00] 349; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01] 350; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02] 351; CHECK-NEXT: vpcmpunorduq %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe1,0x03] 352; CHECK-NEXT: vpcmpnequq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x04] 353; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x05] 354; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf9,0x06] 355; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc9,0x07] 356; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 357; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 358; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 359; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 360; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 361; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 362; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 363; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 364; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 365; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 366; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 367; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 368; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 369; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 370; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 371; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 372; CHECK-NEXT: retq ## encoding: [0xc3] 373 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask) 374 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 375 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask) 376 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 377 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask) 378 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 379 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask) 380 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 381 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask) 382 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 383 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask) 384 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 385 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask) 386 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 387 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask) 388 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 389 ret <8 x i8> %vec7 390} 391 392declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone 393 394; 128-bit 395 396define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { 397; CHECK-LABEL: test_cmp_d_128: 398; CHECK: ## BB#0: 399; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x00] 400; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x01] 401; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02] 402; CHECK-NEXT: vpcmpunordd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x03] 403; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x04] 404; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe9,0x05] 405; CHECK-NEXT: vpcmpnled %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf1,0x06] 406; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf9,0x07] 407; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 408; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 409; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 410; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 411; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 412; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 413; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 414; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 415; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 416; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 417; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 418; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 419; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 420; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 421; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 422; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 423; CHECK-NEXT: retq ## encoding: [0xc3] 424 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1) 425 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 426 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1) 427 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 428 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1) 429 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 430 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1) 431 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 432 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1) 433 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 434 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1) 435 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 436 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1) 437 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 438 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1) 439 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 440 ret <8 x i8> %vec7 441} 442 443define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { 444; CHECK-LABEL: test_mask_cmp_d_128: 445; CHECK: ## BB#0: 446; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 447; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x00] 448; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd1,0x01] 449; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02] 450; CHECK-NEXT: vpcmpunordd %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x03] 451; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04] 452; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xf1,0x05] 453; CHECK-NEXT: vpcmpnled %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xf9,0x06] 454; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc9,0x07] 455; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 456; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 457; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 458; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 459; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 460; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 461; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 462; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 463; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 464; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 465; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 466; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 467; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 468; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 469; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 470; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 471; CHECK-NEXT: retq ## encoding: [0xc3] 472 %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask) 473 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 474 %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask) 475 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 476 %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask) 477 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 478 %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask) 479 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 480 %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask) 481 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 482 %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask) 483 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 484 %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask) 485 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 486 %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask) 487 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 488 ret <8 x i8> %vec7 489} 490 491declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone 492 493define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { 494; CHECK-LABEL: test_ucmp_d_128: 495; CHECK: ## BB#0: 496; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x00] 497; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01] 498; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02] 499; CHECK-NEXT: vpcmpunordud %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd9,0x03] 500; CHECK-NEXT: vpcmpnequd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x04] 501; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x05] 502; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf1,0x06] 503; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf9,0x07] 504; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 505; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 506; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 507; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 508; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 509; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 510; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 511; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 512; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 513; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 514; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 515; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 516; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 517; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 518; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 519; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 520; CHECK-NEXT: retq ## encoding: [0xc3] 521 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1) 522 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 523 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1) 524 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 525 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1) 526 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 527 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1) 528 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 529 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1) 530 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 531 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1) 532 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 533 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1) 534 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 535 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1) 536 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 537 ret <8 x i8> %vec7 538} 539 540define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { 541; CHECK-LABEL: test_mask_ucmp_d_128: 542; CHECK: ## BB#0: 543; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 544; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x00] 545; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01] 546; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02] 547; CHECK-NEXT: vpcmpunordud %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe1,0x03] 548; CHECK-NEXT: vpcmpnequd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x04] 549; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x05] 550; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf9,0x06] 551; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc9,0x07] 552; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 553; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 554; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 555; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 556; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 557; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 558; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 559; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 560; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 561; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 562; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 563; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 564; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 565; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 566; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 567; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 568; CHECK-NEXT: retq ## encoding: [0xc3] 569 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask) 570 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 571 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask) 572 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 573 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask) 574 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 575 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask) 576 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 577 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask) 578 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 579 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask) 580 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 581 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask) 582 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 583 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask) 584 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 585 ret <8 x i8> %vec7 586} 587 588declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone 589 590define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { 591; CHECK-LABEL: test_cmp_q_128: 592; CHECK: ## BB#0: 593; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x00] 594; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x01] 595; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02] 596; CHECK-NEXT: vpcmpunordq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x03] 597; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x04] 598; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe9,0x05] 599; CHECK-NEXT: vpcmpnleq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf1,0x06] 600; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf9,0x07] 601; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 602; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 603; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 604; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 605; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 606; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 607; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 608; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 609; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 610; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 611; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 612; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 613; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 614; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 615; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 616; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 617; CHECK-NEXT: retq ## encoding: [0xc3] 618 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) 619 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 620 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1) 621 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 622 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1) 623 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 624 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1) 625 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 626 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1) 627 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 628 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1) 629 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 630 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1) 631 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 632 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1) 633 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 634 ret <8 x i8> %vec7 635} 636 637define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { 638; CHECK-LABEL: test_mask_cmp_q_128: 639; CHECK: ## BB#0: 640; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 641; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x00] 642; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd1,0x01] 643; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02] 644; CHECK-NEXT: vpcmpunordq %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x03] 645; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04] 646; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xf1,0x05] 647; CHECK-NEXT: vpcmpnleq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xf9,0x06] 648; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc9,0x07] 649; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 650; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 651; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 652; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 653; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 654; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 655; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 656; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 657; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 658; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 659; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 660; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 661; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 662; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 663; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 664; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 665; CHECK-NEXT: retq ## encoding: [0xc3] 666 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) 667 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 668 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask) 669 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 670 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask) 671 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 672 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask) 673 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 674 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask) 675 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 676 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask) 677 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 678 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask) 679 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 680 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask) 681 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 682 ret <8 x i8> %vec7 683} 684 685declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone 686 687define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { 688; CHECK-LABEL: test_ucmp_q_128: 689; CHECK: ## BB#0: 690; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x00] 691; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01] 692; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02] 693; CHECK-NEXT: vpcmpunorduq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd9,0x03] 694; CHECK-NEXT: vpcmpnequq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x04] 695; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x05] 696; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf1,0x06] 697; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf9,0x07] 698; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 699; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 700; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 701; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 702; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 703; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 704; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 705; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 706; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 707; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 708; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 709; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 710; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 711; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 712; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 713; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 714; CHECK-NEXT: retq ## encoding: [0xc3] 715 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1) 716 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 717 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1) 718 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 719 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1) 720 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 721 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1) 722 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 723 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1) 724 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 725 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1) 726 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 727 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1) 728 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 729 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1) 730 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 731 ret <8 x i8> %vec7 732} 733 734define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { 735; CHECK-LABEL: test_mask_ucmp_q_128: 736; CHECK: ## BB#0: 737; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 738; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x00] 739; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01] 740; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02] 741; CHECK-NEXT: vpcmpunorduq %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe1,0x03] 742; CHECK-NEXT: vpcmpnequq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe9,0x04] 743; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05] 744; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06] 745; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc9,0x07] 746; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 747; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] 748; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] 749; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] 750; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] 751; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] 752; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] 753; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] 754; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] 755; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] 756; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] 757; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] 758; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] 759; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] 760; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] 761; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] 762; CHECK-NEXT: retq ## encoding: [0xc3] 763 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) 764 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 765 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask) 766 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 767 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask) 768 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 769 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask) 770 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 771 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask) 772 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 773 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask) 774 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 775 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask) 776 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 777 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask) 778 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 779 ret <8 x i8> %vec7 780} 781 782declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone 783 784define void @compr1(i8* %addr, <8 x double> %data, i8 %mask) { 785; CHECK-LABEL: compr1: 786; CHECK: ## BB#0: 787; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 788; CHECK-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07] 789; CHECK-NEXT: retq ## encoding: [0xc3] 790 call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 791 ret void 792} 793 794declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 795 796define void @compr2(i8* %addr, <4 x double> %data, i8 %mask) { 797; CHECK-LABEL: compr2: 798; CHECK: ## BB#0: 799; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 800; CHECK-NEXT: vcompresspd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07] 801; CHECK-NEXT: retq ## encoding: [0xc3] 802 call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 803 ret void 804} 805 806declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 807 808define void @compr3(i8* %addr, <4 x float> %data, i8 %mask) { 809; CHECK-LABEL: compr3: 810; CHECK: ## BB#0: 811; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 812; CHECK-NEXT: vcompressps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07] 813; CHECK-NEXT: retq ## encoding: [0xc3] 814 call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 815 ret void 816} 817 818declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 819 820define <8 x double> @compr4(i8* %addr, <8 x double> %data, i8 %mask) { 821; CHECK-LABEL: compr4: 822; CHECK: ## BB#0: 823; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 824; CHECK-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0] 825; CHECK-NEXT: retq ## encoding: [0xc3] 826 %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) 827 ret <8 x double> %res 828} 829 830declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) 831 832define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) { 833; CHECK-LABEL: compr5: 834; CHECK: ## BB#0: 835; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 836; CHECK-NEXT: vcompresspd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] 837; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 838; CHECK-NEXT: retq ## encoding: [0xc3] 839 %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask) 840 ret <4 x double> %res 841} 842 843declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask) 844 845define <4 x float> @compr6(<4 x float> %data, i8 %mask) { 846; CHECK-LABEL: compr6: 847; CHECK: ## BB#0: 848; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 849; CHECK-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0] 850; CHECK-NEXT: retq ## encoding: [0xc3] 851 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask) 852 ret <4 x float> %res 853} 854 855declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) 856 857define void @compr7(i8* %addr, <8 x double> %data) { 858; CHECK-LABEL: compr7: 859; CHECK: ## BB#0: 860; CHECK-NEXT: vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07] 861; CHECK-NEXT: retq ## encoding: [0xc3] 862 call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1) 863 ret void 864} 865 866define <4 x float> @compr8(<4 x float> %data) { 867; CHECK-LABEL: compr8: 868; CHECK: ## BB#0: 869; CHECK-NEXT: retq ## encoding: [0xc3] 870 %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1) 871 ret <4 x float> %res 872} 873 874define void @compr9(i8* %addr, <8 x i64> %data, i8 %mask) { 875; CHECK-LABEL: compr9: 876; CHECK: ## BB#0: 877; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 878; CHECK-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07] 879; CHECK-NEXT: retq ## encoding: [0xc3] 880 call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask) 881 ret void 882} 883 884declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask) 885 886define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) { 887; CHECK-LABEL: compr10: 888; CHECK: ## BB#0: 889; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 890; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] 891; CHECK-NEXT: retq ## encoding: [0xc3] 892 %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask) 893 ret <4 x i32> %res 894} 895 896declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) 897 898; Expand 899 900define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) { 901; CHECK-LABEL: expand1: 902; CHECK: ## BB#0: 903; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 904; CHECK-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07] 905; CHECK-NEXT: retq ## encoding: [0xc3] 906 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 907 ret <8 x double> %res 908} 909 910declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask) 911 912define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) { 913; CHECK-LABEL: expand2: 914; CHECK: ## BB#0: 915; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 916; CHECK-NEXT: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07] 917; CHECK-NEXT: retq ## encoding: [0xc3] 918 %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 919 ret <4 x double> %res 920} 921 922declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask) 923 924define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) { 925; CHECK-LABEL: expand3: 926; CHECK: ## BB#0: 927; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 928; CHECK-NEXT: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07] 929; CHECK-NEXT: retq ## encoding: [0xc3] 930 %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 931 ret <4 x float> %res 932} 933 934declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask) 935 936define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) { 937; CHECK-LABEL: expand4: 938; CHECK: ## BB#0: 939; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 940; CHECK-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0] 941; CHECK-NEXT: retq ## encoding: [0xc3] 942 %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) 943 ret <8 x double> %res 944} 945 946declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) 947 948define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) { 949; CHECK-LABEL: expand5: 950; CHECK: ## BB#0: 951; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 952; CHECK-NEXT: vexpandpd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8] 953; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 954; CHECK-NEXT: retq ## encoding: [0xc3] 955 %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask) 956 ret <4 x double> %res 957} 958 959declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask) 960 961define <4 x float> @expand6(<4 x float> %data, i8 %mask) { 962; CHECK-LABEL: expand6: 963; CHECK: ## BB#0: 964; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 965; CHECK-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0] 966; CHECK-NEXT: retq ## encoding: [0xc3] 967 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask) 968 ret <4 x float> %res 969} 970 971declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) 972 973define <8 x double> @expand7(i8* %addr, <8 x double> %data) { 974; CHECK-LABEL: expand7: 975; CHECK: ## BB#0: 976; CHECK-NEXT: vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07] 977; CHECK-NEXT: retq ## encoding: [0xc3] 978 %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1) 979 ret <8 x double> %res 980} 981 982define <4 x float> @expand8(<4 x float> %data) { 983; CHECK-LABEL: expand8: 984; CHECK: ## BB#0: 985; CHECK-NEXT: retq ## encoding: [0xc3] 986 %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1) 987 ret <4 x float> %res 988} 989 990define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) { 991; CHECK-LABEL: expand9: 992; CHECK: ## BB#0: 993; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 994; CHECK-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07] 995; CHECK-NEXT: retq ## encoding: [0xc3] 996 %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask) 997 ret <8 x i64> %res 998} 999 1000declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask) 1001 1002define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) { 1003; CHECK-LABEL: expand10: 1004; CHECK: ## BB#0: 1005; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1006; CHECK-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0] 1007; CHECK-NEXT: retq ## encoding: [0xc3] 1008 %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask) 1009 ret <4 x i32> %res 1010} 1011 1012declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) 1013 1014define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { 1015; CHECK-LABEL: test_mask_mul_epi32_rr_128: 1016; CHECK: ## BB#0: 1017; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1] 1018; CHECK-NEXT: retq ## encoding: [0xc3] 1019 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1020 ret < 2 x i64> %res 1021} 1022 1023define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { 1024; CHECK-LABEL: test_mask_mul_epi32_rrk_128: 1025; CHECK: ## BB#0: 1026; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1027; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1] 1028; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1029; CHECK-NEXT: retq ## encoding: [0xc3] 1030 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1031 ret < 2 x i64> %res 1032} 1033 1034define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { 1035; CHECK-LABEL: test_mask_mul_epi32_rrkz_128: 1036; CHECK: ## BB#0: 1037; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1038; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1] 1039; CHECK-NEXT: retq ## encoding: [0xc3] 1040 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1041 ret < 2 x i64> %res 1042} 1043 1044define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { 1045; CHECK-LABEL: test_mask_mul_epi32_rm_128: 1046; CHECK: ## BB#0: 1047; CHECK-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07] 1048; CHECK-NEXT: retq ## encoding: [0xc3] 1049 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1050 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1051 ret < 2 x i64> %res 1052} 1053 1054define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 1055; CHECK-LABEL: test_mask_mul_epi32_rmk_128: 1056; CHECK: ## BB#0: 1057; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1058; CHECK-NEXT: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f] 1059; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1060; CHECK-NEXT: retq ## encoding: [0xc3] 1061 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1062 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1063 ret < 2 x i64> %res 1064} 1065 1066define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { 1067; CHECK-LABEL: test_mask_mul_epi32_rmkz_128: 1068; CHECK: ## BB#0: 1069; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1070; CHECK-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07] 1071; CHECK-NEXT: retq ## encoding: [0xc3] 1072 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1073 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1074 ret < 2 x i64> %res 1075} 1076 1077define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { 1078; CHECK-LABEL: test_mask_mul_epi32_rmb_128: 1079; CHECK: ## BB#0: 1080; CHECK-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07] 1081; CHECK-NEXT: retq ## encoding: [0xc3] 1082 %q = load i64, i64* %ptr_b 1083 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1084 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 1085 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1086 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1087 ret < 2 x i64> %res 1088} 1089 1090define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 1091; CHECK-LABEL: test_mask_mul_epi32_rmbk_128: 1092; CHECK: ## BB#0: 1093; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1094; CHECK-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f] 1095; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1096; CHECK-NEXT: retq ## encoding: [0xc3] 1097 %q = load i64, i64* %ptr_b 1098 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1099 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 1100 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1101 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1102 ret < 2 x i64> %res 1103} 1104 1105define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { 1106; CHECK-LABEL: test_mask_mul_epi32_rmbkz_128: 1107; CHECK: ## BB#0: 1108; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1109; CHECK-NEXT: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07] 1110; CHECK-NEXT: retq ## encoding: [0xc3] 1111 %q = load i64, i64* %ptr_b 1112 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1113 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer 1114 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1115 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1116 ret < 2 x i64> %res 1117} 1118 1119declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) 1120 1121define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { 1122; CHECK-LABEL: test_mask_mul_epi32_rr_256: 1123; CHECK: ## BB#0: 1124; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1] 1125; CHECK-NEXT: retq ## encoding: [0xc3] 1126 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1127 ret < 4 x i64> %res 1128} 1129 1130define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { 1131; CHECK-LABEL: test_mask_mul_epi32_rrk_256: 1132; CHECK: ## BB#0: 1133; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1134; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1] 1135; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1136; CHECK-NEXT: retq ## encoding: [0xc3] 1137 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1138 ret < 4 x i64> %res 1139} 1140 1141define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { 1142; CHECK-LABEL: test_mask_mul_epi32_rrkz_256: 1143; CHECK: ## BB#0: 1144; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1145; CHECK-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1] 1146; CHECK-NEXT: retq ## encoding: [0xc3] 1147 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1148 ret < 4 x i64> %res 1149} 1150 1151define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { 1152; CHECK-LABEL: test_mask_mul_epi32_rm_256: 1153; CHECK: ## BB#0: 1154; CHECK-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07] 1155; CHECK-NEXT: retq ## encoding: [0xc3] 1156 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1157 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1158 ret < 4 x i64> %res 1159} 1160 1161define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 1162; CHECK-LABEL: test_mask_mul_epi32_rmk_256: 1163; CHECK: ## BB#0: 1164; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1165; CHECK-NEXT: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f] 1166; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1167; CHECK-NEXT: retq ## encoding: [0xc3] 1168 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1169 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1170 ret < 4 x i64> %res 1171} 1172 1173define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { 1174; CHECK-LABEL: test_mask_mul_epi32_rmkz_256: 1175; CHECK: ## BB#0: 1176; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1177; CHECK-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07] 1178; CHECK-NEXT: retq ## encoding: [0xc3] 1179 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1180 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1181 ret < 4 x i64> %res 1182} 1183 1184define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { 1185; CHECK-LABEL: test_mask_mul_epi32_rmb_256: 1186; CHECK: ## BB#0: 1187; CHECK-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07] 1188; CHECK-NEXT: retq ## encoding: [0xc3] 1189 %q = load i64, i64* %ptr_b 1190 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1191 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1192 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1193 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1194 ret < 4 x i64> %res 1195} 1196 1197define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 1198; CHECK-LABEL: test_mask_mul_epi32_rmbk_256: 1199; CHECK: ## BB#0: 1200; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1201; CHECK-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f] 1202; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1203; CHECK-NEXT: retq ## encoding: [0xc3] 1204 %q = load i64, i64* %ptr_b 1205 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1206 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1207 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1208 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1209 ret < 4 x i64> %res 1210} 1211 1212define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { 1213; CHECK-LABEL: test_mask_mul_epi32_rmbkz_256: 1214; CHECK: ## BB#0: 1215; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1216; CHECK-NEXT: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07] 1217; CHECK-NEXT: retq ## encoding: [0xc3] 1218 %q = load i64, i64* %ptr_b 1219 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1220 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1221 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1222 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1223 ret < 4 x i64> %res 1224} 1225 1226declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) 1227 1228define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) { 1229; CHECK-LABEL: test_mask_mul_epu32_rr_128: 1230; CHECK: ## BB#0: 1231; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0xc1] 1232; CHECK-NEXT: retq ## encoding: [0xc3] 1233 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1234 ret < 2 x i64> %res 1235} 1236 1237define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) { 1238; CHECK-LABEL: test_mask_mul_epu32_rrk_128: 1239; CHECK: ## BB#0: 1240; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1241; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1] 1242; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1243; CHECK-NEXT: retq ## encoding: [0xc3] 1244 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1245 ret < 2 x i64> %res 1246} 1247 1248define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) { 1249; CHECK-LABEL: test_mask_mul_epu32_rrkz_128: 1250; CHECK: ## BB#0: 1251; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1252; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1] 1253; CHECK-NEXT: retq ## encoding: [0xc3] 1254 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1255 ret < 2 x i64> %res 1256} 1257 1258define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) { 1259; CHECK-LABEL: test_mask_mul_epu32_rm_128: 1260; CHECK: ## BB#0: 1261; CHECK-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xf4,0x07] 1262; CHECK-NEXT: retq ## encoding: [0xc3] 1263 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1264 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1265 ret < 2 x i64> %res 1266} 1267 1268define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 1269; CHECK-LABEL: test_mask_mul_epu32_rmk_128: 1270; CHECK: ## BB#0: 1271; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1272; CHECK-NEXT: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f] 1273; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1274; CHECK-NEXT: retq ## encoding: [0xc3] 1275 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1276 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1277 ret < 2 x i64> %res 1278} 1279 1280define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) { 1281; CHECK-LABEL: test_mask_mul_epu32_rmkz_128: 1282; CHECK: ## BB#0: 1283; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1284; CHECK-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07] 1285; CHECK-NEXT: retq ## encoding: [0xc3] 1286 %b = load < 4 x i32>, < 4 x i32>* %ptr_b 1287 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1288 ret < 2 x i64> %res 1289} 1290 1291define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) { 1292; CHECK-LABEL: test_mask_mul_epu32_rmb_128: 1293; CHECK: ## BB#0: 1294; CHECK-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07] 1295; CHECK-NEXT: retq ## encoding: [0xc3] 1296 %q = load i64, i64* %ptr_b 1297 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1298 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 1299 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1300 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1) 1301 ret < 2 x i64> %res 1302} 1303 1304define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) { 1305; CHECK-LABEL: test_mask_mul_epu32_rmbk_128: 1306; CHECK: ## BB#0: 1307; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1308; CHECK-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f] 1309; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1310; CHECK-NEXT: retq ## encoding: [0xc3] 1311 %q = load i64, i64* %ptr_b 1312 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1313 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer 1314 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1315 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) 1316 ret < 2 x i64> %res 1317} 1318 1319define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) { 1320; CHECK-LABEL: test_mask_mul_epu32_rmbkz_128: 1321; CHECK: ## BB#0: 1322; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1323; CHECK-NEXT: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07] 1324; CHECK-NEXT: retq ## encoding: [0xc3] 1325 %q = load i64, i64* %ptr_b 1326 %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0 1327 %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer 1328 %b = bitcast < 2 x i64> %b64 to < 4 x i32> 1329 %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask) 1330 ret < 2 x i64> %res 1331} 1332 1333declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8) 1334 1335define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) { 1336; CHECK-LABEL: test_mask_mul_epu32_rr_256: 1337; CHECK: ## BB#0: 1338; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf4,0xc1] 1339; CHECK-NEXT: retq ## encoding: [0xc3] 1340 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1341 ret < 4 x i64> %res 1342} 1343 1344define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) { 1345; CHECK-LABEL: test_mask_mul_epu32_rrk_256: 1346; CHECK: ## BB#0: 1347; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1348; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1] 1349; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1350; CHECK-NEXT: retq ## encoding: [0xc3] 1351 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1352 ret < 4 x i64> %res 1353} 1354 1355define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) { 1356; CHECK-LABEL: test_mask_mul_epu32_rrkz_256: 1357; CHECK: ## BB#0: 1358; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1359; CHECK-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1] 1360; CHECK-NEXT: retq ## encoding: [0xc3] 1361 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1362 ret < 4 x i64> %res 1363} 1364 1365define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) { 1366; CHECK-LABEL: test_mask_mul_epu32_rm_256: 1367; CHECK: ## BB#0: 1368; CHECK-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf4,0x07] 1369; CHECK-NEXT: retq ## encoding: [0xc3] 1370 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1371 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1372 ret < 4 x i64> %res 1373} 1374 1375define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 1376; CHECK-LABEL: test_mask_mul_epu32_rmk_256: 1377; CHECK: ## BB#0: 1378; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1379; CHECK-NEXT: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f] 1380; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1381; CHECK-NEXT: retq ## encoding: [0xc3] 1382 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1383 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1384 ret < 4 x i64> %res 1385} 1386 1387define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) { 1388; CHECK-LABEL: test_mask_mul_epu32_rmkz_256: 1389; CHECK: ## BB#0: 1390; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1391; CHECK-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07] 1392; CHECK-NEXT: retq ## encoding: [0xc3] 1393 %b = load < 8 x i32>, < 8 x i32>* %ptr_b 1394 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1395 ret < 4 x i64> %res 1396} 1397 1398define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) { 1399; CHECK-LABEL: test_mask_mul_epu32_rmb_256: 1400; CHECK: ## BB#0: 1401; CHECK-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07] 1402; CHECK-NEXT: retq ## encoding: [0xc3] 1403 %q = load i64, i64* %ptr_b 1404 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1405 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1406 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1407 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1) 1408 ret < 4 x i64> %res 1409} 1410 1411define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) { 1412; CHECK-LABEL: test_mask_mul_epu32_rmbk_256: 1413; CHECK: ## BB#0: 1414; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1415; CHECK-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f] 1416; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1417; CHECK-NEXT: retq ## encoding: [0xc3] 1418 %q = load i64, i64* %ptr_b 1419 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1420 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1421 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1422 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) 1423 ret < 4 x i64> %res 1424} 1425 1426define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) { 1427; CHECK-LABEL: test_mask_mul_epu32_rmbkz_256: 1428; CHECK: ## BB#0: 1429; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1430; CHECK-NEXT: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07] 1431; CHECK-NEXT: retq ## encoding: [0xc3] 1432 %q = load i64, i64* %ptr_b 1433 %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0 1434 %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer 1435 %b = bitcast < 4 x i64> %b64 to < 8 x i32> 1436 %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask) 1437 ret < 4 x i64> %res 1438} 1439 1440declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8) 1441 1442define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1443; CHECK-LABEL: test_mask_add_epi32_rr_128: 1444; CHECK: ## BB#0: 1445; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 1446; CHECK-NEXT: retq ## encoding: [0xc3] 1447 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1448 ret <4 x i32> %res 1449} 1450 1451define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1452; CHECK-LABEL: test_mask_add_epi32_rrk_128: 1453; CHECK: ## BB#0: 1454; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1455; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1] 1456; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1457; CHECK-NEXT: retq ## encoding: [0xc3] 1458 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1459 ret <4 x i32> %res 1460} 1461 1462define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1463; CHECK-LABEL: test_mask_add_epi32_rrkz_128: 1464; CHECK: ## BB#0: 1465; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1466; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1] 1467; CHECK-NEXT: retq ## encoding: [0xc3] 1468 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1469 ret <4 x i32> %res 1470} 1471 1472define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1473; CHECK-LABEL: test_mask_add_epi32_rm_128: 1474; CHECK: ## BB#0: 1475; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07] 1476; CHECK-NEXT: retq ## encoding: [0xc3] 1477 %b = load <4 x i32>, <4 x i32>* %ptr_b 1478 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1479 ret <4 x i32> %res 1480} 1481 1482define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1483; CHECK-LABEL: test_mask_add_epi32_rmk_128: 1484; CHECK: ## BB#0: 1485; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1486; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f] 1487; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1488; CHECK-NEXT: retq ## encoding: [0xc3] 1489 %b = load <4 x i32>, <4 x i32>* %ptr_b 1490 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1491 ret <4 x i32> %res 1492} 1493 1494define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1495; CHECK-LABEL: test_mask_add_epi32_rmkz_128: 1496; CHECK: ## BB#0: 1497; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1498; CHECK-NEXT: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07] 1499; CHECK-NEXT: retq ## encoding: [0xc3] 1500 %b = load <4 x i32>, <4 x i32>* %ptr_b 1501 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1502 ret <4 x i32> %res 1503} 1504 1505define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1506; CHECK-LABEL: test_mask_add_epi32_rmb_128: 1507; CHECK: ## BB#0: 1508; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07] 1509; CHECK-NEXT: retq ## encoding: [0xc3] 1510 %q = load i32, i32* %ptr_b 1511 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1512 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1513 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1514 ret <4 x i32> %res 1515} 1516 1517define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1518; CHECK-LABEL: test_mask_add_epi32_rmbk_128: 1519; CHECK: ## BB#0: 1520; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1521; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f] 1522; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1523; CHECK-NEXT: retq ## encoding: [0xc3] 1524 %q = load i32, i32* %ptr_b 1525 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1526 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1527 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1528 ret <4 x i32> %res 1529} 1530 1531define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1532; CHECK-LABEL: test_mask_add_epi32_rmbkz_128: 1533; CHECK: ## BB#0: 1534; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1535; CHECK-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07] 1536; CHECK-NEXT: retq ## encoding: [0xc3] 1537 %q = load i32, i32* %ptr_b 1538 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1539 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1540 %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1541 ret <4 x i32> %res 1542} 1543 1544declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1545 1546define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) { 1547; CHECK-LABEL: test_mask_sub_epi32_rr_128: 1548; CHECK: ## BB#0: 1549; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1] 1550; CHECK-NEXT: retq ## encoding: [0xc3] 1551 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1552 ret <4 x i32> %res 1553} 1554 1555define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) { 1556; CHECK-LABEL: test_mask_sub_epi32_rrk_128: 1557; CHECK: ## BB#0: 1558; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1559; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1] 1560; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1561; CHECK-NEXT: retq ## encoding: [0xc3] 1562 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1563 ret <4 x i32> %res 1564} 1565 1566define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) { 1567; CHECK-LABEL: test_mask_sub_epi32_rrkz_128: 1568; CHECK: ## BB#0: 1569; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1570; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1] 1571; CHECK-NEXT: retq ## encoding: [0xc3] 1572 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1573 ret <4 x i32> %res 1574} 1575 1576define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) { 1577; CHECK-LABEL: test_mask_sub_epi32_rm_128: 1578; CHECK: ## BB#0: 1579; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07] 1580; CHECK-NEXT: retq ## encoding: [0xc3] 1581 %b = load <4 x i32>, <4 x i32>* %ptr_b 1582 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1583 ret <4 x i32> %res 1584} 1585 1586define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1587; CHECK-LABEL: test_mask_sub_epi32_rmk_128: 1588; CHECK: ## BB#0: 1589; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1590; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f] 1591; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1592; CHECK-NEXT: retq ## encoding: [0xc3] 1593 %b = load <4 x i32>, <4 x i32>* %ptr_b 1594 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1595 ret <4 x i32> %res 1596} 1597 1598define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) { 1599; CHECK-LABEL: test_mask_sub_epi32_rmkz_128: 1600; CHECK: ## BB#0: 1601; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1602; CHECK-NEXT: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07] 1603; CHECK-NEXT: retq ## encoding: [0xc3] 1604 %b = load <4 x i32>, <4 x i32>* %ptr_b 1605 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1606 ret <4 x i32> %res 1607} 1608 1609define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) { 1610; CHECK-LABEL: test_mask_sub_epi32_rmb_128: 1611; CHECK: ## BB#0: 1612; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07] 1613; CHECK-NEXT: retq ## encoding: [0xc3] 1614 %q = load i32, i32* %ptr_b 1615 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1616 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1617 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1) 1618 ret <4 x i32> %res 1619} 1620 1621define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) { 1622; CHECK-LABEL: test_mask_sub_epi32_rmbk_128: 1623; CHECK: ## BB#0: 1624; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1625; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f] 1626; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1627; CHECK-NEXT: retq ## encoding: [0xc3] 1628 %q = load i32, i32* %ptr_b 1629 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1630 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1631 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) 1632 ret <4 x i32> %res 1633} 1634 1635define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) { 1636; CHECK-LABEL: test_mask_sub_epi32_rmbkz_128: 1637; CHECK: ## BB#0: 1638; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1639; CHECK-NEXT: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07] 1640; CHECK-NEXT: retq ## encoding: [0xc3] 1641 %q = load i32, i32* %ptr_b 1642 %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0 1643 %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer 1644 %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask) 1645 ret <4 x i32> %res 1646} 1647 1648declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 1649 1650define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1651; CHECK-LABEL: test_mask_sub_epi32_rr_256: 1652; CHECK: ## BB#0: 1653; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1] 1654; CHECK-NEXT: retq ## encoding: [0xc3] 1655 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1656 ret <8 x i32> %res 1657} 1658 1659define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1660; CHECK-LABEL: test_mask_sub_epi32_rrk_256: 1661; CHECK: ## BB#0: 1662; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1663; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1] 1664; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1665; CHECK-NEXT: retq ## encoding: [0xc3] 1666 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1667 ret <8 x i32> %res 1668} 1669 1670define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1671; CHECK-LABEL: test_mask_sub_epi32_rrkz_256: 1672; CHECK: ## BB#0: 1673; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1674; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1] 1675; CHECK-NEXT: retq ## encoding: [0xc3] 1676 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1677 ret <8 x i32> %res 1678} 1679 1680define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1681; CHECK-LABEL: test_mask_sub_epi32_rm_256: 1682; CHECK: ## BB#0: 1683; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07] 1684; CHECK-NEXT: retq ## encoding: [0xc3] 1685 %b = load <8 x i32>, <8 x i32>* %ptr_b 1686 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1687 ret <8 x i32> %res 1688} 1689 1690define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1691; CHECK-LABEL: test_mask_sub_epi32_rmk_256: 1692; CHECK: ## BB#0: 1693; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1694; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f] 1695; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1696; CHECK-NEXT: retq ## encoding: [0xc3] 1697 %b = load <8 x i32>, <8 x i32>* %ptr_b 1698 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1699 ret <8 x i32> %res 1700} 1701 1702define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1703; CHECK-LABEL: test_mask_sub_epi32_rmkz_256: 1704; CHECK: ## BB#0: 1705; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1706; CHECK-NEXT: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07] 1707; CHECK-NEXT: retq ## encoding: [0xc3] 1708 %b = load <8 x i32>, <8 x i32>* %ptr_b 1709 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1710 ret <8 x i32> %res 1711} 1712 1713define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1714; CHECK-LABEL: test_mask_sub_epi32_rmb_256: 1715; CHECK: ## BB#0: 1716; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07] 1717; CHECK-NEXT: retq ## encoding: [0xc3] 1718 %q = load i32, i32* %ptr_b 1719 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1720 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1721 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1722 ret <8 x i32> %res 1723} 1724 1725define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1726; CHECK-LABEL: test_mask_sub_epi32_rmbk_256: 1727; CHECK: ## BB#0: 1728; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1729; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f] 1730; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1731; CHECK-NEXT: retq ## encoding: [0xc3] 1732 %q = load i32, i32* %ptr_b 1733 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1734 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1735 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1736 ret <8 x i32> %res 1737} 1738 1739define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1740; CHECK-LABEL: test_mask_sub_epi32_rmbkz_256: 1741; CHECK: ## BB#0: 1742; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1743; CHECK-NEXT: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07] 1744; CHECK-NEXT: retq ## encoding: [0xc3] 1745 %q = load i32, i32* %ptr_b 1746 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1747 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1748 %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1749 ret <8 x i32> %res 1750} 1751 1752declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1753 1754define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) { 1755; CHECK-LABEL: test_mask_add_epi32_rr_256: 1756; CHECK: ## BB#0: 1757; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] 1758; CHECK-NEXT: retq ## encoding: [0xc3] 1759 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1760 ret <8 x i32> %res 1761} 1762 1763define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) { 1764; CHECK-LABEL: test_mask_add_epi32_rrk_256: 1765; CHECK: ## BB#0: 1766; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1767; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1] 1768; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1769; CHECK-NEXT: retq ## encoding: [0xc3] 1770 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1771 ret <8 x i32> %res 1772} 1773 1774define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) { 1775; CHECK-LABEL: test_mask_add_epi32_rrkz_256: 1776; CHECK: ## BB#0: 1777; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1778; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1] 1779; CHECK-NEXT: retq ## encoding: [0xc3] 1780 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1781 ret <8 x i32> %res 1782} 1783 1784define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) { 1785; CHECK-LABEL: test_mask_add_epi32_rm_256: 1786; CHECK: ## BB#0: 1787; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07] 1788; CHECK-NEXT: retq ## encoding: [0xc3] 1789 %b = load <8 x i32>, <8 x i32>* %ptr_b 1790 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1791 ret <8 x i32> %res 1792} 1793 1794define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1795; CHECK-LABEL: test_mask_add_epi32_rmk_256: 1796; CHECK: ## BB#0: 1797; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1798; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f] 1799; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1800; CHECK-NEXT: retq ## encoding: [0xc3] 1801 %b = load <8 x i32>, <8 x i32>* %ptr_b 1802 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1803 ret <8 x i32> %res 1804} 1805 1806define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) { 1807; CHECK-LABEL: test_mask_add_epi32_rmkz_256: 1808; CHECK: ## BB#0: 1809; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1810; CHECK-NEXT: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07] 1811; CHECK-NEXT: retq ## encoding: [0xc3] 1812 %b = load <8 x i32>, <8 x i32>* %ptr_b 1813 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1814 ret <8 x i32> %res 1815} 1816 1817define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) { 1818; CHECK-LABEL: test_mask_add_epi32_rmb_256: 1819; CHECK: ## BB#0: 1820; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07] 1821; CHECK-NEXT: retq ## encoding: [0xc3] 1822 %q = load i32, i32* %ptr_b 1823 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1824 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1825 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1) 1826 ret <8 x i32> %res 1827} 1828 1829define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) { 1830; CHECK-LABEL: test_mask_add_epi32_rmbk_256: 1831; CHECK: ## BB#0: 1832; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1833; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f] 1834; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 1835; CHECK-NEXT: retq ## encoding: [0xc3] 1836 %q = load i32, i32* %ptr_b 1837 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1838 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1839 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) 1840 ret <8 x i32> %res 1841} 1842 1843define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) { 1844; CHECK-LABEL: test_mask_add_epi32_rmbkz_256: 1845; CHECK: ## BB#0: 1846; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 1847; CHECK-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07] 1848; CHECK-NEXT: retq ## encoding: [0xc3] 1849 %q = load i32, i32* %ptr_b 1850 %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0 1851 %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer 1852 %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask) 1853 ret <8 x i32> %res 1854} 1855 1856declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 1857 1858define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) { 1859; CHECK-LABEL: test_cmpps_256: 1860; CHECK: ## BB#0: 1861; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02] 1862; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1863; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1864; CHECK-NEXT: retq ## encoding: [0xc3] 1865 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1) 1866 ret i8 %res 1867 } 1868 declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> , <8 x float> , i32, i8) 1869 1870define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) { 1871; CHECK-LABEL: test_cmpps_128: 1872; CHECK: ## BB#0: 1873; CHECK-NEXT: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02] 1874; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1875; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1876; CHECK-NEXT: retq ## encoding: [0xc3] 1877 %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1) 1878 ret i8 %res 1879 } 1880 declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> , <4 x float> , i32, i8) 1881 1882define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) { 1883; CHECK-LABEL: test_cmppd_256: 1884; CHECK: ## BB#0: 1885; CHECK-NEXT: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02] 1886; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1887; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1888; CHECK-NEXT: retq ## encoding: [0xc3] 1889 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1) 1890 ret i8 %res 1891 } 1892 declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> , <4 x double> , i32, i8) 1893 1894define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) { 1895; CHECK-LABEL: test_cmppd_128: 1896; CHECK: ## BB#0: 1897; CHECK-NEXT: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02] 1898; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 1899; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 1900; CHECK-NEXT: retq ## encoding: [0xc3] 1901 %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1) 1902 ret i8 %res 1903 } 1904 declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> , <2 x double> , i32, i8) 1905 1906define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 1907; CHECK-LABEL: test_mm512_maskz_add_ps_256: 1908; CHECK: ## BB#0: 1909; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1910; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1] 1911; CHECK-NEXT: retq ## encoding: [0xc3] 1912 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 1913 ret <8 x float> %res 1914} 1915 1916define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 1917; CHECK-LABEL: test_mm512_mask_add_ps_256: 1918; CHECK: ## BB#0: 1919; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1920; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1] 1921; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1922; CHECK-NEXT: retq ## encoding: [0xc3] 1923 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 1924 ret <8 x float> %res 1925} 1926 1927define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 1928; CHECK-LABEL: test_mm512_add_ps_256: 1929; CHECK: ## BB#0: 1930; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] 1931; CHECK-NEXT: retq ## encoding: [0xc3] 1932 %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 1933 ret <8 x float> %res 1934} 1935declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1936 1937define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 1938; CHECK-LABEL: test_mm512_maskz_add_ps_128: 1939; CHECK: ## BB#0: 1940; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1941; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1] 1942; CHECK-NEXT: retq ## encoding: [0xc3] 1943 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 1944 ret <4 x float> %res 1945} 1946 1947define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 1948; CHECK-LABEL: test_mm512_mask_add_ps_128: 1949; CHECK: ## BB#0: 1950; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1951; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1] 1952; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1953; CHECK-NEXT: retq ## encoding: [0xc3] 1954 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 1955 ret <4 x float> %res 1956} 1957 1958define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 1959; CHECK-LABEL: test_mm512_add_ps_128: 1960; CHECK: ## BB#0: 1961; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] 1962; CHECK-NEXT: retq ## encoding: [0xc3] 1963 %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 1964 ret <4 x float> %res 1965} 1966declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 1967 1968define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 1969; CHECK-LABEL: test_mm512_maskz_sub_ps_256: 1970; CHECK: ## BB#0: 1971; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1972; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1] 1973; CHECK-NEXT: retq ## encoding: [0xc3] 1974 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 1975 ret <8 x float> %res 1976} 1977 1978define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 1979; CHECK-LABEL: test_mm512_mask_sub_ps_256: 1980; CHECK: ## BB#0: 1981; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 1982; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1] 1983; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 1984; CHECK-NEXT: retq ## encoding: [0xc3] 1985 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 1986 ret <8 x float> %res 1987} 1988 1989define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 1990; CHECK-LABEL: test_mm512_sub_ps_256: 1991; CHECK: ## BB#0: 1992; CHECK-NEXT: vsubps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5c,0xc1] 1993; CHECK-NEXT: retq ## encoding: [0xc3] 1994 %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 1995 ret <8 x float> %res 1996} 1997declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 1998 1999define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2000; CHECK-LABEL: test_mm512_maskz_sub_ps_128: 2001; CHECK: ## BB#0: 2002; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2003; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1] 2004; CHECK-NEXT: retq ## encoding: [0xc3] 2005 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2006 ret <4 x float> %res 2007} 2008 2009define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2010; CHECK-LABEL: test_mm512_mask_sub_ps_128: 2011; CHECK: ## BB#0: 2012; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2013; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1] 2014; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2015; CHECK-NEXT: retq ## encoding: [0xc3] 2016 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2017 ret <4 x float> %res 2018} 2019 2020define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2021; CHECK-LABEL: test_mm512_sub_ps_128: 2022; CHECK: ## BB#0: 2023; CHECK-NEXT: vsubps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5c,0xc1] 2024; CHECK-NEXT: retq ## encoding: [0xc3] 2025 %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2026 ret <4 x float> %res 2027} 2028declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2029 2030define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2031; CHECK-LABEL: test_mm512_maskz_mul_ps_256: 2032; CHECK: ## BB#0: 2033; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2034; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1] 2035; CHECK-NEXT: retq ## encoding: [0xc3] 2036 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2037 ret <8 x float> %res 2038} 2039 2040define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2041; CHECK-LABEL: test_mm512_mask_mul_ps_256: 2042; CHECK: ## BB#0: 2043; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2044; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1] 2045; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2046; CHECK-NEXT: retq ## encoding: [0xc3] 2047 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2048 ret <8 x float> %res 2049} 2050 2051define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2052; CHECK-LABEL: test_mm512_mul_ps_256: 2053; CHECK: ## BB#0: 2054; CHECK-NEXT: vmulps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x59,0xc1] 2055; CHECK-NEXT: retq ## encoding: [0xc3] 2056 %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2057 ret <8 x float> %res 2058} 2059declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2060 2061define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2062; CHECK-LABEL: test_mm512_maskz_mul_ps_128: 2063; CHECK: ## BB#0: 2064; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2065; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1] 2066; CHECK-NEXT: retq ## encoding: [0xc3] 2067 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2068 ret <4 x float> %res 2069} 2070 2071define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2072; CHECK-LABEL: test_mm512_mask_mul_ps_128: 2073; CHECK: ## BB#0: 2074; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2075; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1] 2076; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2077; CHECK-NEXT: retq ## encoding: [0xc3] 2078 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2079 ret <4 x float> %res 2080} 2081 2082define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2083; CHECK-LABEL: test_mm512_mul_ps_128: 2084; CHECK: ## BB#0: 2085; CHECK-NEXT: vmulps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x59,0xc1] 2086; CHECK-NEXT: retq ## encoding: [0xc3] 2087 %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2088 ret <4 x float> %res 2089} 2090declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2091 2092define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2093; CHECK-LABEL: test_mm512_maskz_div_ps_256: 2094; CHECK: ## BB#0: 2095; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2096; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1] 2097; CHECK-NEXT: retq ## encoding: [0xc3] 2098 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2099 ret <8 x float> %res 2100} 2101 2102define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2103; CHECK-LABEL: test_mm512_mask_div_ps_256: 2104; CHECK: ## BB#0: 2105; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2106; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1] 2107; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2108; CHECK-NEXT: retq ## encoding: [0xc3] 2109 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2110 ret <8 x float> %res 2111} 2112 2113define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2114; CHECK-LABEL: test_mm512_div_ps_256: 2115; CHECK: ## BB#0: 2116; CHECK-NEXT: vdivps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5e,0xc1] 2117; CHECK-NEXT: retq ## encoding: [0xc3] 2118 %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2119 ret <8 x float> %res 2120} 2121declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2122 2123define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2124; CHECK-LABEL: test_mm512_maskz_div_ps_128: 2125; CHECK: ## BB#0: 2126; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2127; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1] 2128; CHECK-NEXT: retq ## encoding: [0xc3] 2129 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2130 ret <4 x float> %res 2131} 2132 2133define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2134; CHECK-LABEL: test_mm512_mask_div_ps_128: 2135; CHECK: ## BB#0: 2136; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2137; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1] 2138; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2139; CHECK-NEXT: retq ## encoding: [0xc3] 2140 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2141 ret <4 x float> %res 2142} 2143 2144define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2145; CHECK-LABEL: test_mm512_div_ps_128: 2146; CHECK: ## BB#0: 2147; CHECK-NEXT: vdivps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5e,0xc1] 2148; CHECK-NEXT: retq ## encoding: [0xc3] 2149 %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2150 ret <4 x float> %res 2151} 2152declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2153 2154define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2155; CHECK-LABEL: test_mm512_maskz_max_ps_256: 2156; CHECK: ## BB#0: 2157; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2158; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1] 2159; CHECK-NEXT: retq ## encoding: [0xc3] 2160 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2161 ret <8 x float> %res 2162} 2163 2164define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2165; CHECK-LABEL: test_mm512_mask_max_ps_256: 2166; CHECK: ## BB#0: 2167; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2168; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1] 2169; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2170; CHECK-NEXT: retq ## encoding: [0xc3] 2171 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2172 ret <8 x float> %res 2173} 2174 2175define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2176; CHECK-LABEL: test_mm512_max_ps_256: 2177; CHECK: ## BB#0: 2178; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5f,0xc1] 2179; CHECK-NEXT: retq ## encoding: [0xc3] 2180 %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2181 ret <8 x float> %res 2182} 2183declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2184 2185define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2186; CHECK-LABEL: test_mm512_maskz_max_ps_128: 2187; CHECK: ## BB#0: 2188; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2189; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1] 2190; CHECK-NEXT: retq ## encoding: [0xc3] 2191 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2192 ret <4 x float> %res 2193} 2194 2195define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2196; CHECK-LABEL: test_mm512_mask_max_ps_128: 2197; CHECK: ## BB#0: 2198; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2199; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1] 2200; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2201; CHECK-NEXT: retq ## encoding: [0xc3] 2202 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2203 ret <4 x float> %res 2204} 2205 2206define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2207; CHECK-LABEL: test_mm512_max_ps_128: 2208; CHECK: ## BB#0: 2209; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5f,0xc1] 2210; CHECK-NEXT: retq ## encoding: [0xc3] 2211 %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2212 ret <4 x float> %res 2213} 2214declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2215 2216define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2217; CHECK-LABEL: test_mm512_maskz_min_ps_256: 2218; CHECK: ## BB#0: 2219; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2220; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1] 2221; CHECK-NEXT: retq ## encoding: [0xc3] 2222 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask) 2223 ret <8 x float> %res 2224} 2225 2226define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) { 2227; CHECK-LABEL: test_mm512_mask_min_ps_256: 2228; CHECK: ## BB#0: 2229; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2230; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1] 2231; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2232; CHECK-NEXT: retq ## encoding: [0xc3] 2233 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) 2234 ret <8 x float> %res 2235} 2236 2237define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 2238; CHECK-LABEL: test_mm512_min_ps_256: 2239; CHECK: ## BB#0: 2240; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5d,0xc1] 2241; CHECK-NEXT: retq ## encoding: [0xc3] 2242 %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1) 2243 ret <8 x float> %res 2244} 2245declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2246 2247define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2248; CHECK-LABEL: test_mm512_maskz_min_ps_128: 2249; CHECK: ## BB#0: 2250; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2251; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1] 2252; CHECK-NEXT: retq ## encoding: [0xc3] 2253 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask) 2254 ret <4 x float> %res 2255} 2256 2257define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) { 2258; CHECK-LABEL: test_mm512_mask_min_ps_128: 2259; CHECK: ## BB#0: 2260; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2261; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1] 2262; CHECK-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2] 2263; CHECK-NEXT: retq ## encoding: [0xc3] 2264 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) 2265 ret <4 x float> %res 2266} 2267 2268define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 2269; CHECK-LABEL: test_mm512_min_ps_128: 2270; CHECK: ## BB#0: 2271; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5d,0xc1] 2272; CHECK-NEXT: retq ## encoding: [0xc3] 2273 %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1) 2274 ret <4 x float> %res 2275} 2276declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2277 2278define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) { 2279; CHECK-LABEL: test_sqrt_pd_256: 2280; CHECK: ## BB#0: 2281; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2282; CHECK-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0] 2283; CHECK-NEXT: retq ## encoding: [0xc3] 2284 %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask) 2285 ret <4 x double> %res 2286} 2287declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 2288 2289define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) { 2290; CHECK-LABEL: test_sqrt_ps_256: 2291; CHECK: ## BB#0: 2292; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2293; CHECK-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0] 2294; CHECK-NEXT: retq ## encoding: [0xc3] 2295 %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask) 2296 ret <8 x float> %res 2297} 2298 2299declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 2300 2301define <4 x double> @test_getexp_pd_256(<4 x double> %a0) { 2302; CHECK-LABEL: test_getexp_pd_256: 2303; CHECK: ## BB#0: 2304; CHECK-NEXT: vgetexppd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x42,0xc0] 2305; CHECK-NEXT: retq ## encoding: [0xc3] 2306 %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1) 2307 ret <4 x double> %res 2308} 2309 2310declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 2311 2312define <8 x float> @test_getexp_ps_256(<8 x float> %a0) { 2313; CHECK-LABEL: test_getexp_ps_256: 2314; CHECK: ## BB#0: 2315; CHECK-NEXT: vgetexpps %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x42,0xc0] 2316; CHECK-NEXT: retq ## encoding: [0xc3] 2317 %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1) 2318 ret <8 x float> %res 2319} 2320declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 2321 2322declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2323 2324define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 2325; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_128: 2326; CHECK: ## BB#0: 2327; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2328; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1] 2329; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1] 2330; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 2331; CHECK-NEXT: retq ## encoding: [0xc3] 2332 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask) 2333 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 2334 %res2 = add <4 x i32> %res, %res1 2335 ret <4 x i32> %res2 2336} 2337 2338declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2339 2340define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2341; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_256: 2342; CHECK: ## BB#0: 2343; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2344; CHECK-NEXT: vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1] 2345; CHECK-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3d,0xc1] 2346; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 2347; CHECK-NEXT: retq ## encoding: [0xc3] 2348 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2349 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2350 %res2 = add <8 x i32> %res, %res1 2351 ret <8 x i32> %res2 2352} 2353 2354declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2355 2356define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2357; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_128: 2358; CHECK: ## BB#0: 2359; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2360; CHECK-NEXT: vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1] 2361; CHECK-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xc1] 2362; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] 2363; CHECK-NEXT: retq ## encoding: [0xc3] 2364 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2365 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2366 %res2 = add <2 x i64> %res, %res1 2367 ret <2 x i64> %res2 2368} 2369 2370declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2371 2372define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 2373; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_256: 2374; CHECK: ## BB#0: 2375; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2376; CHECK-NEXT: vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1] 2377; CHECK-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1] 2378; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 2379; CHECK-NEXT: retq ## encoding: [0xc3] 2380 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 2381 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 2382 %res2 = add <4 x i64> %res, %res1 2383 ret <4 x i64> %res2 2384} 2385 2386declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2387 2388define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2,i8 %mask) { 2389; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_128: 2390; CHECK: ## BB#0: 2391; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2392; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1] 2393; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1] 2394; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 2395; CHECK-NEXT: retq ## encoding: [0xc3] 2396 %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 2397 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 2398 %res2 = add <4 x i32> %res, %res1 2399 ret <4 x i32> %res2 2400} 2401 2402declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2403 2404define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2405; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_256: 2406; CHECK: ## BB#0: 2407; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2408; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1] 2409; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3f,0xc1] 2410; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 2411; CHECK-NEXT: retq ## encoding: [0xc3] 2412 %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2413 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2414 %res2 = add <8 x i32> %res, %res1 2415 ret <8 x i32> %res2 2416} 2417 2418declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2419 2420define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2421; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_128: 2422; CHECK: ## BB#0: 2423; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2424; CHECK-NEXT: vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1] 2425; CHECK-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xc1] 2426; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] 2427; CHECK-NEXT: retq ## encoding: [0xc3] 2428 %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2429 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2430 %res2 = add <2 x i64> %res, %res1 2431 ret <2 x i64> %res2 2432} 2433 2434declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2435 2436define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 2437; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_256: 2438; CHECK: ## BB#0: 2439; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2440; CHECK-NEXT: vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1] 2441; CHECK-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1] 2442; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 2443; CHECK-NEXT: retq ## encoding: [0xc3] 2444 %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 2445 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 2446 %res2 = add <4 x i64> %res, %res1 2447 ret <4 x i64> %res2 2448} 2449 2450declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2451 2452define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 2453; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_128: 2454; CHECK: ## BB#0: 2455; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2456; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1] 2457; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1] 2458; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 2459; CHECK-NEXT: retq ## encoding: [0xc3] 2460 %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 2461 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 2462 %res2 = add <4 x i32> %res, %res1 2463 ret <4 x i32> %res2 2464} 2465 2466declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2467 2468define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2469; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_256: 2470; CHECK: ## BB#0: 2471; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2472; CHECK-NEXT: vpminsd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1] 2473; CHECK-NEXT: vpminsd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x39,0xc1] 2474; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 2475; CHECK-NEXT: retq ## encoding: [0xc3] 2476 %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2477 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2478 %res2 = add <8 x i32> %res, %res1 2479 ret <8 x i32> %res2 2480} 2481 2482declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2483 2484define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2485; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_128: 2486; CHECK: ## BB#0: 2487; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2488; CHECK-NEXT: vpminsq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1] 2489; CHECK-NEXT: vpminsq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x39,0xc1] 2490; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] 2491; CHECK-NEXT: retq ## encoding: [0xc3] 2492 %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2493 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2494 %res2 = add <2 x i64> %res, %res1 2495 ret <2 x i64> %res2 2496} 2497 2498declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2499 2500define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 2501; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_256: 2502; CHECK: ## BB#0: 2503; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2504; CHECK-NEXT: vpminsq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1] 2505; CHECK-NEXT: vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1] 2506; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 2507; CHECK-NEXT: retq ## encoding: [0xc3] 2508 %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 2509 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 2510 %res2 = add <4 x i64> %res, %res1 2511 ret <4 x i64> %res2 2512} 2513 2514declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2515 2516define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) { 2517; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_128: 2518; CHECK: ## BB#0: 2519; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2520; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1] 2521; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1] 2522; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 2523; CHECK-NEXT: retq ## encoding: [0xc3] 2524 %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) 2525 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask) 2526 %res2 = add <4 x i32> %res, %res1 2527 ret <4 x i32> %res2 2528} 2529 2530declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2531 2532define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2533; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_256: 2534; CHECK: ## BB#0: 2535; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2536; CHECK-NEXT: vpminud %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1] 2537; CHECK-NEXT: vpminud %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3b,0xc1] 2538; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 2539; CHECK-NEXT: retq ## encoding: [0xc3] 2540 %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2541 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2542 %res2 = add <8 x i32> %res, %res1 2543 ret <8 x i32> %res2 2544} 2545 2546declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 2547 2548define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 2549; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_128: 2550; CHECK: ## BB#0: 2551; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2552; CHECK-NEXT: vpminuq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1] 2553; CHECK-NEXT: vpminuq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xc1] 2554; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] 2555; CHECK-NEXT: retq ## encoding: [0xc3] 2556 %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 2557 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 2558 %res2 = add <2 x i64> %res, %res1 2559 ret <2 x i64> %res2 2560} 2561 2562declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 2563 2564define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) { 2565; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_256: 2566; CHECK: ## BB#0: 2567; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2568; CHECK-NEXT: vpminuq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1] 2569; CHECK-NEXT: vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1] 2570; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 2571; CHECK-NEXT: retq ## encoding: [0xc3] 2572 %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) 2573 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask) 2574 %res2 = add <4 x i64> %res, %res1 2575 ret <4 x i64> %res2 2576} 2577 2578declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2579 2580define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 2581; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128: 2582; CHECK: ## BB#0: 2583; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2584; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 2585; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xda] 2586; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca] 2587; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc1] 2588; CHECK-NEXT: retq ## encoding: [0xc3] 2589 %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 2590 %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 2591 %res2 = add <4 x i32> %res, %res1 2592 ret <4 x i32> %res2 2593} 2594 2595declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 2596 2597define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 2598; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128: 2599; CHECK: ## BB#0: 2600; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2601; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 2602; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xda] 2603; CHECK-NEXT: vpermt2d %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xca] 2604; CHECK-NEXT: vpaddd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc1] 2605; CHECK-NEXT: retq ## encoding: [0xc3] 2606 %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 2607 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 2608 %res2 = add <4 x i32> %res, %res1 2609 ret <4 x i32> %res2 2610} 2611 2612declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2613 2614define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2615; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256: 2616; CHECK: ## BB#0: 2617; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2618; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 2619; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xda] 2620; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca] 2621; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc1] 2622; CHECK-NEXT: retq ## encoding: [0xc3] 2623 %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2624 %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2625 %res2 = add <8 x i32> %res, %res1 2626 ret <8 x i32> %res2 2627} 2628 2629declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 2630 2631define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 2632; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256: 2633; CHECK: ## BB#0: 2634; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2635; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 2636; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xda] 2637; CHECK-NEXT: vpermt2d %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xca] 2638; CHECK-NEXT: vpaddd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc1] 2639; CHECK-NEXT: retq ## encoding: [0xc3] 2640 %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 2641 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 2642 %res2 = add <8 x i32> %res, %res1 2643 ret <8 x i32> %res2 2644} 2645 2646declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) 2647 2648define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { 2649; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128: 2650; CHECK: ## BB#0: 2651; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2652; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 2653; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x77,0xda] 2654; CHECK-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x77,0xca] 2655; CHECK-NEXT: vaddpd %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc1] 2656; CHECK-NEXT: retq ## encoding: [0xc3] 2657 %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) 2658 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) 2659 %res2 = fadd <2 x double> %res, %res1 2660 ret <2 x double> %res2 2661} 2662 2663declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) 2664 2665define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 2666; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256: 2667; CHECK: ## BB#0: 2668; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2669; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 2670; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x77,0xda] 2671; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x77,0xca] 2672; CHECK-NEXT: vaddpd %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc1] 2673; CHECK-NEXT: retq ## encoding: [0xc3] 2674 %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 2675 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 2676 %res2 = fadd <4 x double> %res, %res1 2677 ret <4 x double> %res2 2678} 2679 2680declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) 2681 2682define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { 2683; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128: 2684; CHECK: ## BB#0: 2685; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2686; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 2687; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xda] 2688; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xca] 2689; CHECK-NEXT: vaddps %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc1] 2690; CHECK-NEXT: retq ## encoding: [0xc3] 2691 %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) 2692 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) 2693 %res2 = fadd <4 x float> %res, %res1 2694 ret <4 x float> %res2 2695} 2696 2697declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) 2698 2699define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 2700; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256: 2701; CHECK: ## BB#0: 2702; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2703; CHECK-NEXT: vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9] 2704; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x77,0xda] 2705; CHECK-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x77,0xca] 2706; CHECK-NEXT: vaddps %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc1] 2707; CHECK-NEXT: retq ## encoding: [0xc3] 2708 %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 2709 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 2710 %res2 = fadd <8 x float> %res, %res1 2711 ret <8 x float> %res2 2712} 2713 2714declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8) 2715 2716define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { 2717; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_128: 2718; CHECK: ## BB#0: 2719; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2720; CHECK-NEXT: vpabsq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8] 2721; CHECK-NEXT: vpabsq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0] 2722; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 2723; CHECK-NEXT: retq ## encoding: [0xc3] 2724 %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 2725 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) 2726 %res2 = add <2 x i64> %res, %res1 2727 ret <2 x i64> %res2 2728} 2729 2730declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) 2731 2732define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { 2733; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_256: 2734; CHECK: ## BB#0: 2735; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2736; CHECK-NEXT: vpabsq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8] 2737; CHECK-NEXT: vpabsq %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0] 2738; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 2739; CHECK-NEXT: retq ## encoding: [0xc3] 2740 %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 2741 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) 2742 %res2 = add <4 x i64> %res, %res1 2743 ret <4 x i64> %res2 2744} 2745 2746declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8) 2747 2748define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { 2749; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_128: 2750; CHECK: ## BB#0: 2751; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2752; CHECK-NEXT: vpabsd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8] 2753; CHECK-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1e,0xc0] 2754; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 2755; CHECK-NEXT: retq ## encoding: [0xc3] 2756 %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 2757 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) 2758 %res2 = add <4 x i32> %res, %res1 2759 ret <4 x i32> %res2 2760} 2761 2762declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8) 2763 2764define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { 2765; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_256: 2766; CHECK: ## BB#0: 2767; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2768; CHECK-NEXT: vpabsd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8] 2769; CHECK-NEXT: vpabsd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1e,0xc0] 2770; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 2771; CHECK-NEXT: retq ## encoding: [0xc3] 2772 %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) 2773 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) 2774 %res2 = add <8 x i32> %res, %res1 2775 ret <8 x i32> %res2 2776} 2777 2778declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) 2779 2780define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { 2781; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_128: 2782; CHECK: ## BB#0: 2783; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2784; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1] 2785; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1] 2786; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] 2787; CHECK-NEXT: retq ## encoding: [0xc3] 2788 %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) 2789 %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) 2790 %res2 = fadd <2 x double> %res, %res1 2791 ret <2 x double> %res2 2792} 2793 2794declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) 2795 2796define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { 2797; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_256: 2798; CHECK: ## BB#0: 2799; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2800; CHECK-NEXT: vscalefpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x2c,0xd1] 2801; CHECK-NEXT: vscalefpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x2c,0xc1] 2802; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] 2803; CHECK-NEXT: retq ## encoding: [0xc3] 2804 %res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) 2805 %res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) 2806 %res2 = fadd <4 x double> %res, %res1 2807 ret <4 x double> %res2 2808} 2809 2810declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) 2811 2812define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { 2813; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_128: 2814; CHECK: ## BB#0: 2815; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2816; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1] 2817; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1] 2818; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] 2819; CHECK-NEXT: retq ## encoding: [0xc3] 2820 %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) 2821 %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) 2822 %res2 = fadd <4 x float> %res, %res1 2823 ret <4 x float> %res2 2824} 2825 2826declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) 2827 2828define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { 2829; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_256: 2830; CHECK: ## BB#0: 2831; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2832; CHECK-NEXT: vscalefps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2c,0xd1] 2833; CHECK-NEXT: vscalefps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2c,0xc1] 2834; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] 2835; CHECK-NEXT: retq ## encoding: [0xc3] 2836 %res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) 2837 %res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) 2838 %res2 = fadd <8 x float> %res, %res1 2839 ret <8 x float> %res2 2840} 2841 2842declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8) 2843 2844define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { 2845; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128: 2846; CHECK: ## BB#0: 2847; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2848; CHECK-NEXT: vpmovqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1] 2849; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2] 2850; CHECK-NEXT: vpmovqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x32,0xc0] 2851; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 2852; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 2853; CHECK-NEXT: retq ## encoding: [0xc3] 2854 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) 2855 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) 2856 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 2857 %res3 = add <16 x i8> %res0, %res1 2858 %res4 = add <16 x i8> %res3, %res2 2859 ret <16 x i8> %res4 2860} 2861 2862declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8) 2863 2864define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 2865; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128: 2866; CHECK: ## BB#0: 2867; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2868; CHECK-NEXT: vpmovqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x32,0x07] 2869; CHECK-NEXT: vpmovqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x32,0x07] 2870; CHECK-NEXT: retq ## encoding: [0xc3] 2871 call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 2872 call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 2873 ret void 2874} 2875 2876declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8) 2877 2878define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { 2879; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128: 2880; CHECK: ## BB#0: 2881; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2882; CHECK-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1] 2883; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2] 2884; CHECK-NEXT: vpmovsqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0xc0] 2885; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 2886; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 2887; CHECK-NEXT: retq ## encoding: [0xc3] 2888 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) 2889 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) 2890 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 2891 %res3 = add <16 x i8> %res0, %res1 2892 %res4 = add <16 x i8> %res3, %res2 2893 ret <16 x i8> %res4 2894} 2895 2896declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8) 2897 2898define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 2899; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128: 2900; CHECK: ## BB#0: 2901; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07] 2902; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2903; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07] 2904; CHECK-NEXT: retq ## encoding: [0xc3] 2905 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 2906 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 2907 ret void 2908} 2909 2910declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8) 2911 2912define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { 2913; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128: 2914; CHECK: ## BB#0: 2915; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2916; CHECK-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1] 2917; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2] 2918; CHECK-NEXT: vpmovusqb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0xc0] 2919; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 2920; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 2921; CHECK-NEXT: retq ## encoding: [0xc3] 2922 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1) 2923 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) 2924 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 2925 %res3 = add <16 x i8> %res0, %res1 2926 %res4 = add <16 x i8> %res3, %res2 2927 ret <16 x i8> %res4 2928} 2929 2930declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8) 2931 2932define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 2933; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128: 2934; CHECK: ## BB#0: 2935; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07] 2936; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2937; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07] 2938; CHECK-NEXT: retq ## encoding: [0xc3] 2939 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 2940 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 2941 ret void 2942} 2943 2944declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8) 2945 2946define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { 2947; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256: 2948; CHECK: ## BB#0: 2949; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2950; CHECK-NEXT: vpmovqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1] 2951; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2] 2952; CHECK-NEXT: vpmovqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x32,0xc0] 2953; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 2954; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 2955; CHECK-NEXT: retq ## encoding: [0xc3] 2956 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) 2957 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) 2958 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 2959 %res3 = add <16 x i8> %res0, %res1 2960 %res4 = add <16 x i8> %res3, %res2 2961 ret <16 x i8> %res4 2962} 2963 2964declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8) 2965 2966define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 2967; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256: 2968; CHECK: ## BB#0: 2969; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 2970; CHECK-NEXT: vpmovqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x32,0x07] 2971; CHECK-NEXT: vpmovqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x32,0x07] 2972; CHECK-NEXT: retq ## encoding: [0xc3] 2973 call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 2974 call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 2975 ret void 2976} 2977 2978declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8) 2979 2980define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { 2981; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256: 2982; CHECK: ## BB#0: 2983; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 2984; CHECK-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1] 2985; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2] 2986; CHECK-NEXT: vpmovsqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0xc0] 2987; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 2988; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 2989; CHECK-NEXT: retq ## encoding: [0xc3] 2990 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) 2991 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) 2992 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 2993 %res3 = add <16 x i8> %res0, %res1 2994 %res4 = add <16 x i8> %res3, %res2 2995 ret <16 x i8> %res4 2996} 2997 2998declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8) 2999 3000define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3001; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256: 3002; CHECK: ## BB#0: 3003; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07] 3004; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3005; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07] 3006; CHECK-NEXT: retq ## encoding: [0xc3] 3007 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3008 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3009 ret void 3010} 3011 3012declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8) 3013 3014define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) { 3015; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256: 3016; CHECK: ## BB#0: 3017; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3018; CHECK-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1] 3019; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2] 3020; CHECK-NEXT: vpmovusqb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0xc0] 3021; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 3022; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 3023; CHECK-NEXT: retq ## encoding: [0xc3] 3024 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1) 3025 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) 3026 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) 3027 %res3 = add <16 x i8> %res0, %res1 3028 %res4 = add <16 x i8> %res3, %res2 3029 ret <16 x i8> %res4 3030} 3031 3032declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8) 3033 3034define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3035; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256: 3036; CHECK: ## BB#0: 3037; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07] 3038; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3039; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07] 3040; CHECK-NEXT: retq ## encoding: [0xc3] 3041 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3042 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3043 ret void 3044} 3045 3046declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8) 3047 3048define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3049; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128: 3050; CHECK: ## BB#0: 3051; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3052; CHECK-NEXT: vpmovqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x34,0xc1] 3053; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x34,0xc2] 3054; CHECK-NEXT: vpmovqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x34,0xc0] 3055; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3056; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3057; CHECK-NEXT: retq ## encoding: [0xc3] 3058 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) 3059 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) 3060 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3061 %res3 = add <8 x i16> %res0, %res1 3062 %res4 = add <8 x i16> %res3, %res2 3063 ret <8 x i16> %res4 3064} 3065 3066declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8) 3067 3068define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3069; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128: 3070; CHECK: ## BB#0: 3071; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3072; CHECK-NEXT: vpmovqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x34,0x07] 3073; CHECK-NEXT: vpmovqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x34,0x07] 3074; CHECK-NEXT: retq ## encoding: [0xc3] 3075 call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3076 call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3077 ret void 3078} 3079 3080declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8) 3081 3082define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3083; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128: 3084; CHECK: ## BB#0: 3085; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3086; CHECK-NEXT: vpmovsqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0xc1] 3087; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x24,0xc2] 3088; CHECK-NEXT: vpmovsqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0xc0] 3089; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3090; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3091; CHECK-NEXT: retq ## encoding: [0xc3] 3092 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) 3093 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) 3094 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3095 %res3 = add <8 x i16> %res0, %res1 3096 %res4 = add <8 x i16> %res3, %res2 3097 ret <8 x i16> %res4 3098} 3099 3100declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8) 3101 3102define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3103; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128: 3104; CHECK: ## BB#0: 3105; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07] 3106; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3107; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07] 3108; CHECK-NEXT: retq ## encoding: [0xc3] 3109 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3110 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3111 ret void 3112} 3113 3114declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8) 3115 3116define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3117; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128: 3118; CHECK: ## BB#0: 3119; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3120; CHECK-NEXT: vpmovusqw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0xc1] 3121; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x14,0xc2] 3122; CHECK-NEXT: vpmovusqw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0xc0] 3123; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3124; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3125; CHECK-NEXT: retq ## encoding: [0xc3] 3126 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1) 3127 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) 3128 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3129 %res3 = add <8 x i16> %res0, %res1 3130 %res4 = add <8 x i16> %res3, %res2 3131 ret <8 x i16> %res4 3132} 3133 3134declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8) 3135 3136define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3137; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128: 3138; CHECK: ## BB#0: 3139; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07] 3140; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3141; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07] 3142; CHECK-NEXT: retq ## encoding: [0xc3] 3143 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3144 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3145 ret void 3146} 3147 3148declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8) 3149 3150define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3151; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256: 3152; CHECK: ## BB#0: 3153; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3154; CHECK-NEXT: vpmovqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x34,0xc1] 3155; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x34,0xc2] 3156; CHECK-NEXT: vpmovqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x34,0xc0] 3157; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3158; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3159; CHECK-NEXT: retq ## encoding: [0xc3] 3160 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) 3161 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) 3162 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3163 %res3 = add <8 x i16> %res0, %res1 3164 %res4 = add <8 x i16> %res3, %res2 3165 ret <8 x i16> %res4 3166} 3167 3168declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8) 3169 3170define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3171; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256: 3172; CHECK: ## BB#0: 3173; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3174; CHECK-NEXT: vpmovqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x34,0x07] 3175; CHECK-NEXT: vpmovqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x34,0x07] 3176; CHECK-NEXT: retq ## encoding: [0xc3] 3177 call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3178 call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3179 ret void 3180} 3181 3182declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8) 3183 3184define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3185; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256: 3186; CHECK: ## BB#0: 3187; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3188; CHECK-NEXT: vpmovsqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0xc1] 3189; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x24,0xc2] 3190; CHECK-NEXT: vpmovsqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0xc0] 3191; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3192; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3193; CHECK-NEXT: retq ## encoding: [0xc3] 3194 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) 3195 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) 3196 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3197 %res3 = add <8 x i16> %res0, %res1 3198 %res4 = add <8 x i16> %res3, %res2 3199 ret <8 x i16> %res4 3200} 3201 3202declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8) 3203 3204define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3205; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256: 3206; CHECK: ## BB#0: 3207; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07] 3208; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3209; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07] 3210; CHECK-NEXT: retq ## encoding: [0xc3] 3211 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3212 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3213 ret void 3214} 3215 3216declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8) 3217 3218define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) { 3219; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256: 3220; CHECK: ## BB#0: 3221; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3222; CHECK-NEXT: vpmovusqw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0xc1] 3223; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x14,0xc2] 3224; CHECK-NEXT: vpmovusqw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0xc0] 3225; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3226; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3227; CHECK-NEXT: retq ## encoding: [0xc3] 3228 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1) 3229 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) 3230 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) 3231 %res3 = add <8 x i16> %res0, %res1 3232 %res4 = add <8 x i16> %res3, %res2 3233 ret <8 x i16> %res4 3234} 3235 3236declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8) 3237 3238define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3239; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256: 3240; CHECK: ## BB#0: 3241; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07] 3242; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3243; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07] 3244; CHECK-NEXT: retq ## encoding: [0xc3] 3245 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3246 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3247 ret void 3248} 3249 3250declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8) 3251 3252define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3253; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128: 3254; CHECK: ## BB#0: 3255; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3256; CHECK-NEXT: vpmovqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x35,0xc1] 3257; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x35,0xc2] 3258; CHECK-NEXT: vpmovqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x35,0xc0] 3259; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 3260; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] 3261; CHECK-NEXT: retq ## encoding: [0xc3] 3262 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) 3263 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) 3264 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3265 %res3 = add <4 x i32> %res0, %res1 3266 %res4 = add <4 x i32> %res3, %res2 3267 ret <4 x i32> %res4 3268} 3269 3270declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8) 3271 3272define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3273; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128: 3274; CHECK: ## BB#0: 3275; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3276; CHECK-NEXT: vpmovqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x35,0x07] 3277; CHECK-NEXT: vpmovqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x35,0x07] 3278; CHECK-NEXT: retq ## encoding: [0xc3] 3279 call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3280 call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3281 ret void 3282} 3283 3284declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8) 3285 3286define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3287; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128: 3288; CHECK: ## BB#0: 3289; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3290; CHECK-NEXT: vpmovsqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0xc1] 3291; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x25,0xc2] 3292; CHECK-NEXT: vpmovsqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0xc0] 3293; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 3294; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] 3295; CHECK-NEXT: retq ## encoding: [0xc3] 3296 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) 3297 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) 3298 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3299 %res3 = add <4 x i32> %res0, %res1 3300 %res4 = add <4 x i32> %res3, %res2 3301 ret <4 x i32> %res4 3302} 3303 3304declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8) 3305 3306define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3307; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128: 3308; CHECK: ## BB#0: 3309; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07] 3310; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3311; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07] 3312; CHECK-NEXT: retq ## encoding: [0xc3] 3313 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3314 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3315 ret void 3316} 3317 3318declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8) 3319 3320define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3321; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128: 3322; CHECK: ## BB#0: 3323; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3324; CHECK-NEXT: vpmovusqd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0xc1] 3325; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x15,0xc2] 3326; CHECK-NEXT: vpmovusqd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0xc0] 3327; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 3328; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] 3329; CHECK-NEXT: retq ## encoding: [0xc3] 3330 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1) 3331 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) 3332 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3333 %res3 = add <4 x i32> %res0, %res1 3334 %res4 = add <4 x i32> %res3, %res2 3335 ret <4 x i32> %res4 3336} 3337 3338declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8) 3339 3340define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { 3341; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128: 3342; CHECK: ## BB#0: 3343; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07] 3344; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3345; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07] 3346; CHECK-NEXT: retq ## encoding: [0xc3] 3347 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) 3348 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2) 3349 ret void 3350} 3351 3352declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8) 3353 3354define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3355; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256: 3356; CHECK: ## BB#0: 3357; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3358; CHECK-NEXT: vpmovqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1] 3359; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc2] 3360; CHECK-NEXT: vpmovqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc0] 3361; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 3362; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] 3363; CHECK-NEXT: retq ## encoding: [0xc3] 3364 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) 3365 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) 3366 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3367 %res3 = add <4 x i32> %res0, %res1 3368 %res4 = add <4 x i32> %res3, %res2 3369 ret <4 x i32> %res4 3370} 3371 3372declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8) 3373 3374define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3375; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256: 3376; CHECK: ## BB#0: 3377; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3378; CHECK-NEXT: vpmovqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x35,0x07] 3379; CHECK-NEXT: vpmovqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x35,0x07] 3380; CHECK-NEXT: retq ## encoding: [0xc3] 3381 call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3382 call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3383 ret void 3384} 3385 3386declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8) 3387 3388define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3389; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256: 3390; CHECK: ## BB#0: 3391; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3392; CHECK-NEXT: vpmovsqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0xc1] 3393; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x25,0xc2] 3394; CHECK-NEXT: vpmovsqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0xc0] 3395; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 3396; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] 3397; CHECK-NEXT: retq ## encoding: [0xc3] 3398 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) 3399 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) 3400 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3401 %res3 = add <4 x i32> %res0, %res1 3402 %res4 = add <4 x i32> %res3, %res2 3403 ret <4 x i32> %res4 3404} 3405 3406declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8) 3407 3408define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3409; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256: 3410; CHECK: ## BB#0: 3411; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07] 3412; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3413; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07] 3414; CHECK-NEXT: retq ## encoding: [0xc3] 3415 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3416 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3417 ret void 3418} 3419 3420declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8) 3421 3422define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) { 3423; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256: 3424; CHECK: ## BB#0: 3425; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3426; CHECK-NEXT: vpmovusqd %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0xc1] 3427; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x15,0xc2] 3428; CHECK-NEXT: vpmovusqd %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0xc0] 3429; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1] 3430; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2] 3431; CHECK-NEXT: retq ## encoding: [0xc3] 3432 %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) 3433 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) 3434 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) 3435 %res3 = add <4 x i32> %res0, %res1 3436 %res4 = add <4 x i32> %res3, %res2 3437 ret <4 x i32> %res4 3438} 3439 3440declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8) 3441 3442define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { 3443; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256: 3444; CHECK: ## BB#0: 3445; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07] 3446; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3447; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07] 3448; CHECK-NEXT: retq ## encoding: [0xc3] 3449 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) 3450 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2) 3451 ret void 3452} 3453 3454declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8) 3455 3456define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3457; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128: 3458; CHECK: ## BB#0: 3459; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3460; CHECK-NEXT: vpmovdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1] 3461; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2] 3462; CHECK-NEXT: vpmovdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x31,0xc0] 3463; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 3464; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 3465; CHECK-NEXT: retq ## encoding: [0xc3] 3466 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) 3467 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) 3468 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3469 %res3 = add <16 x i8> %res0, %res1 3470 %res4 = add <16 x i8> %res3, %res2 3471 ret <16 x i8> %res4 3472} 3473 3474declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8) 3475 3476define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3477; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128: 3478; CHECK: ## BB#0: 3479; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3480; CHECK-NEXT: vpmovdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x31,0x07] 3481; CHECK-NEXT: vpmovdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x31,0x07] 3482; CHECK-NEXT: retq ## encoding: [0xc3] 3483 call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3484 call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3485 ret void 3486} 3487 3488declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8) 3489 3490define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3491; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128: 3492; CHECK: ## BB#0: 3493; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3494; CHECK-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1] 3495; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2] 3496; CHECK-NEXT: vpmovsdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0xc0] 3497; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 3498; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 3499; CHECK-NEXT: retq ## encoding: [0xc3] 3500 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) 3501 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) 3502 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3503 %res3 = add <16 x i8> %res0, %res1 3504 %res4 = add <16 x i8> %res3, %res2 3505 ret <16 x i8> %res4 3506} 3507 3508declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8) 3509 3510define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3511; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128: 3512; CHECK: ## BB#0: 3513; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07] 3514; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3515; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07] 3516; CHECK-NEXT: retq ## encoding: [0xc3] 3517 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3518 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3519 ret void 3520} 3521 3522declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8) 3523 3524define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3525; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128: 3526; CHECK: ## BB#0: 3527; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3528; CHECK-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1] 3529; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2] 3530; CHECK-NEXT: vpmovusdb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0xc0] 3531; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 3532; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 3533; CHECK-NEXT: retq ## encoding: [0xc3] 3534 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1) 3535 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) 3536 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3537 %res3 = add <16 x i8> %res0, %res1 3538 %res4 = add <16 x i8> %res3, %res2 3539 ret <16 x i8> %res4 3540} 3541 3542declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8) 3543 3544define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3545; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128: 3546; CHECK: ## BB#0: 3547; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07] 3548; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3549; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07] 3550; CHECK-NEXT: retq ## encoding: [0xc3] 3551 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3552 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3553 ret void 3554} 3555 3556declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8) 3557 3558define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3559; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256: 3560; CHECK: ## BB#0: 3561; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3562; CHECK-NEXT: vpmovdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1] 3563; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2] 3564; CHECK-NEXT: vpmovdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x31,0xc0] 3565; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 3566; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 3567; CHECK-NEXT: retq ## encoding: [0xc3] 3568 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) 3569 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) 3570 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3571 %res3 = add <16 x i8> %res0, %res1 3572 %res4 = add <16 x i8> %res3, %res2 3573 ret <16 x i8> %res4 3574} 3575 3576declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8) 3577 3578define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3579; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256: 3580; CHECK: ## BB#0: 3581; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3582; CHECK-NEXT: vpmovdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x31,0x07] 3583; CHECK-NEXT: vpmovdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x31,0x07] 3584; CHECK-NEXT: retq ## encoding: [0xc3] 3585 call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3586 call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3587 ret void 3588} 3589 3590declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8) 3591 3592define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3593; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256: 3594; CHECK: ## BB#0: 3595; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3596; CHECK-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1] 3597; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2] 3598; CHECK-NEXT: vpmovsdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0xc0] 3599; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 3600; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 3601; CHECK-NEXT: retq ## encoding: [0xc3] 3602 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) 3603 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) 3604 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3605 %res3 = add <16 x i8> %res0, %res1 3606 %res4 = add <16 x i8> %res3, %res2 3607 ret <16 x i8> %res4 3608} 3609 3610declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8) 3611 3612define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3613; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256: 3614; CHECK: ## BB#0: 3615; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07] 3616; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3617; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07] 3618; CHECK-NEXT: retq ## encoding: [0xc3] 3619 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3620 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3621 ret void 3622} 3623 3624declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8) 3625 3626define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) { 3627; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256: 3628; CHECK: ## BB#0: 3629; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3630; CHECK-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1] 3631; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2] 3632; CHECK-NEXT: vpmovusdb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0xc0] 3633; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc1] 3634; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfc,0xc2] 3635; CHECK-NEXT: retq ## encoding: [0xc3] 3636 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1) 3637 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) 3638 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2) 3639 %res3 = add <16 x i8> %res0, %res1 3640 %res4 = add <16 x i8> %res3, %res2 3641 ret <16 x i8> %res4 3642} 3643 3644declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8) 3645 3646define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3647; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256: 3648; CHECK: ## BB#0: 3649; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07] 3650; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3651; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07] 3652; CHECK-NEXT: retq ## encoding: [0xc3] 3653 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3654 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3655 ret void 3656} 3657 3658declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8) 3659 3660define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3661; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128: 3662; CHECK: ## BB#0: 3663; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3664; CHECK-NEXT: vpmovdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x33,0xc1] 3665; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x33,0xc2] 3666; CHECK-NEXT: vpmovdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x33,0xc0] 3667; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3668; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3669; CHECK-NEXT: retq ## encoding: [0xc3] 3670 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) 3671 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) 3672 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3673 %res3 = add <8 x i16> %res0, %res1 3674 %res4 = add <8 x i16> %res3, %res2 3675 ret <8 x i16> %res4 3676} 3677 3678declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8) 3679 3680define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3681; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128: 3682; CHECK: ## BB#0: 3683; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3684; CHECK-NEXT: vpmovdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x33,0x07] 3685; CHECK-NEXT: vpmovdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x33,0x07] 3686; CHECK-NEXT: retq ## encoding: [0xc3] 3687 call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3688 call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3689 ret void 3690} 3691 3692declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8) 3693 3694define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3695; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128: 3696; CHECK: ## BB#0: 3697; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3698; CHECK-NEXT: vpmovsdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0xc1] 3699; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x23,0xc2] 3700; CHECK-NEXT: vpmovsdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0xc0] 3701; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3702; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3703; CHECK-NEXT: retq ## encoding: [0xc3] 3704 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) 3705 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) 3706 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3707 %res3 = add <8 x i16> %res0, %res1 3708 %res4 = add <8 x i16> %res3, %res2 3709 ret <8 x i16> %res4 3710} 3711 3712declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8) 3713 3714define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3715; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128: 3716; CHECK: ## BB#0: 3717; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07] 3718; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3719; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07] 3720; CHECK-NEXT: retq ## encoding: [0xc3] 3721 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3722 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3723 ret void 3724} 3725 3726declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8) 3727 3728define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3729; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128: 3730; CHECK: ## BB#0: 3731; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3732; CHECK-NEXT: vpmovusdw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0xc1] 3733; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x13,0xc2] 3734; CHECK-NEXT: vpmovusdw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0xc0] 3735; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3736; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3737; CHECK-NEXT: retq ## encoding: [0xc3] 3738 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1) 3739 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) 3740 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3741 %res3 = add <8 x i16> %res0, %res1 3742 %res4 = add <8 x i16> %res3, %res2 3743 ret <8 x i16> %res4 3744} 3745 3746declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8) 3747 3748define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { 3749; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128: 3750; CHECK: ## BB#0: 3751; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07] 3752; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3753; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07] 3754; CHECK-NEXT: retq ## encoding: [0xc3] 3755 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) 3756 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2) 3757 ret void 3758} 3759 3760declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8) 3761 3762define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3763; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256: 3764; CHECK: ## BB#0: 3765; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3766; CHECK-NEXT: vpmovdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x33,0xc1] 3767; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x33,0xc2] 3768; CHECK-NEXT: vpmovdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0] 3769; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3770; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3771; CHECK-NEXT: retq ## encoding: [0xc3] 3772 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) 3773 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) 3774 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3775 %res3 = add <8 x i16> %res0, %res1 3776 %res4 = add <8 x i16> %res3, %res2 3777 ret <8 x i16> %res4 3778} 3779 3780declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8) 3781 3782define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3783; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256: 3784; CHECK: ## BB#0: 3785; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3786; CHECK-NEXT: vpmovdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x33,0x07] 3787; CHECK-NEXT: vpmovdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x33,0x07] 3788; CHECK-NEXT: retq ## encoding: [0xc3] 3789 call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3790 call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3791 ret void 3792} 3793 3794declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8) 3795 3796define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3797; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256: 3798; CHECK: ## BB#0: 3799; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3800; CHECK-NEXT: vpmovsdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0xc1] 3801; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x23,0xc2] 3802; CHECK-NEXT: vpmovsdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0xc0] 3803; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3804; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3805; CHECK-NEXT: retq ## encoding: [0xc3] 3806 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) 3807 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) 3808 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3809 %res3 = add <8 x i16> %res0, %res1 3810 %res4 = add <8 x i16> %res3, %res2 3811 ret <8 x i16> %res4 3812} 3813 3814declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8) 3815 3816define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3817; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256: 3818; CHECK: ## BB#0: 3819; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07] 3820; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3821; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07] 3822; CHECK-NEXT: retq ## encoding: [0xc3] 3823 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3824 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3825 ret void 3826} 3827 3828declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8) 3829 3830define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) { 3831; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256: 3832; CHECK: ## BB#0: 3833; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3834; CHECK-NEXT: vpmovusdw %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0xc1] 3835; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x13,0xc2] 3836; CHECK-NEXT: vpmovusdw %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0xc0] 3837; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc1] 3838; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 3839; CHECK-NEXT: retq ## encoding: [0xc3] 3840 %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1) 3841 %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) 3842 %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2) 3843 %res3 = add <8 x i16> %res0, %res1 3844 %res4 = add <8 x i16> %res3, %res2 3845 ret <8 x i16> %res4 3846} 3847 3848declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8) 3849 3850define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { 3851; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256: 3852; CHECK: ## BB#0: 3853; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07] 3854; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 3855; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07] 3856; CHECK-NEXT: retq ## encoding: [0xc3] 3857 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) 3858 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2) 3859 ret void 3860} 3861 3862declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8) 3863 3864define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { 3865; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128: 3866; CHECK: ## BB#0: 3867; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3868; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8] 3869; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xe6,0xc0] 3870; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 3871; CHECK-NEXT: retq ## encoding: [0xc3] 3872 %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) 3873 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) 3874 %res2 = fadd <2 x double> %res, %res1 3875 ret <2 x double> %res2 3876} 3877 3878declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8) 3879 3880define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { 3881; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256: 3882; CHECK: ## BB#0: 3883; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3884; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8] 3885; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 ## encoding: [0xc5,0xfe,0xe6,0xc0] 3886; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 3887; CHECK-NEXT: retq ## encoding: [0xc3] 3888 %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) 3889 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) 3890 %res2 = fadd <4 x double> %res, %res1 3891 ret <4 x double> %res2 3892} 3893 3894declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8) 3895 3896define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { 3897; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128: 3898; CHECK: ## BB#0: 3899; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3900; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8] 3901; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5b,0xc0] 3902; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 3903; CHECK-NEXT: retq ## encoding: [0xc3] 3904 %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) 3905 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) 3906 %res2 = fadd <4 x float> %res, %res1 3907 ret <4 x float> %res2 3908} 3909 3910declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8) 3911 3912define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { 3913; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256: 3914; CHECK: ## BB#0: 3915; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3916; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8] 3917; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5b,0xc0] 3918; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 3919; CHECK-NEXT: retq ## encoding: [0xc3] 3920 %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) 3921 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) 3922 %res2 = fadd <8 x float> %res, %res1 3923 ret <8 x float> %res2 3924} 3925 3926declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8) 3927 3928define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { 3929; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128: 3930; CHECK: ## BB#0: 3931; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3932; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0xe6,0xc8] 3933; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] 3934; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 3935; CHECK-NEXT: retq ## encoding: [0xc3] 3936 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) 3937 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) 3938 %res2 = add <4 x i32> %res, %res1 3939 ret <4 x i32> %res2 3940} 3941 3942declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8) 3943 3944define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 3945; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256: 3946; CHECK: ## BB#0: 3947; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3948; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8] 3949; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0xe6,0xc0] 3950; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 3951; CHECK-NEXT: retq ## encoding: [0xc3] 3952 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 3953 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 3954 %res2 = add <4 x i32> %res, %res1 3955 ret <4 x i32> %res2 3956} 3957 3958declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8) 3959 3960define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) { 3961; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256: 3962; CHECK: ## BB#0: 3963; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3964; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8] 3965; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5a,0xc0] 3966; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 3967; CHECK-NEXT: retq ## encoding: [0xc3] 3968 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2) 3969 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1) 3970 %res2 = fadd <4 x float> %res, %res1 3971 ret <4 x float> %res2 3972} 3973 3974declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8) 3975 3976define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) { 3977; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps: 3978; CHECK: ## BB#0: 3979; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3980; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x5a,0xc8] 3981; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x5a,0xc0] 3982; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 3983; CHECK-NEXT: retq ## encoding: [0xc3] 3984 %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) 3985 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1) 3986 %res2 = fadd <4 x float> %res, %res1 3987 ret <4 x float> %res2 3988} 3989 3990declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8) 3991 3992define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { 3993; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128: 3994; CHECK: ## BB#0: 3995; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 3996; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x79,0xc8] 3997; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x79,0xc0] 3998; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 3999; CHECK-NEXT: retq ## encoding: [0xc3] 4000 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) 4001 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) 4002 %res2 = add <4 x i32> %res, %res1 4003 ret <4 x i32> %res2 4004} 4005 4006declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8) 4007 4008define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 4009; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256: 4010; CHECK: ## BB#0: 4011; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4012; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x79,0xc8] 4013; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x79,0xc0] 4014; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4015; CHECK-NEXT: retq ## encoding: [0xc3] 4016 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 4017 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 4018 %res2 = add <4 x i32> %res, %res1 4019 ret <4 x i32> %res2 4020} 4021 4022declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8) 4023 4024define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 4025; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128: 4026; CHECK: ## BB#0: 4027; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4028; CHECK-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x5b,0xc8] 4029; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x5b,0xc0] 4030; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4031; CHECK-NEXT: retq ## encoding: [0xc3] 4032 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 4033 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 4034 %res2 = add <4 x i32> %res, %res1 4035 ret <4 x i32> %res2 4036} 4037 4038declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8) 4039 4040define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 4041; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256: 4042; CHECK: ## BB#0: 4043; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4044; CHECK-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x5b,0xc8] 4045; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x5b,0xc0] 4046; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 4047; CHECK-NEXT: retq ## encoding: [0xc3] 4048 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 4049 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 4050 %res2 = add <8 x i32> %res, %res1 4051 ret <8 x i32> %res2 4052} 4053 4054declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8) 4055 4056define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) { 4057; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128: 4058; CHECK: ## BB#0: 4059; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4060; CHECK-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8] 4061; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x5a,0xc0] 4062; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 4063; CHECK-NEXT: retq ## encoding: [0xc3] 4064 %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2) 4065 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1) 4066 %res2 = fadd <2 x double> %res, %res1 4067 ret <2 x double> %res2 4068} 4069 4070declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8) 4071 4072define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) { 4073; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256: 4074; CHECK: ## BB#0: 4075; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4076; CHECK-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8] 4077; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x5a,0xc0] 4078; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 4079; CHECK-NEXT: retq ## encoding: [0xc3] 4080 %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2) 4081 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1) 4082 %res2 = fadd <4 x double> %res, %res1 4083 ret <4 x double> %res2 4084} 4085 4086declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8) 4087 4088define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 4089; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128: 4090; CHECK: ## BB#0: 4091; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4092; CHECK-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x79,0xc8] 4093; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x79,0xc0] 4094; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4095; CHECK-NEXT: retq ## encoding: [0xc3] 4096 %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 4097 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 4098 %res2 = add <4 x i32> %res, %res1 4099 ret <4 x i32> %res2 4100} 4101 4102declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8) 4103 4104define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 4105; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256: 4106; CHECK: ## BB#0: 4107; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4108; CHECK-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x79,0xc8] 4109; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x79,0xc0] 4110; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 4111; CHECK-NEXT: retq ## encoding: [0xc3] 4112 %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 4113 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 4114 %res2 = add <8 x i32> %res, %res1 4115 ret <8 x i32> %res2 4116} 4117 4118declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8) 4119 4120define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { 4121; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128: 4122; CHECK: ## BB#0: 4123; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4124; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xe6,0xc8] 4125; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe6,0xc0] 4126; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4127; CHECK-NEXT: retq ## encoding: [0xc3] 4128 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) 4129 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) 4130 %res2 = add <4 x i32> %res, %res1 4131 ret <4 x i32> %res2 4132} 4133 4134declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8) 4135 4136define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 4137; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256: 4138; CHECK: ## BB#0: 4139; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4140; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8] 4141; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xe6,0xc0] 4142; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4143; CHECK-NEXT: retq ## encoding: [0xc3] 4144 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 4145 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 4146 %res2 = add <4 x i32> %res, %res1 4147 ret <4 x i32> %res2 4148} 4149 4150declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8) 4151 4152define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) { 4153; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128: 4154; CHECK: ## BB#0: 4155; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4156; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x09,0x78,0xc8] 4157; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x08,0x78,0xc0] 4158; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4159; CHECK-NEXT: retq ## encoding: [0xc3] 4160 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) 4161 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1) 4162 %res2 = add <4 x i32> %res, %res1 4163 ret <4 x i32> %res2 4164} 4165 4166declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8) 4167 4168define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) { 4169; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256: 4170; CHECK: ## BB#0: 4171; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4172; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8] 4173; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0] 4174; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4175; CHECK-NEXT: retq ## encoding: [0xc3] 4176 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) 4177 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1) 4178 %res2 = add <4 x i32> %res, %res1 4179 ret <4 x i32> %res2 4180} 4181 4182declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8) 4183 4184define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 4185; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128: 4186; CHECK: ## BB#0: 4187; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4188; CHECK-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8] 4189; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0] 4190; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4191; CHECK-NEXT: retq ## encoding: [0xc3] 4192 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 4193 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 4194 %res2 = add <4 x i32> %res, %res1 4195 ret <4 x i32> %res2 4196} 4197 4198declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8) 4199 4200define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 4201; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256: 4202; CHECK: ## BB#0: 4203; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4204; CHECK-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8] 4205; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x5b,0xc0] 4206; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 4207; CHECK-NEXT: retq ## encoding: [0xc3] 4208 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 4209 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 4210 %res2 = add <8 x i32> %res, %res1 4211 ret <8 x i32> %res2 4212} 4213 4214declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8) 4215 4216define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) { 4217; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128: 4218; CHECK: ## BB#0: 4219; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4220; CHECK-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8] 4221; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0] 4222; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 4223; CHECK-NEXT: retq ## encoding: [0xc3] 4224 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) 4225 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1) 4226 %res2 = add <4 x i32> %res, %res1 4227 ret <4 x i32> %res2 4228} 4229 4230declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8) 4231 4232define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) { 4233; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256: 4234; CHECK: ## BB#0: 4235; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4236; CHECK-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8] 4237; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0] 4238; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 4239; CHECK-NEXT: retq ## encoding: [0xc3] 4240 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) 4241 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1) 4242 %res2 = add <8 x i32> %res, %res1 4243 ret <8 x i32> %res2 4244} 4245 4246declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8) 4247 4248define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) { 4249; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128: 4250; CHECK: ## BB#0: 4251; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4252; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8] 4253; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xc0] 4254; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 4255; CHECK-NEXT: retq ## encoding: [0xc3] 4256 %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) 4257 %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1) 4258 %res2 = fadd <2 x double> %res, %res1 4259 ret <2 x double> %res2 4260} 4261 4262declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8) 4263 4264define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) { 4265; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256: 4266; CHECK: ## BB#0: 4267; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4268; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8] 4269; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xc0] 4270; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 4271; CHECK-NEXT: retq ## encoding: [0xc3] 4272 %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) 4273 %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1) 4274 %res2 = fadd <4 x double> %res, %res1 4275 ret <4 x double> %res2 4276} 4277 4278declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8) 4279 4280define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) { 4281; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128: 4282; CHECK: ## BB#0: 4283; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4284; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8] 4285; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xc0] 4286; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 4287; CHECK-NEXT: retq ## encoding: [0xc3] 4288 %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) 4289 %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1) 4290 %res2 = fadd <4 x float> %res, %res1 4291 ret <4 x float> %res2 4292} 4293 4294declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8) 4295 4296define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) { 4297; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256: 4298; CHECK: ## BB#0: 4299; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4300; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8] 4301; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xc0] 4302; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 4303; CHECK-NEXT: retq ## encoding: [0xc3] 4304 %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) 4305 %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1) 4306 %res2 = fadd <8 x float> %res, %res1 4307 ret <8 x float> %res2 4308} 4309 4310declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8) 4311 4312define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { 4313; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_pd_128: 4314; CHECK: ## BB#0: 4315; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4316; CHECK-NEXT: vrndscalepd $4, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x09,0xc8,0x04] 4317; CHECK-NEXT: vrndscalepd $88, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x09,0xc0,0x58] 4318; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 4319; CHECK-NEXT: retq ## encoding: [0xc3] 4320 %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3) 4321 %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1) 4322 %res2 = fadd <2 x double> %res, %res1 4323 ret <2 x double> %res2 4324} 4325 4326declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8) 4327 4328define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { 4329; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_pd_256: 4330; CHECK: ## BB#0: 4331; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4332; CHECK-NEXT: vrndscalepd $4, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x09,0xc8,0x04] 4333; CHECK-NEXT: vrndscalepd $88, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x09,0xc0,0x58] 4334; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 4335; CHECK-NEXT: retq ## encoding: [0xc3] 4336 %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3) 4337 %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1) 4338 %res2 = fadd <4 x double> %res, %res1 4339 ret <4 x double> %res2 4340} 4341 4342declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8) 4343 4344define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { 4345; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ps_128: 4346; CHECK: ## BB#0: 4347; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4348; CHECK-NEXT: vrndscaleps $88, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x08,0xc8,0x58] 4349; CHECK-NEXT: vrndscaleps $4, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x08,0xc0,0x04] 4350; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 4351; CHECK-NEXT: retq ## encoding: [0xc3] 4352 %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3) 4353 %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1) 4354 %res2 = fadd <4 x float> %res, %res1 4355 ret <4 x float> %res2 4356} 4357 4358declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8) 4359 4360define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { 4361; CHECK-LABEL: test_int_x86_avx512_mask_rndscale_ps_256: 4362; CHECK: ## BB#0: 4363; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4364; CHECK-NEXT: vrndscaleps $5, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x08,0xc8,0x05] 4365; CHECK-NEXT: vrndscaleps $66, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x08,0xc0,0x42] 4366; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 4367; CHECK-NEXT: retq ## encoding: [0xc3] 4368 %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3) 4369 %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1) 4370 %res2 = fadd <8 x float> %res, %res1 4371 ret <8 x float> %res2 4372} 4373 4374declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) 4375 4376define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { 4377; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256: 4378; CHECK: ## BB#0: 4379; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4380; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16] 4381; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7] 4382; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd9,0x16] 4383; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7] 4384; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16] 4385; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 4386; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] 4387; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 4388; CHECK-NEXT: retq ## encoding: [0xc3] 4389 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) 4390 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) 4391 %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4) 4392 %res3 = fadd <8 x float> %res, %res1 4393 %res4 = fadd <8 x float> %res2, %res3 4394 ret <8 x float> %res4 4395} 4396 4397declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) 4398 4399define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { 4400; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256: 4401; CHECK: ## BB#0: 4402; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4403; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16] 4404; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3] 4405; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd9,0x16] 4406; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3] 4407; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16] 4408; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] 4409; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] 4410; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0] 4411; CHECK-NEXT: retq ## encoding: [0xc3] 4412 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4) 4413 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1) 4414 %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4) 4415 %res3 = fadd <4 x double> %res, %res1 4416 %res4 = fadd <4 x double> %res2, %res3 4417 ret <4 x double> %res4 4418} 4419 4420declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8) 4421 4422define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 4423; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256: 4424; CHECK: ## BB#0: 4425; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4426; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16] 4427; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7] 4428; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16] 4429; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 4430; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 4431; CHECK-NEXT: retq ## encoding: [0xc3] 4432 %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4) 4433 %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1) 4434 %res2 = add <8 x i32> %res, %res1 4435 ret <8 x i32> %res2 4436} 4437 4438declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8) 4439 4440define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 4441; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256: 4442; CHECK: ## BB#0: 4443; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4444; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16] 4445; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3] 4446; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16] 4447; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] 4448; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 4449; CHECK-NEXT: retq ## encoding: [0xc3] 4450 %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4) 4451 %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1) 4452 %res2 = add <4 x i64> %res, %res1 4453 ret <4 x i64> %res2 4454} 4455 4456declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8) 4457 4458define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) { 4459; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256: 4460; CHECK: ## BB#0: 4461; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4462; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01] 4463; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc2,0x01] 4464; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x19,0xc0,0x01] 4465; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] 4466; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] 4467; CHECK-NEXT: retq ## encoding: [0xc3] 4468 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3) 4469 %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3) 4470 %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1) 4471 %res3 = fadd <4 x float> %res, %res1 4472 %res4 = fadd <4 x float> %res2, %res3 4473 ret <4 x float> %res4 4474} 4475 4476declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8) 4477 4478define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { 4479; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128: 4480; CHECK: ## BB#0: 4481; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4482; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x26,0xc8,0x0b] 4483; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x26,0xd0,0x0b] 4484; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x26,0xc0,0x0b] 4485; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 4486; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] 4487; CHECK-NEXT: retq ## encoding: [0xc3] 4488 %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3) 4489 %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> zeroinitializer, i8 %x3) 4490 %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 -1) 4491 %res3 = fadd <2 x double> %res, %res1 4492 %res4 = fadd <2 x double> %res2, %res3 4493 ret <2 x double> %res4 4494} 4495 4496declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8) 4497 4498define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { 4499; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_256: 4500; CHECK: ## BB#0: 4501; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4502; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x26,0xc8,0x0b] 4503; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x26,0xc0,0x0b] 4504; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 4505; CHECK-NEXT: retq ## encoding: [0xc3] 4506 %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3) 4507 %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 -1) 4508 %res2 = fadd <4 x double> %res, %res1 4509 ret <4 x double> %res2 4510} 4511 4512declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8) 4513 4514define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { 4515; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_128: 4516; CHECK: ## BB#0: 4517; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4518; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x26,0xc8,0x0b] 4519; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x26,0xc0,0x0b] 4520; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] 4521; CHECK-NEXT: retq ## encoding: [0xc3] 4522 %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3) 4523 %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 -1) 4524 %res2 = fadd <4 x float> %res, %res1 4525 ret <4 x float> %res2 4526} 4527 4528declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8) 4529 4530define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { 4531; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_256: 4532; CHECK: ## BB#0: 4533; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4534; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x26,0xc8,0x0b] 4535; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x26,0xc0,0x0b] 4536; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 4537; CHECK-NEXT: retq ## encoding: [0xc3] 4538 %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3) 4539 %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1) 4540 %res2 = fadd <8 x float> %res, %res1 4541 ret <8 x float> %res2 4542} 4543 4544declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8) 4545 4546define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { 4547; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128: 4548; CHECK: ## BB#0: 4549; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4550; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x16] 4551; CHECK-NEXT: ## xmm2 {%k1} = xmm0[0],xmm1[1] 4552; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xd9,0x16] 4553; CHECK-NEXT: ## xmm3 {%k1} {z} = xmm0[0],xmm1[1] 4554; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc6,0xc1,0x16] 4555; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1] 4556; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] 4557; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 4558; CHECK-NEXT: retq ## encoding: [0xc3] 4559 %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4) 4560 %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 -1) 4561 %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> zeroinitializer, i8 %x4) 4562 %res3 = fadd <2 x double> %res, %res1 4563 %res4 = fadd <2 x double> %res2, %res3 4564 ret <2 x double> %res4 4565} 4566 4567declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8) 4568 4569define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) { 4570; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256: 4571; CHECK: ## BB#0: 4572; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4573; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x16] 4574; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2] 4575; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc6,0xc1,0x16] 4576; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2] 4577; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] 4578; CHECK-NEXT: retq ## encoding: [0xc3] 4579 %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4) 4580 %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1) 4581 %res2 = fadd <4 x double> %res, %res1 4582 ret <4 x double> %res2 4583} 4584 4585declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8) 4586 4587define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { 4588; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128: 4589; CHECK: ## BB#0: 4590; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4591; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16] 4592; CHECK-NEXT: ## xmm2 {%k1} = xmm0[2,1],xmm1[1,0] 4593; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc6,0xc1,0x16] 4594; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0] 4595; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] 4596; CHECK-NEXT: retq ## encoding: [0xc3] 4597 %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4) 4598 %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1) 4599 %res2 = fadd <4 x float> %res, %res1 4600 ret <4 x float> %res2 4601} 4602 4603declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8) 4604 4605define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) { 4606; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256: 4607; CHECK: ## BB#0: 4608; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4609; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16] 4610; CHECK-NEXT: ## ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] 4611; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc6,0xc1,0x16] 4612; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4] 4613; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] 4614; CHECK-NEXT: retq ## encoding: [0xc3] 4615 %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4) 4616 %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1) 4617 %res2 = fadd <8 x float> %res, %res1 4618 ret <8 x float> %res2 4619} 4620 4621declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8) 4622 4623define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { 4624; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128: 4625; CHECK: ## BB#0: 4626; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4627; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x16] 4628; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x03,0xd9,0x16] 4629; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x03,0xc1,0x16] 4630; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 4631; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3] 4632; CHECK-NEXT: retq ## encoding: [0xc3] 4633 %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4) 4634 %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1) 4635 %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4) 4636 %res3 = add <4 x i32> %res, %res1 4637 %res4 = add <4 x i32> %res3, %res2 4638 ret <4 x i32> %res4 4639} 4640 4641declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8) 4642 4643define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) { 4644; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256: 4645; CHECK: ## BB#0: 4646; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4647; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x16] 4648; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x03,0xc1,0x16] 4649; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 4650; CHECK-NEXT: retq ## encoding: [0xc3] 4651 %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4) 4652 %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1) 4653 %res2 = add <8 x i32> %res, %res1 4654 ret <8 x i32> %res2 4655} 4656 4657declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8) 4658 4659define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) { 4660; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128: 4661; CHECK: ## BB#0: 4662; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4663; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x16] 4664; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x03,0xc1,0x16] 4665; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] 4666; CHECK-NEXT: retq ## encoding: [0xc3] 4667 %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4) 4668 %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1) 4669 %res2 = add <2 x i64> %res, %res1 4670 ret <2 x i64> %res2 4671} 4672 4673declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8) 4674 4675define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) { 4676; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256: 4677; CHECK: ## BB#0: 4678; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4679; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x16] 4680; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x03,0xc1,0x16] 4681; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 4682; CHECK-NEXT: retq ## encoding: [0xc3] 4683 %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4) 4684 %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1) 4685 %res2 = add <4 x i64> %res, %res1 4686 ret <4 x i64> %res2 4687} 4688 4689declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) 4690 4691define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 4692; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256: 4693; CHECK: ## BB#0: 4694; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4695; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1] 4696; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xd9] 4697; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x0d,0xc1] 4698; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xcb] 4699; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] 4700; CHECK-NEXT: retq ## encoding: [0xc3] 4701 %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 4702 %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) 4703 %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 4704 %res3 = fadd <4 x double> %res, %res1 4705 %res4 = fadd <4 x double> %res2, %res3 4706 ret <4 x double> %res4 4707} 4708 4709declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8) 4710 4711define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) { 4712; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128: 4713; CHECK: ## BB#0: 4714; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4715; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1] 4716; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xd9] 4717; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x0d,0xc1] 4718; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xcb] 4719; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 4720; CHECK-NEXT: retq ## encoding: [0xc3] 4721 %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) 4722 %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3) 4723 %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1) 4724 %res3 = fadd <2 x double> %res, %res1 4725 %res4 = fadd <2 x double> %res3, %res2 4726 ret <2 x double> %res4 4727} 4728 4729declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8) 4730 4731define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 4732; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256: 4733; CHECK: ## BB#0: 4734; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4735; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1] 4736; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xd9] 4737; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x0c,0xc1] 4738; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xcb] 4739; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 4740; CHECK-NEXT: retq ## encoding: [0xc3] 4741 %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 4742 %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) 4743 %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 4744 %res3 = fadd <8 x float> %res, %res1 4745 %res4 = fadd <8 x float> %res3, %res2 4746 ret <8 x float> %res4 4747} 4748 4749declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8) 4750 4751define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) { 4752; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128: 4753; CHECK: ## BB#0: 4754; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4755; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1] 4756; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xd9] 4757; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0c,0xc1] 4758; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xcb] 4759; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] 4760; CHECK-NEXT: retq ## encoding: [0xc3] 4761 %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) 4762 %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3) 4763 %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1) 4764 %res3 = fadd <4 x float> %res, %res1 4765 %res4 = fadd <4 x float> %res2, %res3 4766 ret <4 x float> %res4 4767} 4768 4769declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8) 4770 4771define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) { 4772; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256: 4773; CHECK: ## BB#0: 4774; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4775; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01] 4776; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xd9,0x01] 4777; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x18,0xc1,0x01] 4778; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] 4779; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 4780; CHECK-NEXT: retq ## encoding: [0xc3] 4781 %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4) 4782 %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1) 4783 %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4) 4784 %res3 = fadd <8 x float> %res, %res1 4785 %res4 = fadd <8 x float> %res2, %res3 4786 ret <8 x float> %res4 4787} 4788 4789declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8) 4790 4791define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) { 4792; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256: 4793; CHECK: ## BB#0: 4794; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4795; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01] 4796; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xd9,0x01] 4797; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x38,0xc1,0x01] 4798; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 4799; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] 4800; CHECK-NEXT: retq ## encoding: [0xc3] 4801 4802 %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4) 4803 %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1) 4804 %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4) 4805 %res3 = add <8 x i32> %res, %res1 4806 %res4 = add <8 x i32> %res2, %res3 4807 ret <8 x i32> %res4 4808} 4809 4810declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) 4811 4812define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { 4813; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128: 4814; CHECK: ## BB#0: 4815; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4816; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 4817; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x25,0xda,0x21] 4818; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21] 4819; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc0] 4820; CHECK-NEXT: retq ## encoding: [0xc3] 4821 %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) 4822 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) 4823 %res2 = add <4 x i32> %res, %res1 4824 ret <4 x i32> %res2 4825} 4826 4827declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) 4828 4829define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { 4830; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: 4831; CHECK: ## BB#0: 4832; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4833; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 4834; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x25,0xda,0x21] 4835; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21] 4836; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfe,0xc0] 4837; CHECK-NEXT: retq ## encoding: [0xc3] 4838 %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) 4839 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) 4840 %res2 = add <4 x i32> %res, %res1 4841 ret <4 x i32> %res2 4842} 4843 4844declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) 4845 4846define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { 4847; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256: 4848; CHECK: ## BB#0: 4849; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4850; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 4851; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x25,0xda,0x21] 4852; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21] 4853; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] 4854; CHECK-NEXT: retq ## encoding: [0xc3] 4855 %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) 4856 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) 4857 %res2 = add <8 x i32> %res, %res1 4858 ret <8 x i32> %res2 4859} 4860 4861declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) 4862 4863define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { 4864; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: 4865; CHECK: ## BB#0: 4866; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4867; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 4868; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x25,0xda,0x21] 4869; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21] 4870; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] 4871; CHECK-NEXT: retq ## encoding: [0xc3] 4872 %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) 4873 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) 4874 %res2 = add <8 x i32> %res, %res1 4875 ret <8 x i32> %res2 4876} 4877 4878declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) 4879 4880define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { 4881; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128: 4882; CHECK: ## BB#0: 4883; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4884; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 4885; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x25,0xda,0x21] 4886; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21] 4887; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0xd4,0xc0] 4888; CHECK-NEXT: retq ## encoding: [0xc3] 4889 %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) 4890 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) 4891 %res2 = add <2 x i64> %res, %res1 4892 ret <2 x i64> %res2 4893} 4894 4895declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) 4896 4897define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { 4898; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128: 4899; CHECK: ## BB#0: 4900; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4901; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 4902; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x25,0xda,0x21] 4903; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21] 4904; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0xd4,0xc0] 4905; CHECK-NEXT: retq ## encoding: [0xc3] 4906 %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) 4907 %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) 4908 %res2 = add <2 x i64> %res, %res1 4909 ret <2 x i64> %res2 4910} 4911 4912declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8) 4913 4914define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { 4915; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256: 4916; CHECK: ## BB#0: 4917; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4918; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 4919; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x25,0xda,0x21] 4920; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21] 4921; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0xd4,0xc0] 4922; CHECK-NEXT: retq ## encoding: [0xc3] 4923 %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) 4924 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) 4925 %res2 = add <4 x i64> %res, %res1 4926 ret <4 x i64> %res2 4927} 4928 4929declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8) 4930 4931define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { 4932; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256: 4933; CHECK: ## BB#0: 4934; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4935; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 4936; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xda,0x21] 4937; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21] 4938; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0xd4,0xc0] 4939; CHECK-NEXT: retq ## encoding: [0xc3] 4940 %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) 4941 %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) 4942 %res2 = add <4 x i64> %res, %res1 4943 ret <4 x i64> %res2 4944} 4945 4946define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) { 4947; CHECK-LABEL: test_x86_vcvtph2ps_128: 4948; CHECK: ## BB#0: 4949; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x13,0xc0] 4950; CHECK-NEXT: retq ## encoding: [0xc3] 4951 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1) 4952 ret <4 x float> %res 4953} 4954 4955define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) { 4956; CHECK-LABEL: test_x86_vcvtph2ps_128_rrk: 4957; CHECK: ## BB#0: 4958; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4959; CHECK-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8] 4960; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 4961; CHECK-NEXT: retq ## encoding: [0xc3] 4962 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask) 4963 ret <4 x float> %res 4964} 4965 4966 4967define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) { 4968; CHECK-LABEL: test_x86_vcvtph2ps_128_rrkz: 4969; CHECK: ## BB#0: 4970; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4971; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0] 4972; CHECK-NEXT: retq ## encoding: [0xc3] 4973 %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask) 4974 ret <4 x float> %res 4975} 4976 4977declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly 4978 4979define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) { 4980; CHECK-LABEL: test_x86_vcvtph2ps_256: 4981; CHECK: ## BB#0: 4982; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x13,0xc0] 4983; CHECK-NEXT: retq ## encoding: [0xc3] 4984 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1) 4985 ret <8 x float> %res 4986} 4987 4988define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) { 4989; CHECK-LABEL: test_x86_vcvtph2ps_256_rrk: 4990; CHECK: ## BB#0: 4991; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 4992; CHECK-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8] 4993; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 4994; CHECK-NEXT: retq ## encoding: [0xc3] 4995 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask) 4996 ret <8 x float> %res 4997} 4998 4999define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) { 5000; CHECK-LABEL: test_x86_vcvtph2ps_256_rrkz: 5001; CHECK: ## BB#0: 5002; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5003; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0] 5004; CHECK-NEXT: retq ## encoding: [0xc3] 5005 %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask) 5006 ret <8 x float> %res 5007} 5008 5009declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly 5010 5011define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %src) { 5012; CHECK-LABEL: test_x86_vcvtps2ph_128: 5013; CHECK: ## BB#0: 5014; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5015; CHECK-NEXT: vcvtps2ph $2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1d,0xc1,0x02] 5016; CHECK-NEXT: vcvtps2ph $2, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x1d,0xc2,0x02] 5017; CHECK-NEXT: vcvtps2ph $2, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1d,0xc0,0x02] 5018; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 5019; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xfd,0xc0] 5020; CHECK-NEXT: retq ## encoding: [0xc3] 5021 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1) 5022 %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask) 5023 %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> %src, i8 %mask) 5024 %res0 = add <8 x i16> %res1, %res2 5025 %res = add <8 x i16> %res3, %res0 5026 ret <8 x i16> %res 5027} 5028 5029declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly 5030 5031define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %src) { 5032; CHECK-LABEL: test_x86_vcvtps2ph_256: 5033; CHECK: ## BB#0: 5034; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5035; CHECK-NEXT: vcvtps2ph $2, %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1d,0xc1,0x02] 5036; CHECK-NEXT: vcvtps2ph $2, %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0xc2,0x02] 5037; CHECK-NEXT: vcvtps2ph $2, %ymm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1d,0xc0,0x02] 5038; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xfd,0xc2] 5039; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0xc5,0xf1,0xfd,0xc0] 5040; CHECK-NEXT: retq ## encoding: [0xc3] 5041 %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1) 5042 %res2 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 %mask) 5043 %res3 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> %src, i8 %mask) 5044 %res0 = add <8 x i16> %res1, %res2 5045 %res = add <8 x i16> %res3, %res0 5046 ret <8 x i16> %res 5047} 5048 5049declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly 5050 5051define <8 x float> @test_rsqrt_ps_256_rr(<8 x float> %a0) { 5052; CHECK-LABEL: test_rsqrt_ps_256_rr: 5053; CHECK: ## BB#0: 5054; CHECK-NEXT: vrsqrt14ps %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x4e,0xc0] 5055; CHECK-NEXT: retq ## encoding: [0xc3] 5056 %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1) 5057 ret <8 x float> %res 5058} 5059 5060define <8 x float> @test_rsqrt_ps_256_rrkz(<8 x float> %a0, i8 %mask) { 5061; CHECK-LABEL: test_rsqrt_ps_256_rrkz: 5062; CHECK: ## BB#0: 5063; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5064; CHECK-NEXT: vrsqrt14ps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x4e,0xc0] 5065; CHECK-NEXT: retq ## encoding: [0xc3] 5066 %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask) 5067 ret <8 x float> %res 5068} 5069 5070define <8 x float> @test_rsqrt_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5071; CHECK-LABEL: test_rsqrt_ps_256_rrk: 5072; CHECK: ## BB#0: 5073; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5074; CHECK-NEXT: vrsqrt14ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x4e,0xc8] 5075; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 5076; CHECK-NEXT: retq ## encoding: [0xc3] 5077 %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask) 5078 ret <8 x float> %res 5079} 5080 5081define <4 x float> @test_rsqrt_ps_128_rr(<4 x float> %a0) { 5082; CHECK-LABEL: test_rsqrt_ps_128_rr: 5083; CHECK: ## BB#0: 5084; CHECK-NEXT: vrsqrt14ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x4e,0xc0] 5085; CHECK-NEXT: retq ## encoding: [0xc3] 5086 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1) 5087 ret <4 x float> %res 5088} 5089 5090define <4 x float> @test_rsqrt_ps_128_rrkz(<4 x float> %a0, i8 %mask) { 5091; CHECK-LABEL: test_rsqrt_ps_128_rrkz: 5092; CHECK: ## BB#0: 5093; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5094; CHECK-NEXT: vrsqrt14ps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x4e,0xc0] 5095; CHECK-NEXT: retq ## encoding: [0xc3] 5096 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask) 5097 ret <4 x float> %res 5098} 5099 5100define <4 x float> @test_rsqrt_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5101; CHECK-LABEL: test_rsqrt_ps_128_rrk: 5102; CHECK: ## BB#0: 5103; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5104; CHECK-NEXT: vrsqrt14ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x4e,0xc8] 5105; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 5106; CHECK-NEXT: retq ## encoding: [0xc3] 5107 %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) 5108 ret <4 x float> %res 5109} 5110 5111declare <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 5112declare <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone 5113 5114define <8 x float> @test_rcp_ps_256_rr(<8 x float> %a0) { 5115; CHECK-LABEL: test_rcp_ps_256_rr: 5116; CHECK: ## BB#0: 5117; CHECK-NEXT: vrcp14ps %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x4c,0xc0] 5118; CHECK-NEXT: retq ## encoding: [0xc3] 5119 %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1) 5120 ret <8 x float> %res 5121} 5122 5123define <8 x float> @test_rcp_ps_256_rrkz(<8 x float> %a0, i8 %mask) { 5124; CHECK-LABEL: test_rcp_ps_256_rrkz: 5125; CHECK: ## BB#0: 5126; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5127; CHECK-NEXT: vrcp14ps %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x4c,0xc0] 5128; CHECK-NEXT: retq ## encoding: [0xc3] 5129 %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask) 5130 ret <8 x float> %res 5131} 5132 5133define <8 x float> @test_rcp_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) { 5134; CHECK-LABEL: test_rcp_ps_256_rrk: 5135; CHECK: ## BB#0: 5136; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5137; CHECK-NEXT: vrcp14ps %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x4c,0xc8] 5138; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 5139; CHECK-NEXT: retq ## encoding: [0xc3] 5140 %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask) 5141 ret <8 x float> %res 5142} 5143 5144define <4 x float> @test_rcp_ps_128_rr(<4 x float> %a0) { 5145; CHECK-LABEL: test_rcp_ps_128_rr: 5146; CHECK: ## BB#0: 5147; CHECK-NEXT: vrcp14ps %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x4c,0xc0] 5148; CHECK-NEXT: retq ## encoding: [0xc3] 5149 %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1) 5150 ret <4 x float> %res 5151} 5152 5153define <4 x float> @test_rcp_ps_128_rrkz(<4 x float> %a0, i8 %mask) { 5154; CHECK-LABEL: test_rcp_ps_128_rrkz: 5155; CHECK: ## BB#0: 5156; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5157; CHECK-NEXT: vrcp14ps %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x4c,0xc0] 5158; CHECK-NEXT: retq ## encoding: [0xc3] 5159 %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask) 5160 ret <4 x float> %res 5161} 5162 5163define <4 x float> @test_rcp_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) { 5164; CHECK-LABEL: test_rcp_ps_128_rrk: 5165; CHECK: ## BB#0: 5166; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5167; CHECK-NEXT: vrcp14ps %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x4c,0xc8] 5168; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 5169; CHECK-NEXT: retq ## encoding: [0xc3] 5170 %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask) 5171 ret <4 x float> %res 5172} 5173 5174declare <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 5175declare <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone 5176 5177define <4 x double> @test_rsqrt_pd_256_rr(<4 x double> %a0) { 5178; CHECK-LABEL: test_rsqrt_pd_256_rr: 5179; CHECK: ## BB#0: 5180; CHECK-NEXT: vrsqrt14pd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x4e,0xc0] 5181; CHECK-NEXT: retq ## encoding: [0xc3] 5182 %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1) 5183 ret <4 x double> %res 5184} 5185 5186define <4 x double> @test_rsqrt_pd_256_rrkz(<4 x double> %a0, i8 %mask) { 5187; CHECK-LABEL: test_rsqrt_pd_256_rrkz: 5188; CHECK: ## BB#0: 5189; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5190; CHECK-NEXT: vrsqrt14pd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x4e,0xc0] 5191; CHECK-NEXT: retq ## encoding: [0xc3] 5192 %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask) 5193 ret <4 x double> %res 5194} 5195 5196define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) { 5197; CHECK-LABEL: test_rsqrt_pd_256_rrk: 5198; CHECK: ## BB#0: 5199; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5200; CHECK-NEXT: vrsqrt14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4e,0xc8] 5201; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 5202; CHECK-NEXT: retq ## encoding: [0xc3] 5203 %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask) 5204 ret <4 x double> %res 5205} 5206 5207define <2 x double> @test_rsqrt_pd_128_rr(<2 x double> %a0) { 5208; CHECK-LABEL: test_rsqrt_pd_128_rr: 5209; CHECK: ## BB#0: 5210; CHECK-NEXT: vrsqrt14pd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x4e,0xc0] 5211; CHECK-NEXT: retq ## encoding: [0xc3] 5212 %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1) 5213 ret <2 x double> %res 5214} 5215 5216define <2 x double> @test_rsqrt_pd_128_rrkz(<2 x double> %a0, i8 %mask) { 5217; CHECK-LABEL: test_rsqrt_pd_128_rrkz: 5218; CHECK: ## BB#0: 5219; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5220; CHECK-NEXT: vrsqrt14pd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x4e,0xc0] 5221; CHECK-NEXT: retq ## encoding: [0xc3] 5222 %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask) 5223 ret <2 x double> %res 5224} 5225 5226define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 5227; CHECK-LABEL: test_rsqrt_pd_128_rrk: 5228; CHECK: ## BB#0: 5229; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5230; CHECK-NEXT: vrsqrt14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4e,0xc8] 5231; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 5232; CHECK-NEXT: retq ## encoding: [0xc3] 5233 %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask) 5234 ret <2 x double> %res 5235} 5236 5237declare <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 5238declare <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone 5239 5240define <4 x double> @test_rcp_pd_256_rr(<4 x double> %a0) { 5241; CHECK-LABEL: test_rcp_pd_256_rr: 5242; CHECK: ## BB#0: 5243; CHECK-NEXT: vrcp14pd %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x4c,0xc0] 5244; CHECK-NEXT: retq ## encoding: [0xc3] 5245 %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1) 5246 ret <4 x double> %res 5247} 5248 5249define <4 x double> @test_rcp_pd_256_rrkz(<4 x double> %a0, i8 %mask) { 5250; CHECK-LABEL: test_rcp_pd_256_rrkz: 5251; CHECK: ## BB#0: 5252; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5253; CHECK-NEXT: vrcp14pd %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x4c,0xc0] 5254; CHECK-NEXT: retq ## encoding: [0xc3] 5255 %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask) 5256 ret <4 x double> %res 5257} 5258 5259define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) { 5260; CHECK-LABEL: test_rcp_pd_256_rrk: 5261; CHECK: ## BB#0: 5262; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5263; CHECK-NEXT: vrcp14pd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x4c,0xc8] 5264; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 5265; CHECK-NEXT: retq ## encoding: [0xc3] 5266 %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask) 5267 ret <4 x double> %res 5268} 5269 5270define <2 x double> @test_rcp_pd_128_rr(<2 x double> %a0) { 5271; CHECK-LABEL: test_rcp_pd_128_rr: 5272; CHECK: ## BB#0: 5273; CHECK-NEXT: vrcp14pd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x4c,0xc0] 5274; CHECK-NEXT: retq ## encoding: [0xc3] 5275 %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1) 5276 ret <2 x double> %res 5277} 5278 5279define <2 x double> @test_rcp_pd_128_rrkz(<2 x double> %a0, i8 %mask) { 5280; CHECK-LABEL: test_rcp_pd_128_rrkz: 5281; CHECK: ## BB#0: 5282; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5283; CHECK-NEXT: vrcp14pd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x4c,0xc0] 5284; CHECK-NEXT: retq ## encoding: [0xc3] 5285 %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask) 5286 ret <2 x double> %res 5287} 5288 5289define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) { 5290; CHECK-LABEL: test_rcp_pd_128_rrk: 5291; CHECK: ## BB#0: 5292; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5293; CHECK-NEXT: vrcp14pd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x4c,0xc8] 5294; CHECK-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1] 5295; CHECK-NEXT: retq ## encoding: [0xc3] 5296 %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask) 5297 ret <2 x double> %res 5298} 5299 5300declare <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone 5301declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone 5302 5303declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8) 5304 5305define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) { 5306; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256: 5307; CHECK: ## BB#0: 5308; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def> 5309; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5310; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd0,0x00] 5311; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3] 5312; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xc8,0x00] 5313; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3] 5314; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc0,0x00] 5315; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3] 5316; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1] 5317; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0] 5318; CHECK-NEXT: retq ## encoding: [0xc3] 5319 %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1) 5320 %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask) 5321 %res3 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask) 5322 %res4 = fadd <8 x float> %res1, %res2 5323 %res5 = fadd <8 x float> %res3, %res4 5324 ret <8 x float> %res5 5325} 5326 5327declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8) 5328 5329define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) { 5330; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256: 5331; CHECK: ## BB#0: 5332; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def> 5333; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5334; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x43,0xd0,0x00] 5335; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3] 5336; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xc8,0x00] 5337; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,0,1,2,3] 5338; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc0,0x00] 5339; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,0,1,2,3] 5340; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] 5341; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 5342; CHECK-NEXT: retq ## encoding: [0xc3] 5343 %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1) 5344 %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) 5345 %res3 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask) 5346 %res4 = add <8 x i32> %res1, %res2 5347 %res5 = add <8 x i32> %res3, %res4 5348 ret <8 x i32> %res5 5349} 5350 5351declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 5352 5353define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 5354; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_128: 5355; CHECK: ## BB#0: 5356; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5357; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1] 5358; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xd9] 5359; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd3,0xc1] 5360; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] 5361; CHECK-NEXT: vpaddq %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc3] 5362; CHECK-NEXT: retq ## encoding: [0xc3] 5363 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 5364 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 5365 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 5366 %res3 = add <2 x i64> %res, %res1 5367 %res4 = add <2 x i64> %res3, %res2 5368 ret <2 x i64> %res4 5369} 5370 5371declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8) 5372 5373define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { 5374; CHECK-LABEL: test_int_x86_avx512_mask_psrl_q_256: 5375; CHECK: ## BB#0: 5376; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5377; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1] 5378; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xd9] 5379; CHECK-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd3,0xc1] 5380; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 5381; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc3] 5382; CHECK-NEXT: retq ## encoding: [0xc3] 5383 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) 5384 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1) 5385 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 5386 %res3 = add <4 x i64> %res, %res1 5387 %res4 = add <4 x i64> %res3, %res2 5388 ret <4 x i64> %res4 5389} 5390 5391declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i32, <2 x i64>, i8) 5392 5393define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 5394; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_128: 5395; CHECK: ## BB#0: 5396; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5397; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0xff] 5398; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x73,0xd0,0xff] 5399; CHECK-NEXT: vpsrlq $255, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xd0,0xff] 5400; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 5401; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] 5402; CHECK-NEXT: retq ## encoding: [0xc3] 5403 %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 255, <2 x i64> %x2, i8 %x3) 5404 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 255, <2 x i64> %x2, i8 -1) 5405 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 255, <2 x i64> zeroinitializer, i8 %x3) 5406 %res3 = add <2 x i64> %res, %res1 5407 %res4 = add <2 x i64> %res2, %res3 5408 ret <2 x i64> %res4 5409} 5410 5411declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i32, <4 x i64>, i8) 5412 5413define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 5414; CHECK-LABEL: test_int_x86_avx512_mask_psrl_qi_256: 5415; CHECK: ## BB#0: 5416; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5417; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0xff] 5418; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x73,0xd0,0xff] 5419; CHECK-NEXT: vpsrlq $255, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x73,0xd0,0xff] 5420; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 5421; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 5422; CHECK-NEXT: retq ## encoding: [0xc3] 5423 %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 255, <4 x i64> %x2, i8 %x3) 5424 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 255, <4 x i64> %x2, i8 -1) 5425 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 255, <4 x i64> zeroinitializer, i8 %x3) 5426 %res3 = add <4 x i64> %res, %res1 5427 %res4 = add <4 x i64> %res2, %res3 5428 ret <4 x i64> %res4 5429} 5430declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 5431define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 5432; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_128: 5433; CHECK: ## BB#0: 5434; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5435; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1] 5436; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xd9] 5437; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd2,0xc1] 5438; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 5439; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc3] 5440; CHECK-NEXT: retq ## encoding: [0xc3] 5441 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 5442 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 5443 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 5444 %res3 = add <4 x i32> %res, %res1 5445 %res4 = add <4 x i32> %res3, %res2 5446 ret <4 x i32> %res4 5447} 5448 5449declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8) 5450 5451define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { 5452; CHECK-LABEL: test_int_x86_avx512_mask_psrl_d_256: 5453; CHECK: ## BB#0: 5454; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5455; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1] 5456; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xd9] 5457; CHECK-NEXT: vpsrld %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd2,0xc1] 5458; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 5459; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfe,0xc0] 5460; CHECK-NEXT: retq ## encoding: [0xc3] 5461 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) 5462 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1) 5463 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 5464 %res3 = add <8 x i32> %res, %res1 5465 %res4 = add <8 x i32> %res2, %res3 5466 ret <8 x i32> %res4 5467} 5468 5469declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i32, <4 x i32>, i8) 5470 5471define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 5472; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_128: 5473; CHECK: ## BB#0: 5474; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5475; CHECK-NEXT: vpsrld $255, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0xff] 5476; CHECK-NEXT: vpsrld $255, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xd0,0xff] 5477; CHECK-NEXT: vpsrld $255, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xd0,0xff] 5478; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 5479; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 5480; CHECK-NEXT: retq ## encoding: [0xc3] 5481 %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 255, <4 x i32> %x2, i8 %x3) 5482 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 255, <4 x i32> %x2, i8 -1) 5483 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 255, <4 x i32> zeroinitializer, i8 %x3) 5484 %res3 = add <4 x i32> %res, %res1 5485 %res4 = add <4 x i32> %res2, %res3 5486 ret <4 x i32> %res4 5487} 5488 5489declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i32, <8 x i32>, i8) 5490 5491define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 5492; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_256: 5493; CHECK: ## BB#0: 5494; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5495; CHECK-NEXT: vpsrld $255, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0xff] 5496; CHECK-NEXT: vpsrld $255, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xd0,0xff] 5497; CHECK-NEXT: vpsrld $255, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xd0,0xff] 5498; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 5499; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 5500; CHECK-NEXT: retq ## encoding: [0xc3] 5501 %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 255, <8 x i32> %x2, i8 %x3) 5502 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 255, <8 x i32> %x2, i8 -1) 5503 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 255, <8 x i32> zeroinitializer, i8 %x3) 5504 %res3 = add <8 x i32> %res, %res1 5505 %res4 = add <8 x i32> %res2, %res3 5506 ret <8 x i32> %res4 5507} 5508 5509declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16) 5510 5511define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { 5512; CHECK-LABEL: test_int_x86_avx512_mask_psrl_di_512: 5513; CHECK: ## BB#0: 5514; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5515; CHECK-NEXT: vpsrld $255, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0xff] 5516; CHECK-NEXT: vpsrld $255, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xd0,0xff] 5517; CHECK-NEXT: vpsrld $255, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0xff] 5518; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0] 5519; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] 5520; CHECK-NEXT: retq ## encoding: [0xc3] 5521 %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> %x2, i16 %x3) 5522 %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> %x2, i16 -1) 5523 %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 255, <16 x i32> zeroinitializer, i16 %x3) 5524 %res3 = add <16 x i32> %res, %res1 5525 %res4 = add <16 x i32> %res2, %res3 5526 ret <16 x i32> %res4 5527} 5528 5529declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8) 5530 5531define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 5532; CHECK-LABEL: test_int_x86_avx512_mask_psrlv2_di: 5533; CHECK: ## BB#0: 5534; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5535; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1] 5536; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x45,0xd9] 5537; CHECK-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x45,0xc1] 5538; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] 5539; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 5540; CHECK-NEXT: retq ## encoding: [0xc3] 5541 %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 5542 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 5543 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 5544 %res3 = add <2 x i64> %res, %res1 5545 %res4 = add <2 x i64> %res3, %res2 5546 ret <2 x i64> %res4 5547} 5548 5549declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8) 5550 5551define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 5552; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_di: 5553; CHECK: ## BB#0: 5554; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5555; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1] 5556; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xd9] 5557; CHECK-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x45,0xc1] 5558; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] 5559; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 5560; CHECK-NEXT: retq ## encoding: [0xc3] 5561 %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 5562 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 5563 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 5564 %res3 = add <4 x i64> %res, %res1 5565 %res4 = add <4 x i64> %res3, %res2 5566 ret <4 x i64> %res4 5567} 5568 5569declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8) 5570 5571define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 5572; CHECK-LABEL: test_int_x86_avx512_mask_psrlv4_si: 5573; CHECK: ## BB#0: 5574; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5575; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1] 5576; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x45,0xd9] 5577; CHECK-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x45,0xc1] 5578; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] 5579; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 5580; CHECK-NEXT: retq ## encoding: [0xc3] 5581 %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 5582 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 5583 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 5584 %res3 = add <4 x i32> %res, %res1 5585 %res4 = add <4 x i32> %res3, %res2 5586 ret <4 x i32> %res4 5587} 5588 5589declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8) 5590 5591define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 5592; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_si: 5593; CHECK: ## BB#0: 5594; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5595; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1] 5596; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xd9] 5597; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x45,0xc1] 5598; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] 5599; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 5600; CHECK-NEXT: retq ## encoding: [0xc3] 5601 %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 5602 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 5603 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 5604 %res3 = add <8 x i32> %res, %res1 5605 %res4 = add <8 x i32> %res3, %res2 5606 ret <8 x i32> %res4 5607} 5608 5609declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 5610 5611define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 5612; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_128: 5613; CHECK: ## BB#0: 5614; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5615; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1] 5616; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xd9] 5617; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe2,0xc1] 5618; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] 5619; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 5620; CHECK-NEXT: retq ## encoding: [0xc3] 5621 %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 5622 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 5623 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 5624 %res3 = add <4 x i32> %res, %res1 5625 %res4 = add <4 x i32> %res3, %res2 5626 ret <4 x i32> %res4 5627} 5628 5629declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8) 5630 5631define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { 5632; CHECK-LABEL: test_int_x86_avx512_mask_psra_d_256: 5633; CHECK: ## BB#0: 5634; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5635; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1] 5636; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xd9] 5637; CHECK-NEXT: vpsrad %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe2,0xc1] 5638; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] 5639; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 5640; CHECK-NEXT: retq ## encoding: [0xc3] 5641 %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) 5642 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 5643 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1) 5644 %res3 = add <8 x i32> %res, %res1 5645 %res4 = add <8 x i32> %res3, %res2 5646 ret <8 x i32> %res4 5647} 5648 5649declare <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32>, i32, <4 x i32>, i8) 5650 5651define <4 x i32>@test_int_x86_avx512_mask_psra_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 5652; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_128: 5653; CHECK: ## BB#0: 5654; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5655; CHECK-NEXT: vpsrad $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xe0,0x03] 5656; CHECK-NEXT: vpsrad $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xe0,0x03] 5657; CHECK-NEXT: vpsrad $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xe0,0x03] 5658; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] 5659; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 5660; CHECK-NEXT: retq ## encoding: [0xc3] 5661 %res = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 5662 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) 5663 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1) 5664 %res3 = add <4 x i32> %res, %res1 5665 %res4 = add <4 x i32> %res3, %res2 5666 ret <4 x i32> %res4 5667} 5668 5669declare <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32>, i32, <8 x i32>, i8) 5670 5671define <8 x i32>@test_int_x86_avx512_mask_psra_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 5672; CHECK-LABEL: test_int_x86_avx512_mask_psra_di_256: 5673; CHECK: ## BB#0: 5674; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5675; CHECK-NEXT: vpsrad $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xe0,0x03] 5676; CHECK-NEXT: vpsrad $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xe0,0x03] 5677; CHECK-NEXT: vpsrad $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xe0,0x03] 5678; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] 5679; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 5680; CHECK-NEXT: retq ## encoding: [0xc3] 5681 %res = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 5682 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) 5683 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1) 5684 %res3 = add <8 x i32> %res, %res1 5685 %res4 = add <8 x i32> %res3, %res2 5686 ret <8 x i32> %res4 5687} 5688 5689declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 5690 5691define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 5692; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_128: 5693; CHECK: ## BB#0: 5694; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5695; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1] 5696; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xd9] 5697; CHECK-NEXT: vpsraq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xc1] 5698; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] 5699; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 5700; CHECK-NEXT: retq ## encoding: [0xc3] 5701 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 5702 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 5703 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 5704 %res3 = add <2 x i64> %res, %res1 5705 %res4 = add <2 x i64> %res3, %res2 5706 ret <2 x i64> %res4 5707} 5708 5709declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8) 5710 5711define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { 5712; CHECK-LABEL: test_int_x86_avx512_mask_psra_q_256: 5713; CHECK: ## BB#0: 5714; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5715; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1] 5716; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xd9] 5717; CHECK-NEXT: vpsraq %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xc1] 5718; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] 5719; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 5720; CHECK-NEXT: retq ## encoding: [0xc3] 5721 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) 5722 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 5723 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1) 5724 %res3 = add <4 x i64> %res, %res1 5725 %res4 = add <4 x i64> %res3, %res2 5726 ret <4 x i64> %res4 5727} 5728 5729declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i32, <2 x i64>, i8) 5730 5731define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 5732; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_128: 5733; CHECK: ## BB#0: 5734; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5735; CHECK-NEXT: vpsraq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03] 5736; CHECK-NEXT: vpsraq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x72,0xe0,0x03] 5737; CHECK-NEXT: vpsraq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x03] 5738; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 5739; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 5740; CHECK-NEXT: retq ## encoding: [0xc3] 5741 %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) 5742 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3) 5743 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1) 5744 %res3 = add <2 x i64> %res, %res1 5745 %res4 = add <2 x i64> %res3, %res2 5746 ret <2 x i64> %res4 5747} 5748 5749declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i32, <4 x i64>, i8) 5750 5751define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 5752; CHECK-LABEL: test_int_x86_avx512_mask_psra_qi_256: 5753; CHECK: ## BB#0: 5754; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5755; CHECK-NEXT: vpsraq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03] 5756; CHECK-NEXT: vpsraq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x72,0xe0,0x03] 5757; CHECK-NEXT: vpsraq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x03] 5758; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 5759; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 5760; CHECK-NEXT: retq ## encoding: [0xc3] 5761 %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 5762 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) 5763 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1) 5764 %res3 = add <4 x i64> %res, %res1 5765 %res4 = add <4 x i64> %res3, %res2 5766 ret <4 x i64> %res4 5767} 5768 5769 5770declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 5771 5772define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 5773; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_128: 5774; CHECK: ## BB#0: 5775; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5776; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1] 5777; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xd9] 5778; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf2,0xc1] 5779; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] 5780; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 5781; CHECK-NEXT: retq ## encoding: [0xc3] 5782 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 5783 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 5784 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 5785 %res3 = add <4 x i32> %res, %res1 5786 %res4 = add <4 x i32> %res3, %res2 5787 ret <4 x i32> %res4 5788} 5789 5790declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8) 5791 5792define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) { 5793; CHECK-LABEL: test_int_x86_avx512_mask_psll_d_256: 5794; CHECK: ## BB#0: 5795; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5796; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1] 5797; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xd9] 5798; CHECK-NEXT: vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf2,0xc1] 5799; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] 5800; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 5801; CHECK-NEXT: retq ## encoding: [0xc3] 5802 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) 5803 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 5804 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1) 5805 %res3 = add <8 x i32> %res, %res1 5806 %res4 = add <8 x i32> %res3, %res2 5807 ret <8 x i32> %res4 5808} 5809 5810declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i32, <4 x i32>, i8) 5811 5812define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 5813; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_128: 5814; CHECK: ## BB#0: 5815; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5816; CHECK-NEXT: vpslld $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03] 5817; CHECK-NEXT: vpslld $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xf0,0x03] 5818; CHECK-NEXT: vpslld $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xf0,0x03] 5819; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] 5820; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 5821; CHECK-NEXT: retq ## encoding: [0xc3] 5822 %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 5823 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) 5824 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1) 5825 %res3 = add <4 x i32> %res, %res1 5826 %res4 = add <4 x i32> %res3, %res2 5827 ret <4 x i32> %res4 5828} 5829 5830declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i32, <8 x i32>, i8) 5831 5832define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 5833; CHECK-LABEL: test_int_x86_avx512_mask_psll_di_256: 5834; CHECK: ## BB#0: 5835; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5836; CHECK-NEXT: vpslld $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03] 5837; CHECK-NEXT: vpslld $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xf0,0x03] 5838; CHECK-NEXT: vpslld $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xf0,0x03] 5839; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] 5840; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 5841; CHECK-NEXT: retq ## encoding: [0xc3] 5842 %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 5843 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) 5844 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1) 5845 %res3 = add <8 x i32> %res, %res1 5846 %res4 = add <8 x i32> %res3, %res2 5847 ret <8 x i32> %res4 5848} 5849 5850declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8) 5851 5852define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) { 5853; CHECK-LABEL: test_int_x86_avx512_mask_psll_q_256: 5854; CHECK: ## BB#0: 5855; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5856; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1] 5857; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xd9] 5858; CHECK-NEXT: vpsllq %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xf3,0xc1] 5859; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] 5860; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 5861; CHECK-NEXT: retq ## encoding: [0xc3] 5862 %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) 5863 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 5864 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1) 5865 %res3 = add <4 x i64> %res, %res1 5866 %res4 = add <4 x i64> %res3, %res2 5867 ret <4 x i64> %res4 5868} 5869 5870declare <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64>, i32, <2 x i64>, i8) 5871 5872define <2 x i64>@test_int_x86_avx512_mask_psll_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 5873; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_128: 5874; CHECK: ## BB#0: 5875; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5876; CHECK-NEXT: vpsllq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x73,0xf0,0x03] 5877; CHECK-NEXT: vpsllq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x73,0xf0,0x03] 5878; CHECK-NEXT: vpsllq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x73,0xf0,0x03] 5879; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 5880; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 5881; CHECK-NEXT: retq ## encoding: [0xc3] 5882 %res = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) 5883 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3) 5884 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psll.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1) 5885 %res3 = add <2 x i64> %res, %res1 5886 %res4 = add <2 x i64> %res3, %res2 5887 ret <2 x i64> %res4 5888} 5889 5890declare <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64>, i32, <4 x i64>, i8) 5891 5892define <4 x i64>@test_int_x86_avx512_mask_psll_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 5893; CHECK-LABEL: test_int_x86_avx512_mask_psll_qi_256: 5894; CHECK: ## BB#0: 5895; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 5896; CHECK-NEXT: vpsllq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x73,0xf0,0x03] 5897; CHECK-NEXT: vpsllq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x73,0xf0,0x03] 5898; CHECK-NEXT: vpsllq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x73,0xf0,0x03] 5899; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 5900; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 5901; CHECK-NEXT: retq ## encoding: [0xc3] 5902 %res = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 5903 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) 5904 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1) 5905 %res3 = add <4 x i64> %res, %res1 5906 %res4 = add <4 x i64> %res3, %res2 5907 ret <4 x i64> %res4 5908} 5909 5910declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8) 5911 5912define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 5913; CHECK-LABEL: test_int_x86_avx512_mask_psrav4_si: 5914; CHECK: ## BB#0: 5915; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5916; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1] 5917; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x46,0xd9] 5918; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x46,0xc1] 5919; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] 5920; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 5921; CHECK-NEXT: retq ## encoding: [0xc3] 5922 %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 5923 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 5924 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 5925 %res3 = add <4 x i32> %res, %res1 5926 %res4 = add <4 x i32> %res3, %res2 5927 ret <4 x i32> %res4 5928} 5929 5930declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8) 5931 5932define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 5933; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si: 5934; CHECK: ## BB#0: 5935; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5936; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1] 5937; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xd9] 5938; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0xc1] 5939; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] 5940; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 5941; CHECK-NEXT: retq ## encoding: [0xc3] 5942 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 5943 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 5944 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 5945 %res3 = add <8 x i32> %res, %res1 5946 %res4 = add <8 x i32> %res3, %res2 5947 ret <8 x i32> %res4 5948} 5949 5950define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() { 5951; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si_const: 5952; CHECK: ## BB#0: 5953; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] 5954; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A] 5955; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI371_0-4, kind: reloc_riprel_4byte 5956; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A] 5957; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI371_1-4, kind: reloc_riprel_4byte 5958; CHECK-NEXT: retq ## encoding: [0xc3] 5959 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1) 5960 ret <8 x i32> %res 5961} 5962 5963declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 5964 5965define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 5966; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128: 5967; CHECK: ## BB#0: 5968; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 5969; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1] 5970; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x46,0xd9] 5971; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0xc1] 5972; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] 5973; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 5974; CHECK-NEXT: retq ## encoding: [0xc3] 5975 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 5976 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 5977 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 5978 %res3 = add <2 x i64> %res, %res1 5979 %res4 = add <2 x i64> %res3, %res2 5980 ret <2 x i64> %res4 5981} 5982 5983define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) { 5984; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128_const: 5985; CHECK: ## BB#0: 5986; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607] 5987; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A] 5988; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI373_0-4, kind: reloc_riprel_4byte 5989; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A] 5990; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI373_1-4, kind: reloc_riprel_4byte 5991; CHECK-NEXT: retq ## encoding: [0xc3] 5992 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1) 5993 ret <2 x i64> %res 5994} 5995 5996declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 5997 5998define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 5999; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_256: 6000; CHECK: ## BB#0: 6001; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6002; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1] 6003; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xd9] 6004; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x46,0xc1] 6005; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] 6006; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6007; CHECK-NEXT: retq ## encoding: [0xc3] 6008 %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 6009 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 6010 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 6011 %res3 = add <4 x i64> %res, %res1 6012 %res4 = add <4 x i64> %res3, %res2 6013 ret <4 x i64> %res4 6014} 6015 6016declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8) 6017 6018define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 6019; CHECK-LABEL: test_int_x86_avx512_mask_psllv2_di: 6020; CHECK: ## BB#0: 6021; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6022; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1] 6023; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x47,0xd9] 6024; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x47,0xc1] 6025; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] 6026; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6027; CHECK-NEXT: retq ## encoding: [0xc3] 6028 %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 6029 %res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 6030 %res2 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 6031 %res3 = add <2 x i64> %res, %res1 6032 %res4 = add <2 x i64> %res3, %res2 6033 ret <2 x i64> %res4 6034} 6035 6036declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8) 6037 6038define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 6039; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_di: 6040; CHECK: ## BB#0: 6041; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6042; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1] 6043; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xd9] 6044; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x47,0xc1] 6045; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] 6046; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6047; CHECK-NEXT: retq ## encoding: [0xc3] 6048 %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 6049 %res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 6050 %res2 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 6051 %res3 = add <4 x i64> %res, %res1 6052 %res4 = add <4 x i64> %res3, %res2 6053 ret <4 x i64> %res4 6054} 6055 6056declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6057 6058define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 6059; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_si: 6060; CHECK: ## BB#0: 6061; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6062; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1] 6063; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x47,0xd9] 6064; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x47,0xc1] 6065; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] 6066; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6067; CHECK-NEXT: retq ## encoding: [0xc3] 6068 %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 6069 %res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 6070 %res2 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 6071 %res3 = add <4 x i32> %res, %res1 6072 %res4 = add <4 x i32> %res3, %res2 6073 ret <4 x i32> %res4 6074} 6075 6076declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8) 6077 6078define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6079; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_si: 6080; CHECK: ## BB#0: 6081; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6082; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1] 6083; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xd9] 6084; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x47,0xc1] 6085; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] 6086; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6087; CHECK-NEXT: retq ## encoding: [0xc3] 6088 %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 6089 %res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 6090 %res2 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 6091 %res3 = add <8 x i32> %res, %res1 6092 %res4 = add <8 x i32> %res3, %res2 6093 ret <8 x i32> %res4 6094} 6095 6096declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6097 6098define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 6099; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_128: 6100; CHECK: ## BB#0: 6101; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6102; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1] 6103; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x14,0xd9] 6104; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x14,0xc1] 6105; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] 6106; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6107; CHECK-NEXT: retq ## encoding: [0xc3] 6108 %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 6109 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 6110 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 6111 %res3 = add <4 x i32> %res, %res1 6112 %res4 = add <4 x i32> %res3, %res2 6113 ret <4 x i32> %res4 6114} 6115 6116declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 6117 6118define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6119; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_256: 6120; CHECK: ## BB#0: 6121; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6122; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1] 6123; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xd9] 6124; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x14,0xc1] 6125; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] 6126; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6127; CHECK-NEXT: retq ## encoding: [0xc3] 6128 %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 6129 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 6130 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 6131 %res3 = add <8 x i32> %res, %res1 6132 %res4 = add <8 x i32> %res3, %res2 6133 ret <8 x i32> %res4 6134} 6135 6136declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 6137 6138define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 6139; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_128: 6140; CHECK: ## BB#0: 6141; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6142; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1] 6143; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x14,0xd9] 6144; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x14,0xc1] 6145; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] 6146; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6147; CHECK-NEXT: retq ## encoding: [0xc3] 6148 %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 6149 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 6150 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 6151 %res3 = add <2 x i64> %res, %res1 6152 %res4 = add <2 x i64> %res3, %res2 6153 ret <2 x i64> %res4 6154} 6155 6156declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 6157 6158define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 6159; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_256: 6160; CHECK: ## BB#0: 6161; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6162; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1] 6163; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xd9] 6164; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x14,0xc1] 6165; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] 6166; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6167; CHECK-NEXT: retq ## encoding: [0xc3] 6168 %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 6169 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 6170 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 6171 %res3 = add <4 x i64> %res, %res1 6172 %res4 = add <4 x i64> %res3, %res2 6173 ret <4 x i64> %res4 6174} 6175 6176declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i32, <4 x i32>, i8) 6177 6178define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 6179; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_128: 6180; CHECK: ## BB#0: 6181; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6182; CHECK-NEXT: vprold $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03] 6183; CHECK-NEXT: vprold $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x03] 6184; CHECK-NEXT: vprold $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x03] 6185; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] 6186; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6187; CHECK-NEXT: retq ## encoding: [0xc3] 6188 %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 6189 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) 6190 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1) 6191 %res3 = add <4 x i32> %res, %res1 6192 %res4 = add <4 x i32> %res3, %res2 6193 ret <4 x i32> %res4 6194} 6195 6196declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i32, <8 x i32>, i8) 6197 6198define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 6199; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_256: 6200; CHECK: ## BB#0: 6201; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6202; CHECK-NEXT: vprold $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03] 6203; CHECK-NEXT: vprold $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x03] 6204; CHECK-NEXT: vprold $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x03] 6205; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] 6206; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6207; CHECK-NEXT: retq ## encoding: [0xc3] 6208 %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 6209 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) 6210 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1) 6211 %res3 = add <8 x i32> %res, %res1 6212 %res4 = add <8 x i32> %res3, %res2 6213 ret <8 x i32> %res4 6214} 6215 6216declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i32, <2 x i64>, i8) 6217 6218define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 6219; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_128: 6220; CHECK: ## BB#0: 6221; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6222; CHECK-NEXT: vprolq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03] 6223; CHECK-NEXT: vprolq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x03] 6224; CHECK-NEXT: vprolq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x03] 6225; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 6226; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6227; CHECK-NEXT: retq ## encoding: [0xc3] 6228 %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) 6229 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3) 6230 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1) 6231 %res3 = add <2 x i64> %res, %res1 6232 %res4 = add <2 x i64> %res3, %res2 6233 ret <2 x i64> %res4 6234} 6235 6236declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i32, <4 x i64>, i8) 6237 6238define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 6239; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_256: 6240; CHECK: ## BB#0: 6241; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6242; CHECK-NEXT: vprolq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03] 6243; CHECK-NEXT: vprolq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x03] 6244; CHECK-NEXT: vprolq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x03] 6245; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 6246; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6247; CHECK-NEXT: retq ## encoding: [0xc3] 6248 %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 6249 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) 6250 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1) 6251 %res3 = add <4 x i64> %res, %res1 6252 %res4 = add <4 x i64> %res3, %res2 6253 ret <4 x i64> %res4 6254} 6255 6256declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 6257 6258define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { 6259; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_128: 6260; CHECK: ## BB#0: 6261; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6262; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1] 6263; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x15,0xd9] 6264; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x15,0xc1] 6265; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xcb] 6266; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6267; CHECK-NEXT: retq ## encoding: [0xc3] 6268 %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 6269 %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3) 6270 %res2 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 6271 %res3 = add <4 x i32> %res, %res1 6272 %res4 = add <4 x i32> %res3, %res2 6273 ret <4 x i32> %res4 6274} 6275 6276declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 6277 6278define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6279; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_256: 6280; CHECK: ## BB#0: 6281; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6282; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1] 6283; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xd9] 6284; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x15,0xc1] 6285; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] 6286; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6287; CHECK-NEXT: retq ## encoding: [0xc3] 6288 %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 6289 %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 6290 %res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 6291 %res3 = add <8 x i32> %res, %res1 6292 %res4 = add <8 x i32> %res3, %res2 6293 ret <8 x i32> %res4 6294} 6295 6296declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) 6297 6298define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { 6299; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_128: 6300; CHECK: ## BB#0: 6301; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6302; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1] 6303; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x15,0xd9] 6304; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x15,0xc1] 6305; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] 6306; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6307; CHECK-NEXT: retq ## encoding: [0xc3] 6308 %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) 6309 %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) 6310 %res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) 6311 %res3 = add <2 x i64> %res, %res1 6312 %res4 = add <2 x i64> %res3, %res2 6313 ret <2 x i64> %res4 6314} 6315 6316declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 6317 6318define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 6319; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_256: 6320; CHECK: ## BB#0: 6321; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6322; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1] 6323; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xd9] 6324; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x15,0xc1] 6325; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] 6326; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6327; CHECK-NEXT: retq ## encoding: [0xc3] 6328 %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 6329 %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 6330 %res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 6331 %res3 = add <4 x i64> %res, %res1 6332 %res4 = add <4 x i64> %res3, %res2 6333 ret <4 x i64> %res4 6334} 6335 6336declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i32, <4 x i32>, i8) 6337 6338define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) { 6339; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_128: 6340; CHECK: ## BB#0: 6341; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6342; CHECK-NEXT: vprord $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03] 6343; CHECK-NEXT: vprord $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x03] 6344; CHECK-NEXT: vprord $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x03] 6345; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] 6346; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6347; CHECK-NEXT: retq ## encoding: [0xc3] 6348 %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3) 6349 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3) 6350 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1) 6351 %res3 = add <4 x i32> %res, %res1 6352 %res4 = add <4 x i32> %res3, %res2 6353 ret <4 x i32> %res4 6354} 6355 6356declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i32, <8 x i32>, i8) 6357 6358define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) { 6359; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_256: 6360; CHECK: ## BB#0: 6361; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6362; CHECK-NEXT: vprord $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03] 6363; CHECK-NEXT: vprord $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x03] 6364; CHECK-NEXT: vprord $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x03] 6365; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] 6366; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6367; CHECK-NEXT: retq ## encoding: [0xc3] 6368 %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3) 6369 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3) 6370 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1) 6371 %res3 = add <8 x i32> %res, %res1 6372 %res4 = add <8 x i32> %res3, %res2 6373 ret <8 x i32> %res4 6374} 6375 6376declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i32, <2 x i64>, i8) 6377 6378define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) { 6379; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_128: 6380; CHECK: ## BB#0: 6381; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6382; CHECK-NEXT: vprorq $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03] 6383; CHECK-NEXT: vprorq $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x03] 6384; CHECK-NEXT: vprorq $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x03] 6385; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 6386; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6387; CHECK-NEXT: retq ## encoding: [0xc3] 6388 %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3) 6389 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3) 6390 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1) 6391 %res3 = add <2 x i64> %res, %res1 6392 %res4 = add <2 x i64> %res3, %res2 6393 ret <2 x i64> %res4 6394} 6395 6396declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i32, <4 x i64>, i8) 6397 6398define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) { 6399; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_256: 6400; CHECK: ## BB#0: 6401; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 6402; CHECK-NEXT: vprorq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03] 6403; CHECK-NEXT: vprorq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x03] 6404; CHECK-NEXT: vprorq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x03] 6405; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 6406; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6407; CHECK-NEXT: retq ## encoding: [0xc3] 6408 %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3) 6409 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3) 6410 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1) 6411 %res3 = add <4 x i64> %res, %res1 6412 %res4 = add <4 x i64> %res3, %res2 6413 ret <4 x i64> %res4 6414} 6415 6416declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8) 6417 6418define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) { 6419; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128: 6420; CHECK: ## BB#0: 6421; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6422; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8] 6423; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 6424; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x31,0xd0] 6425; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 6426; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x31,0xc0] 6427; CHECK-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 6428; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] 6429; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6430; CHECK-NEXT: retq ## encoding: [0xc3] 6431 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) 6432 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2) 6433 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1) 6434 %res3 = add <4 x i32> %res, %res1 6435 %res4 = add <4 x i32> %res3, %res2 6436 ret <4 x i32> %res4 6437} 6438 6439declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8) 6440 6441define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) { 6442; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256: 6443; CHECK: ## BB#0: 6444; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6445; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8] 6446; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 6447; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xd0] 6448; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 6449; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x31,0xc0] 6450; CHECK-NEXT: ## ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 6451; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] 6452; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6453; CHECK-NEXT: retq ## encoding: [0xc3] 6454 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) 6455 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2) 6456 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1) 6457 %res3 = add <8 x i32> %res, %res1 6458 %res4 = add <8 x i32> %res3, %res2 6459 ret <8 x i32> %res4 6460} 6461 6462declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8) 6463 6464define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) { 6465; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128: 6466; CHECK: ## BB#0: 6467; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6468; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8] 6469; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 6470; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x32,0xd0] 6471; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 6472; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x32,0xc0] 6473; CHECK-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 6474; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 6475; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6476; CHECK-NEXT: retq ## encoding: [0xc3] 6477 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) 6478 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2) 6479 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1) 6480 %res3 = add <2 x i64> %res, %res1 6481 %res4 = add <2 x i64> %res3, %res2 6482 ret <2 x i64> %res4 6483} 6484 6485declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8) 6486 6487define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) { 6488; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256: 6489; CHECK: ## BB#0: 6490; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6491; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8] 6492; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 6493; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xd0] 6494; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 6495; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x32,0xc0] 6496; CHECK-NEXT: ## ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 6497; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 6498; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6499; CHECK-NEXT: retq ## encoding: [0xc3] 6500 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) 6501 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2) 6502 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1) 6503 %res3 = add <4 x i64> %res, %res1 6504 %res4 = add <4 x i64> %res3, %res2 6505 ret <4 x i64> %res4 6506} 6507 6508declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8) 6509 6510define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) { 6511; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128: 6512; CHECK: ## BB#0: 6513; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6514; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8] 6515; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero 6516; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x35,0xd0] 6517; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero 6518; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x35,0xc0] 6519; CHECK-NEXT: ## xmm0 = xmm0[0],zero,xmm0[1],zero 6520; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 6521; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6522; CHECK-NEXT: retq ## encoding: [0xc3] 6523 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) 6524 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2) 6525 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1) 6526 %res3 = add <2 x i64> %res, %res1 6527 %res4 = add <2 x i64> %res3, %res2 6528 ret <2 x i64> %res4 6529} 6530 6531declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8) 6532 6533define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) { 6534; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256: 6535; CHECK: ## BB#0: 6536; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6537; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8] 6538; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 6539; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xd0] 6540; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 6541; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x35,0xc0] 6542; CHECK-NEXT: ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 6543; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 6544; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6545; CHECK-NEXT: retq ## encoding: [0xc3] 6546 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) 6547 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2) 6548 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1) 6549 %res3 = add <4 x i64> %res, %res1 6550 %res4 = add <4 x i64> %res3, %res2 6551 ret <4 x i64> %res4 6552} 6553 6554declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8) 6555 6556define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) { 6557; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128: 6558; CHECK: ## BB#0: 6559; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6560; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8] 6561; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 6562; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x33,0xd0] 6563; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 6564; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x33,0xc0] 6565; CHECK-NEXT: ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 6566; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] 6567; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6568; CHECK-NEXT: retq ## encoding: [0xc3] 6569 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) 6570 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2) 6571 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1) 6572 %res3 = add <4 x i32> %res, %res1 6573 %res4 = add <4 x i32> %res3, %res2 6574 ret <4 x i32> %res4 6575} 6576 6577declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8) 6578 6579define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) { 6580; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256: 6581; CHECK: ## BB#0: 6582; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6583; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8] 6584; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 6585; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xd0] 6586; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 6587; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x33,0xc0] 6588; CHECK-NEXT: ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 6589; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] 6590; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6591; CHECK-NEXT: retq ## encoding: [0xc3] 6592 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) 6593 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2) 6594 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1) 6595 %res3 = add <8 x i32> %res, %res1 6596 %res4 = add <8 x i32> %res3, %res2 6597 ret <8 x i32> %res4 6598} 6599 6600declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8) 6601 6602define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) { 6603; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128: 6604; CHECK: ## BB#0: 6605; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6606; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8] 6607; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 6608; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x34,0xd0] 6609; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 6610; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x34,0xc0] 6611; CHECK-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 6612; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 6613; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6614; CHECK-NEXT: retq ## encoding: [0xc3] 6615 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) 6616 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2) 6617 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1) 6618 %res3 = add <2 x i64> %res, %res1 6619 %res4 = add <2 x i64> %res3, %res2 6620 ret <2 x i64> %res4 6621} 6622 6623declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8) 6624 6625define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) { 6626; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256: 6627; CHECK: ## BB#0: 6628; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6629; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8] 6630; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 6631; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xd0] 6632; CHECK-NEXT: ## ymm2 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 6633; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x34,0xc0] 6634; CHECK-NEXT: ## ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 6635; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 6636; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6637; CHECK-NEXT: retq ## encoding: [0xc3] 6638 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) 6639 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2) 6640 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1) 6641 %res3 = add <4 x i64> %res, %res1 6642 %res4 = add <4 x i64> %res3, %res2 6643 ret <4 x i64> %res4 6644} 6645 6646declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8) 6647 6648define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) { 6649; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128: 6650; CHECK: ## BB#0: 6651; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6652; CHECK-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8] 6653; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x21,0xd0] 6654; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x21,0xc0] 6655; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] 6656; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6657; CHECK-NEXT: retq ## encoding: [0xc3] 6658 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) 6659 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2) 6660 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1) 6661 %res3 = add <4 x i32> %res, %res1 6662 %res4 = add <4 x i32> %res3, %res2 6663 ret <4 x i32> %res4 6664} 6665 6666declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8) 6667 6668define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) { 6669; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256: 6670; CHECK: ## BB#0: 6671; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6672; CHECK-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8] 6673; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xd0] 6674; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x21,0xc0] 6675; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] 6676; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6677; CHECK-NEXT: retq ## encoding: [0xc3] 6678 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) 6679 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2) 6680 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1) 6681 %res3 = add <8 x i32> %res, %res1 6682 %res4 = add <8 x i32> %res3, %res2 6683 ret <8 x i32> %res4 6684} 6685 6686declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8) 6687 6688define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) { 6689; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128: 6690; CHECK: ## BB#0: 6691; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6692; CHECK-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8] 6693; CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x22,0xd0] 6694; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x22,0xc0] 6695; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 6696; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6697; CHECK-NEXT: retq ## encoding: [0xc3] 6698 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) 6699 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2) 6700 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1) 6701 %res3 = add <2 x i64> %res, %res1 6702 %res4 = add <2 x i64> %res3, %res2 6703 ret <2 x i64> %res4 6704} 6705 6706declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8) 6707 6708define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) { 6709; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256: 6710; CHECK: ## BB#0: 6711; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6712; CHECK-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8] 6713; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xd0] 6714; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x22,0xc0] 6715; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 6716; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6717; CHECK-NEXT: retq ## encoding: [0xc3] 6718 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) 6719 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2) 6720 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1) 6721 %res3 = add <4 x i64> %res, %res1 6722 %res4 = add <4 x i64> %res3, %res2 6723 ret <4 x i64> %res4 6724} 6725 6726declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8) 6727 6728define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) { 6729; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128: 6730; CHECK: ## BB#0: 6731; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6732; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8] 6733; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x23,0xd0] 6734; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x23,0xc0] 6735; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xca] 6736; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 6737; CHECK-NEXT: retq ## encoding: [0xc3] 6738 %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) 6739 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2) 6740 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1) 6741 %res3 = add <4 x i32> %res, %res1 6742 %res4 = add <4 x i32> %res3, %res2 6743 ret <4 x i32> %res4 6744} 6745 6746declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8) 6747 6748define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) { 6749; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256: 6750; CHECK: ## BB#0: 6751; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6752; CHECK-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8] 6753; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xd0] 6754; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x23,0xc0] 6755; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] 6756; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6757; CHECK-NEXT: retq ## encoding: [0xc3] 6758 %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) 6759 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2) 6760 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1) 6761 %res3 = add <8 x i32> %res, %res1 6762 %res4 = add <8 x i32> %res3, %res2 6763 ret <8 x i32> %res4 6764} 6765 6766declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8) 6767 6768define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) { 6769; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128: 6770; CHECK: ## BB#0: 6771; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6772; CHECK-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8] 6773; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x24,0xd0] 6774; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x24,0xc0] 6775; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca] 6776; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 6777; CHECK-NEXT: retq ## encoding: [0xc3] 6778 %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) 6779 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2) 6780 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1) 6781 %res3 = add <2 x i64> %res, %res1 6782 %res4 = add <2 x i64> %res3, %res2 6783 ret <2 x i64> %res4 6784} 6785 6786declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8) 6787 6788define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) { 6789; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256: 6790; CHECK: ## BB#0: 6791; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6792; CHECK-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8] 6793; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xd0] 6794; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x24,0xc0] 6795; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca] 6796; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6797; CHECK-NEXT: retq ## encoding: [0xc3] 6798 %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) 6799 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2) 6800 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1) 6801 %res3 = add <4 x i64> %res, %res1 6802 %res4 = add <4 x i64> %res3, %res2 6803 ret <4 x i64> %res4 6804} 6805 6806declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8) 6807 6808define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { 6809; CHECK-LABEL: test_int_x86_avx512_mask_permvar_df_256: 6810; CHECK: ## BB#0: 6811; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6812; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0] 6813; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xd8] 6814; CHECK-NEXT: vpermpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0] 6815; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xcb] 6816; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 6817; CHECK-NEXT: retq ## encoding: [0xc3] 6818 %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) 6819 %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3) 6820 %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1) 6821 %res3 = fadd <4 x double> %res, %res1 6822 %res4 = fadd <4 x double> %res3, %res2 6823 ret <4 x double> %res4 6824} 6825 6826declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) 6827 6828define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { 6829; CHECK-LABEL: test_int_x86_avx512_mask_permvar_di_256: 6830; CHECK: ## BB#0: 6831; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6832; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0] 6833; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xd8] 6834; CHECK-NEXT: vpermq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x36,0xc0] 6835; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xcb] 6836; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 6837; CHECK-NEXT: retq ## encoding: [0xc3] 6838 %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) 6839 %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) 6840 %res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) 6841 %res3 = add <4 x i64> %res, %res1 6842 %res4 = add <4 x i64> %res3, %res2 6843 ret <4 x i64> %res4 6844} 6845 6846declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8) 6847 6848define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) { 6849; CHECK-LABEL: test_int_x86_avx512_mask_permvar_sf_256: 6850; CHECK: ## BB#0: 6851; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6852; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0] 6853; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x16,0xd8] 6854; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x16,0xc0] 6855; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xcb] 6856; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] 6857; CHECK-NEXT: retq ## encoding: [0xc3] 6858 %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) 6859 %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3) 6860 %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1) 6861 %res3 = fadd <8 x float> %res, %res1 6862 %res4 = fadd <8 x float> %res3, %res2 6863 ret <8 x float> %res4 6864} 6865 6866declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 6867 6868define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { 6869; CHECK-LABEL: test_int_x86_avx512_mask_permvar_si_256: 6870; CHECK: ## BB#0: 6871; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6872; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0] 6873; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0x36,0xd8] 6874; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0x36,0xc0] 6875; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xcb] 6876; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 6877; CHECK-NEXT: retq ## encoding: [0xc3] 6878 %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 6879 %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3) 6880 %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 6881 %res3 = add <8 x i32> %res, %res1 6882 %res4 = add <8 x i32> %res3, %res2 6883 ret <8 x i32> %res4 6884} 6885 6886declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8) 6887 6888define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { 6889; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_128: 6890; CHECK: ## BB#0: 6891; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6892; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 6893; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x54,0xda,0x05] 6894; CHECK-NEXT: vpxord %xmm4, %xmm4, %xmm4 ## encoding: [0x62,0xf1,0x5d,0x08,0xef,0xe4] 6895; CHECK-NEXT: vfixupimmpd $4, %xmm2, %xmm1, %xmm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xe2,0x04] 6896; CHECK-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf3,0xf5,0x08,0x54,0xc2,0x03] 6897; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm1 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xcc] 6898; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] 6899; CHECK-NEXT: retq ## encoding: [0xc3] 6900 %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1,<2 x i64> %x2, i32 5, i8 %x4) 6901 %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> zeroinitializer, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 %x4) 6902 %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 3, i8 -1) 6903 %res3 = fadd <2 x double> %res, %res1 6904 %res4 = fadd <2 x double> %res3, %res2 6905 ret <2 x double> %res4 6906} 6907 6908declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8) 6909 6910define <2 x double>@test_int_x86_avx512_maskz_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) { 6911; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_128: 6912; CHECK: ## BB#0: 6913; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6914; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 6915; CHECK-NEXT: vfixupimmpd $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xda,0x05] 6916; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] 6917; CHECK-NEXT: vfixupimmpd $3, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0x89,0x54,0xc2,0x03] 6918; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0] 6919; CHECK-NEXT: retq ## encoding: [0xc3] 6920 %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4) 6921 %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 3, i8 %x4) 6922 ;%res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 4, i8 -1) 6923 %res3 = fadd <2 x double> %res, %res1 6924 ;%res4 = fadd <2 x double> %res3, %res2 6925 ret <2 x double> %res3 6926} 6927 6928declare <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8) 6929 6930define <4 x double>@test_int_x86_avx512_mask_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) { 6931; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_pd_256: 6932; CHECK: ## BB#0: 6933; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6934; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 6935; CHECK-NEXT: vfixupimmpd $4, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x54,0xda,0x04] 6936; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] 6937; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm4 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xe2,0x05] 6938; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03] 6939; CHECK-NEXT: vaddpd %ymm4, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcc] 6940; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 6941; CHECK-NEXT: retq ## encoding: [0xc3] 6942 %res = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 4, i8 %x4) 6943 %res1 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> zeroinitializer, <4 x double> %x1, <4 x i64> %x2 , i32 5, i8 %x4) 6944 %res2 = call <4 x double> @llvm.x86.avx512.mask.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1) 6945 %res3 = fadd <4 x double> %res, %res1 6946 %res4 = fadd <4 x double> %res3, %res2 6947 ret <4 x double> %res4 6948} 6949 6950declare <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double>, <4 x double>, <4 x i64>, i32, i8) 6951 6952define <4 x double>@test_int_x86_avx512_maskz_fixupimm_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i8 %x4) { 6953; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_pd_256: 6954; CHECK: ## BB#0: 6955; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6956; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 6957; CHECK-NEXT: vfixupimmpd $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xda,0x05] 6958; CHECK-NEXT: vpxord %ymm4, %ymm4, %ymm4 ## encoding: [0x62,0xf1,0x5d,0x28,0xef,0xe4] 6959; CHECK-NEXT: vmovaps %zmm0, %zmm5 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe8] 6960; CHECK-NEXT: vfixupimmpd $4, %ymm4, %ymm1, %ymm5 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xa9,0x54,0xec,0x04] 6961; CHECK-NEXT: vfixupimmpd $3, %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0xf5,0x28,0x54,0xc2,0x03] 6962; CHECK-NEXT: vaddpd %ymm5, %ymm3, %ymm1 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xcd] 6963; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] 6964; CHECK-NEXT: retq ## encoding: [0xc3] 6965 %res = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 5, i8 %x4) 6966 %res1 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> zeroinitializer, i32 4, i8 %x4) 6967 %res2 = call <4 x double> @llvm.x86.avx512.maskz.fixupimm.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x i64> %x2, i32 3, i8 -1) 6968 %res3 = fadd <4 x double> %res, %res1 6969 %res4 = fadd <4 x double> %res3, %res2 6970 ret <4 x double> %res4 6971} 6972 6973declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8) 6974 6975define <4 x float>@test_int_x86_avx512_mask_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { 6976; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_128: 6977; CHECK: ## BB#0: 6978; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 6979; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 6980; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xda,0x05] 6981; CHECK-NEXT: vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0] 6982; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] 6983; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] 6984; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x54,0xc2,0x05] 6985; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 6986; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc4] 6987; CHECK-NEXT: retq ## encoding: [0xc3] 6988 %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4) 6989 %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4) 6990 %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1) 6991 %res3 = fadd <4 x float> %res, %res1 6992 %res4 = fadd <4 x float> %res3, %res2 6993 ret <4 x float> %res4 6994} 6995 6996declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float>, <4 x float>, <4 x i32>, i32, i8) 6997 6998define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) { 6999; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_128: 7000; CHECK: ## BB#0: 7001; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7002; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 7003; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xda,0x05] 7004; CHECK-NEXT: vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0] 7005; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf3,0x75,0x08,0x54,0xe2,0x05] 7006; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2] 7007; CHECK-NEXT: vfixupimmps $5, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0x89,0x54,0xc2,0x05] 7008; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0] 7009; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc4] 7010; CHECK-NEXT: retq ## encoding: [0xc3] 7011 %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4) 7012 %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4) 7013 %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1) 7014 %res3 = fadd <4 x float> %res, %res1 7015 %res4 = fadd <4 x float> %res3, %res2 7016 ret <4 x float> %res4 7017} 7018 7019declare <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8) 7020 7021define <8 x float>@test_int_x86_avx512_mask_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) { 7022; CHECK-LABEL: test_int_x86_avx512_mask_fixupimm_ps_256: 7023; CHECK: ## BB#0: 7024; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7025; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 7026; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xda,0x05] 7027; CHECK-NEXT: vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0] 7028; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] 7029; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] 7030; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x54,0xc2,0x05] 7031; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 7032; CHECK-NEXT: vaddps %ymm4, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc4] 7033; CHECK-NEXT: retq ## encoding: [0xc3] 7034 %res = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4) 7035 %res1 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4) 7036 %res2 = call <8 x float> @llvm.x86.avx512.mask.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1) 7037 %res3 = fadd <8 x float> %res, %res1 7038 %res4 = fadd <8 x float> %res3, %res2 7039 ret <8 x float> %res4 7040} 7041 7042declare <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float>, <8 x float>, <8 x i32>, i32, i8) 7043 7044define <8 x float>@test_int_x86_avx512_maskz_fixupimm_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i8 %x4) { 7045; CHECK-LABEL: test_int_x86_avx512_maskz_fixupimm_ps_256: 7046; CHECK: ## BB#0: 7047; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7048; CHECK-NEXT: vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8] 7049; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xda,0x05] 7050; CHECK-NEXT: vmovaps %zmm0, %zmm4 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xe0] 7051; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm4 ## encoding: [0x62,0xf3,0x75,0x28,0x54,0xe2,0x05] 7052; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2] 7053; CHECK-NEXT: vfixupimmps $5, %ymm2, %ymm1, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xa9,0x54,0xc2,0x05] 7054; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0] 7055; CHECK-NEXT: vaddps %ymm4, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc4] 7056; CHECK-NEXT: retq ## encoding: [0xc3] 7057 %res = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 %x4) 7058 %res1 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> zeroinitializer, i32 5, i8 %x4) 7059 %res2 = call <8 x float> @llvm.x86.avx512.maskz.fixupimm.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x i32> %x2, i32 5, i8 -1) 7060 %res3 = fadd <8 x float> %res, %res1 7061 %res4 = fadd <8 x float> %res3, %res2 7062 ret <8 x float> %res4 7063} 7064 7065declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8) 7066 7067define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { 7068; CHECK-LABEL: test_int_x86_avx512_ptestm_d_128: 7069; CHECK: ## BB#0: 7070; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7071; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc1] 7072; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 7073; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1] 7074; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 7075; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 7076; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 7077; CHECK-NEXT: retq ## encoding: [0xc3] 7078 %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 7079 %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1) 7080 %res2 = add i8 %res, %res1 7081 ret i8 %res2 7082} 7083 7084declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8) 7085 7086define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { 7087; CHECK-LABEL: test_int_x86_avx512_ptestm_d_256: 7088; CHECK: ## BB#0: 7089; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7090; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x27,0xc1] 7091; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 7092; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1] 7093; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 7094; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 7095; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 7096; CHECK-NEXT: retq ## encoding: [0xc3] 7097 %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) 7098 %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1) 7099 %res2 = add i8 %res, %res1 7100 ret i8 %res2 7101} 7102 7103declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8) 7104 7105define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { 7106; CHECK-LABEL: test_int_x86_avx512_ptestm_q_128: 7107; CHECK: ## BB#0: 7108; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7109; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc1] 7110; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 7111; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1] 7112; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 7113; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 7114; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 7115; CHECK-NEXT: retq ## encoding: [0xc3] 7116 %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 7117 %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1) 7118 %res2 = add i8 %res, %res1 7119 ret i8 %res2 7120} 7121 7122declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8) 7123 7124define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { 7125; CHECK-LABEL: test_int_x86_avx512_ptestm_q_256: 7126; CHECK: ## BB#0: 7127; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7128; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc1] 7129; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 7130; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1] 7131; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 7132; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 7133; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 7134; CHECK-NEXT: retq ## encoding: [0xc3] 7135 %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 7136 %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) 7137 %res2 = add i8 %res, %res1 7138 ret i8 %res2 7139} 7140 7141declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2) 7142 7143define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { 7144; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_128: 7145; CHECK: ## BB#0: 7146; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7147; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc1] 7148; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 7149; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1] 7150; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 7151; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 7152; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 7153; CHECK-NEXT: retq ## encoding: [0xc3] 7154 %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) 7155 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1) 7156 %res2 = add i8 %res, %res1 7157 ret i8 %res2 7158} 7159 7160declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2) 7161 7162define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { 7163; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_256: 7164; CHECK: ## BB#0: 7165; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7166; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc1] 7167; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 7168; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1] 7169; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 7170; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 7171; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 7172; CHECK-NEXT: retq ## encoding: [0xc3] 7173 %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) 7174 %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1) 7175 %res2 = add i8 %res, %res1 7176 ret i8 %res2 7177} 7178 7179declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2) 7180 7181define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { 7182; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_128: 7183; CHECK: ## BB#0: 7184; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7185; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc1] 7186; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 7187; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1] 7188; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 7189; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 7190; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 7191; CHECK-NEXT: retq ## encoding: [0xc3] 7192 %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) 7193 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1) 7194 %res2 = add i8 %res, %res1 7195 ret i8 %res2 7196} 7197 7198declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2) 7199 7200define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { 7201; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_256: 7202; CHECK: ## BB#0: 7203; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] 7204; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc1] 7205; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] 7206; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1] 7207; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] 7208; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] 7209; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill> 7210; CHECK-NEXT: retq ## encoding: [0xc3] 7211 %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) 7212 %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1) 7213 %res2 = add i8 %res, %res1 7214 ret i8 %res2 7215} 7216 7217declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8) 7218 7219define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) { 7220; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256: 7221; CHECK: ## BB#0: 7222; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 7223; CHECK-NEXT: vpbroadcastd %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7] 7224; CHECK-NEXT: vpbroadcastd %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xcf] 7225; CHECK-NEXT: vpbroadcastd %edi, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xd7] 7226; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] 7227; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] 7228; CHECK-NEXT: retq ## encoding: [0xc3] 7229 %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1) 7230 %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask) 7231 %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask) 7232 %res3 = add <8 x i32> %res, %res1 7233 %res4 = add <8 x i32> %res2, %res3 7234 ret <8 x i32> %res4 7235} 7236 7237declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8) 7238 7239define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) { 7240; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128: 7241; CHECK: ## BB#0: 7242; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 7243; CHECK-NEXT: vpbroadcastd %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7] 7244; CHECK-NEXT: vpbroadcastd %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xcf] 7245; CHECK-NEXT: vpbroadcastd %edi, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xd7] 7246; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0] 7247; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0] 7248; CHECK-NEXT: retq ## encoding: [0xc3] 7249 %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1) 7250 %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask) 7251 %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask) 7252 %res3 = add <4 x i32> %res, %res1 7253 %res4 = add <4 x i32> %res2, %res3 7254 ret <4 x i32> %res4 7255} 7256 7257declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8) 7258 7259define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) { 7260; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256: 7261; CHECK: ## BB#0: 7262; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 7263; CHECK-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7] 7264; CHECK-NEXT: vpbroadcastq %rdi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xcf] 7265; CHECK-NEXT: vpbroadcastq %rdi, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xd7] 7266; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc0] 7267; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] 7268; CHECK-NEXT: retq ## encoding: [0xc3] 7269 %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 -1) 7270 %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1,i8 %mask) 7271 %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer,i8 %mask) 7272 %res3 = add <4 x i64> %res, %res1 7273 %res4 = add <4 x i64> %res2, %res3 7274 ret <4 x i64> %res4 7275} 7276 7277declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8) 7278 7279define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) { 7280; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128: 7281; CHECK: ## BB#0: 7282; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] 7283; CHECK-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7] 7284; CHECK-NEXT: vpbroadcastq %rdi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xcf] 7285; CHECK-NEXT: vpbroadcastq %rdi, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xd7] 7286; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc0] 7287; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] 7288; CHECK-NEXT: retq ## encoding: [0xc3] 7289 %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 -1) 7290 %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1,i8 %mask) 7291 %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer,i8 %mask) 7292 %res3 = add <2 x i64> %res, %res1 7293 %res4 = add <2 x i64> %res2, %res3 7294 ret <2 x i64> %res4 7295} 7296