; AVX2 integer-arithmetic codegen test: verifies that 256-bit vector
; add/sub/mul select the expected VEX-encoded ymm instructions, and that
; multiplies by special constants (powers of two, -1, 0, mixed) fold to
; shifts/adds/xor where profitable.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; CHECK: vpaddq %ymm
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: vpaddd %ymm
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpaddw %ymm
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: vpaddb %ymm
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: vpsubq %ymm
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: vpsubd %ymm
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpsubw %ymm
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: vpsubb %ymm
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: vpmulld %ymm
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpmullw %ymm
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; v16i8 has no native multiply: it is widened to v16i16, multiplied with
; vpmullw, then the low bytes are gathered back with vpshufb + vpunpcklqdq.
; CHECK: mul-v16i8
; CHECK: # BB#0:
; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = mul <16 x i8> %i, %j
  ret <16 x i8> %x
}

; v32i8 multiply is split into two v16i8 halves, each handled as above,
; then recombined with vinserti128.
; CHECK: mul-v32i8
; CHECK: # BB#0:
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpmovsxbw %xmm2, %ymm2
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
; CHECK-NEXT: vpmovsxbw %xmm3, %ymm3
; CHECK-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm3
; CHECK-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpshufb %xmm4, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT: retq
define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = mul <32 x i8> %i, %j
  ret <32 x i8> %x
}

; v4i64 multiply is lowered to the pmuludq cross-product sequence
; (lo*lo + ((lo*hi + hi*lo) << 32)).
; CHECK: mul-v4i64
; CHECK: vpmuludq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

; mul by splat 2 folds to an add.
; CHECK: mul_const1
; CHECK: vpaddd
; CHECK: ret
define <8 x i32> @mul_const1(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}

; mul by splat power-of-two folds to a shift.
; CHECK: mul_const2
; CHECK: vpsllq $2
; CHECK: ret
define <4 x i64> @mul_const2(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}

; CHECK: mul_const3
; CHECK: vpsllw $3
; CHECK: ret
define <16 x i16> @mul_const3(<16 x i16> %x) {
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}

; mul by splat -1 folds to negate (0 - x).
; CHECK: mul_const4
; CHECK: vpxor
; CHECK: vpsubq
; CHECK: ret
define <4 x i64> @mul_const4(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}

; mul by zero vector folds to zero.
; CHECK: mul_const5
; CHECK: vxorps
; CHECK-NEXT: ret
define <8 x i32> @mul_const5(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; Non-uniform constant cannot fold; keeps the full multiply.
; CHECK: mul_const6
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const6(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}

; v8i64 splits into two ymm halves; each *2 folds to an add.
; CHECK: mul_const7
; CHECK: vpaddq
; CHECK: vpaddq
; CHECK: ret
define <8 x i64> @mul_const7(<8 x i64> %x) {
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}

; CHECK: mul_const8
; CHECK: vpsllw $3
; CHECK: ret
define <8 x i16> @mul_const8(<8 x i16> %x) {
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}

; CHECK: mul_const9
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const9(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; CHECK: mul_const10
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const10(<4 x i32> %x) {
  ; %x * 0x01010101
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}

; CHECK: mul_const11
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const11(<4 x i32> %x) {
  ; %x * 0x80808080
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}