; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX

; Code generation checks for x86-64, once with +ssse3 and once with +avx.
; Every function below builds two shufflevector results from its arguments and
; combines them with a vector add or sub. The check lines pin the assembly that
; llc is expected to emit for each feature set. In the comments below, xN / yN
; denote element N of %x / %y, and a shuffle mask index >= the vector length
; selects from the second shuffle operand.

; %a holds the even-indexed elements of the %x,%y concatenation and %b the
; odd-indexed ones, so %r = [x0+x1, x2+x3, ..., y6+y7]. Expected to be emitted
; as a single phaddw / vphaddw.
define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
; SSSE3-LABEL: phaddw1:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddw %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddw1:
; AVX: # %bb.0:
; AVX-NEXT: vphaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %r = add <8 x i16> %a, %b
  ret <8 x i16> %r
}

; Same pairwise sums as phaddw1, but which member of each adjacent pair lands
; in %a versus %b alternates (x1,x2,x5,x6,... vs x0,x3,x4,x7,...), and %b is
; built with its shuffle operands swapped (%y first, %x second). Still expected
; to be emitted as a single phaddw / vphaddw.
define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) {
; SSSE3-LABEL: phaddw2:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddw %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddw2:
; AVX: # %bb.0:
; AVX-NEXT: vphaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14>
  %b = shufflevector <8 x i16> %y, <8 x i16> %x, <8 x i32> <i32 8, i32 11, i32 12, i32 15, i32 0, i32 3, i32 4, i32 7>
  %r = add <8 x i16> %a, %b
  ret <8 x i16> %r
}

; <4 x i32> counterpart of phaddw1: even elements plus odd elements of the
; %x,%y concatenation. Expected to be emitted as a single phaddd / vphaddd.
define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) {
; SSSE3-LABEL: phaddd1:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddd1:
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; <4 x i32> counterpart of phaddw2: %a = [x1, x2, y1, y2] and, with swapped
; shuffle operands, %b = [x0, x3, y0, y3]. The sums are the same adjacent pairs
; as in phaddd1, and a single phaddd / vphaddd is expected.
define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) {
; SSSE3-LABEL: phaddd2:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddd2:
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
  %b = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single-input variant. The second shuffle operand is undef, so mask indices
; 4..7 select undef elements, and element 0 of each mask is undef as well. Only
; result element 1 (x2 + x3) is defined. Expected to be emitted as phaddd of
; %xmm0 with itself.
define <4 x i32> @phaddd3(<4 x i32> %x) {
; SSSE3-LABEL: phaddd3:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddd3:
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single-input variant where the low two result elements are defined
; (x0 + x1 and x2 + x3) and the upper two mask elements are undef.
; Expected to be emitted as phaddd of %xmm0 with itself.
define <4 x i32> @phaddd4(<4 x i32> %x) {
; SSSE3-LABEL: phaddd4:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddd4:
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Like phaddd4, except the second pair is taken in the opposite order:
; result element 1 is x3 + x2 rather than x2 + x3. Expected to be emitted as
; phaddd of %xmm0 with itself.
define <4 x i32> @phaddd5(<4 x i32> %x) {
; SSSE3-LABEL: phaddd5:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddd5:
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Only result element 0 (x0 + x1) is defined; every other mask element is
; undef. Expected to be emitted as phaddd of %xmm0 with itself.
define <4 x i32> @phaddd6(<4 x i32> %x) {
; SSSE3-LABEL: phaddd6:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddd6:
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Only result element 1 is defined, and its pair is taken in the opposite
; order (x3 + x2). Expected to be emitted as phaddd of %xmm0 with itself.
define <4 x i32> @phaddd7(<4 x i32> %x) {
; SSSE3-LABEL: phaddd7:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phaddd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phaddd7:
; AVX: # %bb.0:
; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Subtract counterpart of phaddw1: even elements minus odd elements, so
; %r = [x0-x1, x2-x3, ..., y6-y7]. Expected to be emitted as a single
; phsubw / vphsubw.
define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) {
; SSSE3-LABEL: phsubw1:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phsubw %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phsubw1:
; AVX: # %bb.0:
; AVX-NEXT: vphsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %r = sub <8 x i16> %a, %b
  ret <8 x i16> %r
}

; <4 x i32> counterpart of phsubw1: even elements minus odd elements.
; Expected to be emitted as a single phsubd / vphsubd.
define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) {
; SSSE3-LABEL: phsubd1:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phsubd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phsubd1:
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single-input subtract variant using the same masks as phaddd3: only result
; element 1 (x2 - x3) is defined. Expected to be emitted as phsubd of %xmm0
; with itself.
define <4 x i32> @phsubd2(<4 x i32> %x) {
; SSSE3-LABEL: phsubd2:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phsubd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phsubd2:
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single-input subtract variant using the same masks as phaddd4: the low two
; result elements are defined (x0 - x1 and x2 - x3). Expected to be emitted as
; phsubd of %xmm0 with itself.
define <4 x i32> @phsubd3(<4 x i32> %x) {
; SSSE3-LABEL: phsubd3:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phsubd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phsubd3:
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Single-input subtract variant using the same masks as phaddd6: only result
; element 0 (x0 - x1) is defined. Expected to be emitted as phsubd of %xmm0
; with itself.
define <4 x i32> @phsubd4(<4 x i32> %x) {
; SSSE3-LABEL: phsubd4:
; SSSE3: # %bb.0:
; SSSE3-NEXT: phsubd %xmm0, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phsubd4:
; AVX: # %bb.0:
; AVX-NEXT: vphsubd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}

; Negative test: the operands are reversed relative to phsubw1, so this
; computes odd elements minus even elements ([x1-x0, x3-x2, ...]). Subtraction
; is not commutative, and the expected output contains no horizontal subtract:
; it keeps explicit byte shuffles followed by a plain psubw / vpsubw.
define <8 x i16> @phsubw1_reverse(<8 x i16> %x, <8 x i16> %y) {
; SSSE3-LABEL: phsubw1_reverse:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: pshufb %xmm3, %xmm4
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pshufb %xmm3, %xmm2
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm3, %xmm1
; SSSE3-NEXT: pshufb %xmm3, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: psubw %xmm0, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phsubw1_reverse:
; AVX: # %bb.0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: vpsubw %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %r = sub <8 x i16> %a, %b
  ret <8 x i16> %r
}

; Negative test, <4 x i32> counterpart of phsubw1_reverse: odd elements minus
; even elements. The expected output contains no horizontal subtract; it keeps
; two shufps / vshufps followed by a plain psubd / vpsubd.
define <4 x i32> @phsubd1_reverse(<4 x i32> %x, <4 x i32> %y) {
; SSSE3-LABEL: phsubd1_reverse:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movaps %xmm0, %xmm2
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSSE3-NEXT: psubd %xmm0, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; AVX-LABEL: phsubd1_reverse:
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,3],xmm1[1,3]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %r = sub <4 x i32> %a, %b
  ret <4 x i32> %r
}