; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s

; Vector shift lowering tests for x86-64 with SSE2 enabled (see the llc flags
; above). Each function builds one or two vector shifts and the FileCheck
; directives next to it pin the instructions expected in the llc output.

; Splat patterns below
; (every lane of the shift-amount vector holds the same constant)

; <4 x i32> shl by splat 2 and splat 1, xor'ed together.
; The checks expect a pslld followed by a padd.
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
entry:
; CHECK: shl4
; CHECK: pslld
; CHECK: padd
; CHECK: ret
  %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; <4 x i32> lshr by splat 2 and splat 1, xor'ed together.
; The checks expect two psrld instructions on consecutive output lines.
define <4 x i32> @shr4(<4 x i32> %A) nounwind {
entry:
; CHECK: shr4
; CHECK: psrld
; CHECK-NEXT: psrld
; CHECK: ret
  %B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; <4 x i32> ashr by splat 2 and splat 1, xor'ed together.
; The checks expect two psrad instructions on consecutive output lines.
define <4 x i32> @sra4(<4 x i32> %A) nounwind {
entry:
; CHECK: sra4
; CHECK: psrad
; CHECK-NEXT: psrad
; CHECK: ret
  %B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
  %C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
  %K = xor <4 x i32> %B, %C
  ret <4 x i32> %K
}

; <2 x i64> shl by splat 2 and splat 9, xor'ed together.
; The checks expect two psllq instructions on consecutive output lines.
define <2 x i64> @shl2(<2 x i64> %A) nounwind {
entry:
; CHECK: shl2
; CHECK: psllq
; CHECK-NEXT: psllq
; CHECK: ret
  %B = shl <2 x i64> %A, < i64 2, i64 2>
  %C = shl <2 x i64> %A, < i64 9, i64 9>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}

; <2 x i64> lshr by splat 8 and splat 1, xor'ed together.
; The checks expect two psrlq instructions on consecutive output lines.
define <2 x i64> @shr2(<2 x i64> %A) nounwind {
entry:
; CHECK: shr2
; CHECK: psrlq
; CHECK-NEXT: psrlq
; CHECK: ret
  %B = lshr <2 x i64> %A, < i64 8, i64 8>
  %C = lshr <2 x i64> %A, < i64 1, i64 1>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}


; <8 x i16> shl by splat 2 and splat 1, xor'ed together.
; The checks expect a psllw followed by a padd.
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
entry:
; CHECK: shl8
; CHECK: psllw
; CHECK: padd
; CHECK: ret
  %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; <8 x i16> lshr by splat 2 and splat 1, xor'ed together.
; The checks expect two psrlw instructions on consecutive output lines.
define <8 x i16> @shr8(<8 x i16> %A) nounwind {
entry:
; CHECK: shr8
; CHECK: psrlw
; CHECK-NEXT: psrlw
; CHECK: ret
  %B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; <8 x i16> ashr by splat 2 and splat 1, xor'ed together.
; The checks expect two psraw instructions on consecutive output lines.
define <8 x i16> @sra8(<8 x i16> %A) nounwind {
entry:
; CHECK: sra8
; CHECK: psraw
; CHECK-NEXT: psraw
; CHECK: ret
  %B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  %C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}

; non-splat test
; (shift amounts differ from lane to lane)

; <8 x i16> shl by two per-lane amount vectors, xor'ed together.
; The checks require that no instruction whose name contains "psll" shows up
; between the function name and its ret.
define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind {
entry:
; CHECK: sll8_nosplat
; CHECK-NOT: psll
; CHECK-NOT: psll
; CHECK: ret
  %B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
  %C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
  %K = xor <8 x i16> %B, %C
  ret <8 x i16> %K
}


; <2 x i64> lshr by <8, 1> and by <1, 0>, xor'ed together.
; The checks pin the full expected sequence: one psrlq by 1 and one by 8 on
; copies of the input, movsd blends to assemble the two shifted vectors, then
; the xor and the move of the result into xmm0.
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
entry:
; CHECK-LABEL: shr2_nosplat
; CHECK:       movdqa %xmm0, %xmm1
; CHECK-NEXT:  psrlq $1, %xmm1
; CHECK-NEXT:  movdqa %xmm0, %xmm2
; CHECK-NEXT:  psrlq $8, %xmm2
; CHECK-NEXT:  movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT:  movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; CHECK-NEXT:  xorpd %xmm0, %xmm1
; CHECK-NEXT:  movapd %xmm1, %xmm0
; CHECK-NEXT:  ret
  %B = lshr <2 x i64> %A, < i64 8, i64 1>
  %C = lshr <2 x i64> %A, < i64 1, i64 0>
  %K = xor <2 x i64> %B, %C
  ret <2 x i64> %K
}


; Other shifts

; <2 x i32> shl by splat 2 and splat 9, xor'ed together.
; The checks expect a psllq in the output.
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
entry:
; CHECK: shl2_other
; CHECK: psllq
; CHECK: ret
  %B = shl <2 x i32> %A, < i32 2, i32 2>
  %C = shl <2 x i32> %A, < i32 9, i32 9>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

; <2 x i32> lshr by splat 8 and splat 1, xor'ed together.
; The checks expect a psrlq in the output.
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
entry:
; CHECK: shr2_other
; CHECK: psrlq
; CHECK: ret
  %B = lshr <2 x i32> %A, < i32 8, i32 8>
  %C = lshr <2 x i32> %A, < i32 1, i32 1>
  %K = xor <2 x i32> %B, %C
  ret <2 x i32> %K
}

; <16 x i8> shl by splat 3.
; The checks expect a psllw by 3 followed by a pand.
define <16 x i8> @shl9(<16 x i8> %A) nounwind {
  %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: shl9:
; CHECK: psllw $3
; CHECK: pand
; CHECK: ret
}

; <16 x i8> lshr by splat 3.
; The checks expect a psrlw by 3 followed by a pand.
define <16 x i8> @shr9(<16 x i8> %A) nounwind {
  %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: shr9:
; CHECK: psrlw $3
; CHECK: pand
; CHECK: ret
}

; <16 x i8> ashr by splat 7.
; The checks expect a pxor followed by a pcmpgtb.
define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
  %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %B
; CHECK-LABEL: sra_v16i8_7:
; CHECK: pxor
; CHECK: pcmpgtb
; CHECK: ret
}

; <16 x i8> ashr by splat 3.
; The checks expect, in order: psrlw by 3, pand, pxor, psubb.
define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
  %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %B
; CHECK-LABEL: sra_v16i8:
; CHECK: psrlw $3
; CHECK: pand
; CHECK: pxor
; CHECK: psubb
; CHECK: ret
}