; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
;
; Exercises combining of back-to-back SSE2 shuffle intrinsics (pshufd,
; pshuflw, pshufhw). Each function feeds the result of one shuffle into the
; next; the expected assembly is either nothing at all (the chain is the
; identity) or the single shuffle that the chain is equivalent to.
;
; Every 2-bit field of the i8 immediate picks the source element for one
; destination position (lowest field first). Immediates used below:
;    27 (0x1B) -> [3,2,1,0]   reverse
;   -28 (0xE4) -> [0,1,2,3]   identity
;   -31 (0xE1) -> [1,0,2,3]   swap elements 0 and 1
;   -76 (0xB4) -> [0,1,3,2]   swap elements 2 and 3
;     0 (0x00) -> [0,0,0,0]   splat element 0
;     8 (0x08) -> [0,2,0,0]

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)

; Reversing the dwords twice is the identity: no shuffle should remain.
define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
; CHECK-SSE2-LABEL: @combine_pshufd1
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: retq
  %rev.once = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %rev.twice = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %rev.once, i8 27)
  ret <4 x i32> %rev.twice
}

; An identity low-word shuffle sits between the two dword reversals, so the
; whole chain is still the identity.
define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
; CHECK-SSE2-LABEL: @combine_pshufd2
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: retq
  %dw.rev = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %dw.rev.as.w = bitcast <4 x i32> %dw.rev to <8 x i16>
  %lo.ident = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %dw.rev.as.w, i8 -28)
  %lo.ident.as.dw = bitcast <8 x i16> %lo.ident to <4 x i32>
  %result = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %lo.ident.as.dw, i8 27)
  ret <4 x i32> %result
}

; Same as combine_pshufd2, but the identity shuffle in the middle targets the
; high words.
define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
; CHECK-SSE2-LABEL: @combine_pshufd3
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: retq
  %dw.rev = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %dw.rev.as.w = bitcast <4 x i32> %dw.rev to <8 x i16>
  %hi.ident = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %dw.rev.as.w, i8 -28)
  %hi.ident.as.dw = bitcast <8 x i16> %hi.ident to <4 x i32>
  %result = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %hi.ident.as.dw, i8 27)
  ret <4 x i32> %result
}

; The two dword shuffles only swap the low pair of dwords and cancel each
; other; the high-word reversal in between is all that should be emitted.
define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
; CHECK-SSE2-LABEL: @combine_pshufd4
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-SSE2-NEXT: retq
  %lo.swap = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
  %lo.swap.as.w = bitcast <4 x i32> %lo.swap to <8 x i16>
  %hi.rev = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %lo.swap.as.w, i8 27)
  %hi.rev.as.dw = bitcast <8 x i16> %hi.rev to <4 x i32>
  %result = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %hi.rev.as.dw, i8 -31)
  ret <4 x i32> %result
}

; Mirror image of combine_pshufd4: the dword shuffles swap the high pair and
; cancel, leaving only the low-word reversal.
define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
; CHECK-SSE2-LABEL: @combine_pshufd5
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-SSE2-NEXT: retq
  %hi.swap = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
  %hi.swap.as.w = bitcast <4 x i32> %hi.swap to <8 x i16>
  %lo.rev = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %hi.swap.as.w, i8 27)
  %lo.rev.as.dw = bitcast <8 x i16> %lo.rev to <4 x i32>
  %result = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %lo.rev.as.dw, i8 -76)
  ret <4 x i32> %result
}

; The first shuffle splats element 0, so the second one cannot change the
; value; a single splat pshufd is expected.
define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
; CHECK-SSE2-LABEL: @combine_pshufd6
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshufd $0
; CHECK-SSE2-NEXT: retq
  %splat = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
  %reshuffled = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %splat, i8 8)
  ret <4 x i32> %reshuffled
}

; Reversing the low four words twice is the identity.
define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; CHECK-SSE2-LABEL: @combine_pshuflw1
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: retq
  %rev.once = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %rev.twice = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %rev.once, i8 27)
  ret <8 x i16> %rev.twice
}

; Two low-word reversals around an identity high-word shuffle: nothing should
; be emitted.
define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
; CHECK-SSE2-LABEL: @combine_pshuflw2
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: retq
  %lo.rev = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %hi.ident = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %lo.rev, i8 -28)
  %result = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %hi.ident, i8 27)
  ret <8 x i16> %result
}

; The two low-word reversals cancel; only the high-word reversal in the middle
; should survive.
define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
; CHECK-SSE2-LABEL: @combine_pshuflw3
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
; CHECK-SSE2-NEXT: retq
  %lo.rev = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %hi.rev = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %lo.rev, i8 27)
  %result = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %hi.rev, i8 27)
  ret <8 x i16> %result
}

; The two high-word reversals cancel; only the low-word reversal in the middle
; should survive.
define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
; CHECK-SSE2-LABEL: @combine_pshufhw1
; CHECK-SSE2: # BB#0:
; CHECK-SSE2-NEXT: pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
; CHECK-SSE2-NEXT: retq
  %hi.rev = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
  %lo.rev = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %hi.rev, i8 27)
  %result = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %lo.rev, i8 27)
  ret <8 x i16> %result
}