; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr8 \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN:   | FileCheck %s --implicit-check-not=xxswapd --implicit-check-not=xxpermdi

; Test swap removal when a vector splat must be adjusted to make it legal.
;
; The negative checks for xxswapd/xxpermdi are passed to FileCheck with
; --implicit-check-not so that they apply inside every CHECK-LABEL block.
; A plain CHECK-NOT placed ahead of the first CHECK-LABEL would only cover
; the output that precedes @cfoo and would never look at a function body.

; LH: 2016-11-17
;   Updated the align attribute from 16 to 8 to keep the swap instruction
;   tests. Changes have been made on little-endian to use lvx and stvx
;   instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
;   aligned vectors with elements up to 4 bytes.

; Test generated from the following C code:
;
; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
; vector char vcr;
; vector short vs = {0, 1, 2, 3, 4, 5, 6, 7};
; vector short vsr;
; vector int vi = {0, 1, 2, 3};
; vector int vir;
;
; void cfoo ()
; {
;   vcr = (vector char){vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5],
;                       vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5]};
; }
;
; void sfoo ()
; {
;   vsr = (vector short){vs[6], vs[6], vs[6], vs[6],
;                        vs[6], vs[6], vs[6], vs[6]};
; }
;
; void ifoo ()
; {
;   vir = (vector int){vi[1], vi[1], vi[1], vi[1]};
; }

; Source vectors (initialized) and result vectors (zero-initialized).
; All are deliberately aligned to 8 rather than 16; see the LH note above.
@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 8
@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 8
@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 8
@vcr = common global <16 x i8> zeroinitializer, align 8
@vsr = common global <8 x i16> zeroinitializer, align 8
@vir = common global <4 x i32> zeroinitializer, align 8

; Splat byte element 5 of @vc into every lane and store the result to @vcr.
; Function Attrs: nounwind
define void @cfoo() {
entry:
  %0 = load <16 x i8>, <16 x i8>* @vc, align 8
  %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 8
  ret void
}

; Splat halfword element 6 of @vs into every lane and store the result to @vsr.
; Function Attrs: nounwind
define void @sfoo() {
entry:
  %0 = load <8 x i16>, <8 x i16>* @vs, align 8
  %vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
  store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 8
  ret void
}

; Splat word element 1 of @vi into every lane and store the result to @vir.
; Function Attrs: nounwind
define void @ifoo() {
entry:
  %0 = load <4 x i32>, <4 x i32>* @vi, align 8
  %vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %vecinit6, <4 x i32>* @vir, align 8
  ret void
}

; Justification:
;  Byte splat of element 5 (BE) becomes element 15-5 = 10 (LE)
;  which becomes (10+8)%16 = 2 (LE swapped).
;
;  Halfword splat of element 6 (BE) becomes element 7-6 = 1 (LE)
;  which becomes (1+4)%8 = 5 (LE swapped).
;
;  Word splat of element 1 (BE) becomes element 3-1 = 2 (LE)
;  which becomes (2+2)%4 = 0 (LE swapped).

; CHECK-LABEL: @cfoo
; CHECK: lxvd2x
; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 2
; CHECK: stxvd2x

; CHECK-LABEL: @sfoo
; CHECK: lxvd2x
; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 5
; CHECK: stxvd2x

; CHECK-LABEL: @ifoo
; CHECK: lxvd2x
; CHECK: xxspltw {{[0-9]+}}, {{[0-9]+}}, 0
; CHECK: stxvd2x