; RUN: llc < %s -march=x86-64 -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s

; Every function in this file follows the same shape: load a small integer
; vector through %A, add to it, and store the sum back through %A. The
; patterns listed ahead of each function name the instructions that must
; appear, in that order, in the assembly llc emits for it.

; <2 x i8> plus the constant vector <9, 7>.
;CHECK: load_2_i8
; The vector is expected to be fetched with one 16-bit load.
;CHECK: movzwl
;CHECK: pshufb
;CHECK: paddq
;CHECK: pshufb
; The result is expected to be written with one 16-bit store.
;CHECK: movw
;CHECK: ret

define void @load_2_i8(<2 x i8>* %A) {
  %vec = load <2 x i8>* %A
  %sum = add <2 x i8> %vec, <i8 9, i8 7>
  store <2 x i8> %sum, <2 x i8>* %A
  ret void
}

; <2 x i16> plus the constant vector <9, 7>.
;CHECK: load_2_i16
; The vector is expected to be read as 32 bits in one go.
;CHECK: movd
;CHECK: pshufb
;CHECK: paddq
;CHECK: pshufb
;CHECK: movd
;CHECK: ret
define void @load_2_i16(<2 x i16>* %A) {
  %vec = load <2 x i16>* %A
  %sum = add <2 x i16> %vec, <i16 9, i16 7>
  store <2 x i16> %sum, <2 x i16>* %A
  ret void
}

; <2 x i32> plus the constant vector <9, 7>.
;CHECK: load_2_i32
;CHECK: pshufd
;CHECK: paddq
;CHECK: pshufd
;CHECK: ret
define void @load_2_i32(<2 x i32>* %A) {
  %vec = load <2 x i32>* %A
  %sum = add <2 x i32> %vec, <i32 9, i32 7>
  store <2 x i32> %sum, <2 x i32>* %A
  ret void
}

; <4 x i8> plus the constant vector <1, 4, 9, 7>.
;CHECK: load_4_i8
;CHECK: movd
;CHECK: pshufb
;CHECK: paddd
;CHECK: pshufb
;CHECK: ret
define void @load_4_i8(<4 x i8>* %A) {
  %vec = load <4 x i8>* %A
  %sum = add <4 x i8> %vec, <i8 1, i8 4, i8 9, i8 7>
  store <4 x i8> %sum, <4 x i8>* %A
  ret void
}

; <4 x i16> plus the constant vector <1, 4, 9, 7>.
;CHECK: load_4_i16
;CHECK: punpcklwd
;CHECK: paddd
;CHECK: pshufb
;CHECK: ret
define void @load_4_i16(<4 x i16>* %A) {
  %vec = load <4 x i16>* %A
  %sum = add <4 x i16> %vec, <i16 1, i16 4, i16 9, i16 7>
  store <4 x i16> %sum, <4 x i16>* %A
  ret void
}

; <8 x i8> added to itself (no constant operand in this case).
;CHECK: load_8_i8
;CHECK: punpcklbw
;CHECK: paddw
;CHECK: pshufb
;CHECK: ret
define void @load_8_i8(<8 x i8>* %A) {
  %vec = load <8 x i8>* %A
  %sum = add <8 x i8> %vec, %vec
  store <8 x i8> %sum, <8 x i8>* %A
  ret void
}