; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX

; Every function below inserts the scalar %x into one lane of %v1 and then
; shuffles the result with %v2 so that exactly one output lane comes from the
; insert (always the inserted lane) and the other three come from %v2.
; With SSE4.1 and AVX the expected code is a single pinsrd/vpinsrd of %edi
; into %xmm1; plain SSE2 is expected to use movd plus movss/shufps.

; Insert into lane 0; the mask keeps lane 0 of the insert and takes lanes 1-3
; from %v2 (mask indices 5, 6, 7).
define <4 x i32> @ins_elt_0(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: ins_elt_0:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: ins_elt_0:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pinsrd $0, %edi, %xmm1
; SSE4-NEXT:    movdqa %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: ins_elt_0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $0, %edi, %xmm1, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> %v1, i32 %x, i32 0
  %shuf = shufflevector <4 x i32> %ins, <4 x i32> %v2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i32> %shuf
}

; Insert into lane 1; the mask keeps lane 1 of the insert and takes lanes 0, 2
; and 3 from %v2 (mask indices 4, 6, 7).
define <4 x i32> @ins_elt_1(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: ins_elt_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE4-LABEL: ins_elt_1:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pinsrd $1, %edi, %xmm1
; SSE4-NEXT:    movdqa %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: ins_elt_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> %v1, i32 %x, i32 1
  %shuf = shufflevector <4 x i32> %ins, <4 x i32> %v2, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  ret <4 x i32> %shuf
}

; Verify that the transform still works when the insert element is the 2nd operand to the shuffle.

; Insert into lane 2 of %v1. The insert is the second shuffle operand, so mask
; index 6 selects its lane 2; the remaining lanes come from %v2.
define <4 x i32> @ins_elt_2_commute(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: ins_elt_2_commute:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: ins_elt_2_commute:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pinsrd $2, %edi, %xmm1
; SSE4-NEXT:    movdqa %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: ins_elt_2_commute:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $2, %edi, %xmm1, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> %v1, i32 %x, i32 2
  %shuf = shufflevector <4 x i32> %v2, <4 x i32> %ins, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %shuf
}

; Insert into lane 3 of %v1. The insert is the second shuffle operand, so mask
; index 7 selects its lane 3; the remaining lanes come from %v2.
define <4 x i32> @ins_elt_3_commute(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: ins_elt_3_commute:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: ins_elt_3_commute:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pinsrd $3, %edi, %xmm1
; SSE4-NEXT:    movdqa %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: ins_elt_3_commute:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $3, %edi, %xmm1, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> %v1, i32 %x, i32 3
  %shuf = shufflevector <4 x i32> %v2, <4 x i32> %ins, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %shuf
}

; In the next 4 tests, the shuffle moves the inserted scalar to a different position in the output vector.

; Inserted at lane 0 of %v1; mask index 0 places it in output lane 2, and the
; other lanes come from %v2.
define <4 x i32> @ins_elt_0_to_2(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: ins_elt_0_to_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: ins_elt_0_to_2:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pinsrd $2, %edi, %xmm1
; SSE4-NEXT:    movdqa %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: ins_elt_0_to_2:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $2, %edi, %xmm1, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> %v1, i32 %x, i32 0
  %shuf = shufflevector <4 x i32> %ins, <4 x i32> %v2, <4 x i32> <i32 4, i32 5, i32 0, i32 7>
  ret <4 x i32> %shuf
}

; Inserted at lane 1 of %v1; mask index 1 places it in output lane 0, and the
; other lanes come from %v2.
define <4 x i32> @ins_elt_1_to_0(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: ins_elt_1_to_0:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: ins_elt_1_to_0:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pinsrd $0, %edi, %xmm1
; SSE4-NEXT:    movdqa %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: ins_elt_1_to_0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $0, %edi, %xmm1, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> %v1, i32 %x, i32 1
  %shuf = shufflevector <4 x i32> %ins, <4 x i32> %v2, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
  ret <4 x i32> %shuf
}

; Inserted at lane 2 of %v1. The insert is the second shuffle operand, so mask
; index 6 (its lane 2) places the scalar in output lane 3.
define <4 x i32> @ins_elt_2_to_3(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: ins_elt_2_to_3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: ins_elt_2_to_3:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pinsrd $3, %edi, %xmm1
; SSE4-NEXT:    movdqa %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: ins_elt_2_to_3:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $3, %edi, %xmm1, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> %v1, i32 %x, i32 2
  %shuf = shufflevector <4 x i32> %v2, <4 x i32> %ins, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
  ret <4 x i32> %shuf
}

; Inserted at lane 3 of %v1. The insert is the second shuffle operand, so mask
; index 7 (its lane 3) places the scalar in output lane 1.
define <4 x i32> @ins_elt_3_to_1(i32 %x, <4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: ins_elt_3_to_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSE4-LABEL: ins_elt_3_to_1:
; SSE4:       # %bb.0:
; SSE4-NEXT:    pinsrd $1, %edi, %xmm1
; SSE4-NEXT:    movdqa %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX-LABEL: ins_elt_3_to_1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm0
; AVX-NEXT:    retq
  %ins = insertelement <4 x i32> %v1, i32 %x, i32 3
  %shuf = shufflevector <4 x i32> %v2, <4 x i32> %ins, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
  ret <4 x i32> %shuf
}
