; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx | FileCheck %s --check-prefixes=X86,X86-MMX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X86,X86-SSE,X86-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSE,X64-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512

declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)

;
; v2i32
;

define void @build_v2i32_01(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_01:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_01:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_0z(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_0z:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

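; Element 0 is undef, so only %a1 is materialized; the lane is then
; duplicated (punpckldq with itself on MMX-only targets, pshufw $68 with SSE).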
define void @build_v2i32_u1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 undef, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_z1(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-LABEL: build_v2i32_z1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v2i32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a1, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2i32_00(x86_mmx *%p0, i32 %a0, i32 %a1) nounwind {
; X86-MMX-LABEL: build_v2i32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2i32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2i32_00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x i32> undef, i32 %a0, i32 0
  %2 = insertelement <2 x i32> %1, i32 %a0, i32 1
  %3 = bitcast <2 x i32> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

;
; v4i16
;

define void @build_v4i16_0123(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0123:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X86-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0123:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1]
; X64-NEXT:    punpckldq %mm1, %mm2 # mm2 = mm2[0],mm1[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16> %3, i16 %a3, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

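; The two zero words are materialized by a single pxor, widened with
; punpcklwd before the final punpckldq merge.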
define void @build_v4i16_01zz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_01zz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-NEXT:    paddd %mm1, %mm1
; X86-NEXT:    movq %mm1, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_01zz:
; X64:       # %bb.0:
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklwd %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 0, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0uuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0uuz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0uuz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 undef, i32 1
  %3 = insertelement <4 x i16> %2, i16 undef, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0zuz(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_0zuz:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_0zuz:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 0, i32 1
  %3 = insertelement <4 x i16> %2, i16 undef, i32 2
  %4 = insertelement <4 x i16> %3, i16 0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

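; With element 3 undef, %a2 is paired with itself by punpcklwd instead of
; with a fourth value.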
define void @build_v4i16_012u(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-LABEL: build_v4i16_012u:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v4i16_012u:
; X64:       # %bb.0:
; X64-NEXT:    movd %ecx, %mm0
; X64-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 %a1, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a2, i32 2
  %4 = insertelement <4 x i16> %3, i16 undef, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

define void @build_v4i16_0u00(x86_mmx *%p0, i16 %a0, i16 %a1, i16 %a2, i16 %a3) nounwind {
; X86-MMX-LABEL: build_v4i16_0u00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v4i16_0u00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v4i16_0u00:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <4 x i16> undef, i16 %a0, i32 0
  %2 = insertelement <4 x i16> %1, i16 undef, i32 1
  %3 = insertelement <4 x i16> %2, i16 %a0, i32 2
  %4 = insertelement <4 x i16> %3, i16 %a0, i32 3
  %5 = bitcast <4 x i16> %4 to x86_mmx
  %6 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %5, x86_mmx %5)
  store x86_mmx %6, x86_mmx *%p0
  ret void
}

;
; v8i8
;

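; All eight bytes are variable, so a full merge tree is needed: four
; punpcklbw, two punpcklwd and one punpckldq.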
define void @build_v8i8_01234567(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_01234567:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm3
; X86-NEXT:    punpcklbw %mm0, %mm3 # mm3 = mm3[0],mm0[0],mm3[1],mm0[1],mm3[2],mm0[2],mm3[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm3 # mm3 = mm3[0],mm2[0]
; X86-NEXT:    paddd %mm3, %mm3
; X86-NEXT:    movq %mm3, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_01234567:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %edx, %mm1
; X64-NEXT:    movd %esi, %mm3
; X64-NEXT:    punpcklbw %mm1, %mm3 # mm3 = mm3[0],mm1[0],mm3[1],mm1[1],mm3[2],mm1[2],mm3[3],mm1[3]
; X64-NEXT:    punpcklwd %mm2, %mm3 # mm3 = mm3[0],mm2[0],mm3[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm3 # mm3 = mm3[0],mm0[0]
; X64-NEXT:    paddd %mm3, %mm3
; X64-NEXT:    movq %mm3, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a4, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a5, i32 5
  %7 = insertelement <8 x i8> %6, i8 %a6, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a7, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

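; Element 6 is zero (materialized with pxor), while the undef element 1 lets
; the byte unpack reuse whatever register value is convenient.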
define void @build_v8i8_0u2345z7(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0u2345z7:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm2, %mm0 # mm0 = mm0[0],mm2[0]
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0u2345z7:
; X64:       # %bb.0:
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %r9d, %mm0
; X64-NEXT:    movd {{[0-9]+}}(%rsp), %mm2
; X64-NEXT:    punpcklbw %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1],mm0[2],mm2[2],mm0[3],mm2[3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    movd %r8d, %mm1
; X64-NEXT:    movd %ecx, %mm2
; X64-NEXT:    punpcklbw %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1],mm2[2],mm1[2],mm2[3],mm1[3]
; X64-NEXT:    movd %esi, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    punpcklwd %mm2, %mm1 # mm1 = mm1[0],mm2[0],mm1[1],mm2[1]
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a4, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a5, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a7, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

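; Note that the zeroed high half is still assembled with pxor plus the usual
; unpack sequence rather than being folded into a single zero-extending move.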
define void @build_v8i8_0123zzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0123zzzu:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm2
; X86-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X86-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X86-NEXT:    pxor %mm0, %mm0
; X86-NEXT:    pxor %mm1, %mm1
; X86-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X86-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X86-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X86-NEXT:    paddd %mm2, %mm2
; X86-NEXT:    movq %mm2, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0123zzzu:
; X64:       # %bb.0:
; X64-NEXT:    movd %r8d, %mm0
; X64-NEXT:    movd %ecx, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    movd %edx, %mm0
; X64-NEXT:    movd %esi, %mm2
; X64-NEXT:    punpcklbw %mm0, %mm2 # mm2 = mm2[0],mm0[0],mm2[1],mm0[1],mm2[2],mm0[2],mm2[3],mm0[3]
; X64-NEXT:    punpcklwd %mm1, %mm2 # mm2 = mm2[0],mm1[0],mm2[1],mm1[1]
; X64-NEXT:    pxor %mm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpcklbw %mm0, %mm1 # mm1 = mm1[0],mm0[0],mm1[1],mm0[1],mm1[2],mm0[2],mm1[3],mm0[3]
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
; X64-NEXT:    punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0]
; X64-NEXT:    paddd %mm2, %mm2
; X64-NEXT:    movq %mm2, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a1, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a2, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a3, i32 3
  %5 = insertelement <8 x i8> %4, i8 0, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0uuuuzzz(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0uuuuzzz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0uuuuzzz:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 undef, i32 1
  %3 = insertelement <8 x i8> %2, i8 undef, i32 2
  %4 = insertelement <8 x i8> %3, i8 undef, i32 3
  %5 = insertelement <8 x i8> %4, i8 undef, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 0, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

define void @build_v8i8_0zzzzzzu(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-LABEL: build_v8i8_0zzzzzzu:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd %eax, %mm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    paddd %mm0, %mm0
; X86-NEXT:    movq %mm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: build_v8i8_0zzzzzzu:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    movd %eax, %mm0
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 0, i32 1
  %3 = insertelement <8 x i8> %2, i8 0, i32 2
  %4 = insertelement <8 x i8> %3, i8 0, i32 3
  %5 = insertelement <8 x i8> %4, i8 0, i32 4
  %6 = insertelement <8 x i8> %5, i8 0, i32 5
  %7 = insertelement <8 x i8> %6, i8 0, i32 6
  %8 = insertelement <8 x i8> %7, i8 undef, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

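; Byte splat: punpcklbw doubles the byte into a word, which is then broadcast
; with pshufw $0 (or two more unpacks on MMX-only targets).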
define void @build_v8i8_00000000(x86_mmx *%p0, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7) nounwind {
; X86-MMX-LABEL: build_v8i8_00000000:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-MMX-NEXT:    punpcklwd %mm0, %mm0 # mm0 = mm0[0,0,1,1]
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v8i8_00000000:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-SSE-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X86-SSE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v8i8_00000000:
; X64:       # %bb.0:
; X64-NEXT:    movd %esi, %mm0
; X64-NEXT:    punpcklbw %mm0, %mm0 # mm0 = mm0[0,0,1,1,2,2,3,3]
; X64-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <8 x i8> undef, i8 %a0, i32 0
  %2 = insertelement <8 x i8> %1, i8 %a0, i32 1
  %3 = insertelement <8 x i8> %2, i8 %a0, i32 2
  %4 = insertelement <8 x i8> %3, i8 %a0, i32 3
  %5 = insertelement <8 x i8> %4, i8 %a0, i32 4
  %6 = insertelement <8 x i8> %5, i8 %a0, i32 5
  %7 = insertelement <8 x i8> %6, i8 %a0, i32 6
  %8 = insertelement <8 x i8> %7, i8 %a0, i32 7
  %9 = bitcast <8 x i8> %8 to x86_mmx
  %10 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %9, x86_mmx %9)
  store x86_mmx %10, x86_mmx *%p0
  ret void
}

;
; v2f32
;

define void @build_v2f32_01(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_01:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_01:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm1, %mm0
; X86-SSE-NEXT:    movdq2q %xmm0, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_01:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    movdq2q %xmm0, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

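; The zero upper lane is supplied by pxor; on SSE targets the float still
; travels through movdq2q first.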
define void @build_v2f32_0z(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_0z:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    pxor %mm0, %mm0
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_0z:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_0z:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float 0.0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_u1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_u1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_u1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_u1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float undef, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

define void @build_v2f32_z1(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_z1:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    pxor %mm1, %mm1
; X86-MMX-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-MMX-NEXT:    paddd %mm1, %mm1
; X86-MMX-NEXT:    movq %mm1, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_z1:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pxor %mm1, %mm1
; X86-SSE-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X86-SSE-NEXT:    paddd %mm1, %mm1
; X86-SSE-NEXT:    movq %mm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_z1:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm1, %mm0
; X64-NEXT:    pxor %mm1, %mm1
; X64-NEXT:    punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0]
; X64-NEXT:    paddd %mm1, %mm1
; X64-NEXT:    movq %mm1, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float 0.0, i32 0
  %2 = insertelement <2 x float> %1, float %a1, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}

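; Float splat: after movdq2q, pshufw $68 duplicates the low 32 bits
; (punpckldq with itself on MMX-only targets).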
define void @build_v2f32_00(x86_mmx *%p0, float %a0, float %a1) nounwind {
; X86-MMX-LABEL: build_v2f32_00:
; X86-MMX:       # %bb.0:
; X86-MMX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-MMX-NEXT:    movd {{[0-9]+}}(%esp), %mm0
; X86-MMX-NEXT:    punpckldq %mm0, %mm0 # mm0 = mm0[0,0]
; X86-MMX-NEXT:    paddd %mm0, %mm0
; X86-MMX-NEXT:    movq %mm0, (%eax)
; X86-MMX-NEXT:    retl
;
; X86-SSE-LABEL: build_v2f32_00:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movdq2q %xmm0, %mm0
; X86-SSE-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X86-SSE-NEXT:    paddd %mm0, %mm0
; X86-SSE-NEXT:    movq %mm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X64-LABEL: build_v2f32_00:
; X64:       # %bb.0:
; X64-NEXT:    movdq2q %xmm0, %mm0
; X64-NEXT:    pshufw $68, %mm0, %mm0 # mm0 = mm0[0,1,0,1]
; X64-NEXT:    paddd %mm0, %mm0
; X64-NEXT:    movq %mm0, (%rdi)
; X64-NEXT:    retq
  %1 = insertelement <2 x float> undef, float %a0, i32 0
  %2 = insertelement <2 x float> %1, float %a0, i32 1
  %3 = bitcast <2 x float> %2 to x86_mmx
  %4 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %3, x86_mmx %3)
  store x86_mmx %4, x86_mmx *%p0
  ret void
}