; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64

; Test based on pr5626 to load/store
;

%i32vec3 = type <3 x i32>
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-LABEL: add3i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movdqa (%edx), %xmm0
; X86-NEXT:    paddd (%ecx), %xmm0
; X86-NEXT:    pextrd $2, %xmm0, 8(%eax)
; X86-NEXT:    pextrd $1, %xmm0, 4(%eax)
; X86-NEXT:    movd %xmm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: add3i32:
; X64:       # %bb.0:
; X64-NEXT:    movdqa (%rsi), %xmm0
; X64-NEXT:    paddd (%rdx), %xmm0
; X64-NEXT:    pextrd $2, %xmm0, 8(%rdi)
; X64-NEXT:    movq %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i32vec3, %i32vec3* %ap, align 16
  %b = load %i32vec3, %i32vec3* %bp, align 16
  %x = add %i32vec3 %a, %b
  store %i32vec3 %x, %i32vec3* %ret, align 16
  ret void
}

define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; X86-LABEL: add3i32_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    pinsrd $1, 4(%edx), %xmm0
; X86-NEXT:    pinsrd $2, 8(%edx), %xmm0
; X86-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    pinsrd $1, 4(%ecx), %xmm1
; X86-NEXT:    pinsrd $2, 8(%ecx), %xmm1
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    pextrd $2, %xmm1, 8(%eax)
; X86-NEXT:    pextrd $1, %xmm1, 4(%eax)
; X86-NEXT:    movd %xmm1, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: add3i32_2:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    pinsrd $2, 8(%rsi), %xmm0
; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT:    pinsrd $2, 8(%rdx), %xmm1
; X64-NEXT:    paddd %xmm0, %xmm1
; X64-NEXT:    pextrd $2, %xmm1, 8(%rdi)
; X64-NEXT:    movq %xmm1, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i32vec3, %i32vec3* %ap, align 8
  %b = load %i32vec3, %i32vec3* %bp, align 8
  %x = add %i32vec3 %a, %b
  store %i32vec3 %x, %i32vec3* %ret, align 8
  ret void
}

%i32vec7 = type <7 x i32>
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; X86-LABEL: add7i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movdqa (%edx), %xmm0
; X86-NEXT:    movdqa 16(%edx), %xmm1
; X86-NEXT:    paddd (%ecx), %xmm0
; X86-NEXT:    paddd 16(%ecx), %xmm1
; X86-NEXT:    pextrd $2, %xmm1, 24(%eax)
; X86-NEXT:    pextrd $1, %xmm1, 20(%eax)
; X86-NEXT:    movd %xmm1, 16(%eax)
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: add7i32:
; X64:       # %bb.0:
; X64-NEXT:    movdqa (%rsi), %xmm0
; X64-NEXT:    movdqa 16(%rsi), %xmm1
; X64-NEXT:    paddd (%rdx), %xmm0
; X64-NEXT:    paddd 16(%rdx), %xmm1
; X64-NEXT:    pextrd $2, %xmm1, 24(%rdi)
; X64-NEXT:    movq %xmm1, 16(%rdi)
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i32vec7, %i32vec7* %ap, align 16
  %b = load %i32vec7, %i32vec7* %bp, align 16
  %x = add %i32vec7 %a, %b
  store %i32vec7 %x, %i32vec7* %ret, align 16
  ret void
}

%i32vec12 = type <12 x i32>
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
; X86-LABEL: add12i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movdqa 32(%edx), %xmm0
; X86-NEXT:    movdqa (%edx), %xmm1
; X86-NEXT:    movdqa 16(%edx), %xmm2
; X86-NEXT:    paddd (%ecx), %xmm1
; X86-NEXT:    paddd 16(%ecx), %xmm2
; X86-NEXT:    paddd 32(%ecx), %xmm0
; X86-NEXT:    movdqa %xmm0, 32(%eax)
; X86-NEXT:    movdqa %xmm2, 16(%eax)
; X86-NEXT:    movdqa %xmm1, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: add12i32:
; X64:       # %bb.0:
; X64-NEXT:    movdqa (%rsi), %xmm0
; X64-NEXT:    movdqa 16(%rsi), %xmm1
; X64-NEXT:    movdqa 32(%rsi), %xmm2
; X64-NEXT:    paddd (%rdx), %xmm0
; X64-NEXT:    paddd 16(%rdx), %xmm1
; X64-NEXT:    paddd 32(%rdx), %xmm2
; X64-NEXT:    movdqa %xmm2, 32(%rdi)
; X64-NEXT:    movdqa %xmm1, 16(%rdi)
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i32vec12, %i32vec12* %ap, align 16
  %b = load %i32vec12, %i32vec12* %bp, align 16
  %x = add %i32vec12 %a, %b
  store %i32vec12 %x, %i32vec12* %ret, align 16
  ret void
}


%i16vec3 = type <3 x i16>
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
; X86-LABEL: add3i16:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-8, %esp
; X86-NEXT:    subl $24, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    movl 12(%ebp), %edx
; X86-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    pinsrd $2, 4(%edx), %xmm0
; X86-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-NEXT:    pinsrd $2, 4(%ecx), %xmm1
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    pextrw $4, %xmm1, 4(%eax)
; X86-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X86-NEXT:    movd %xmm1, (%eax)
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
;
; X64-LABEL: add3i16:
; X64:       # %bb.0:
; X64-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-NEXT:    paddd %xmm0, %xmm1
; X64-NEXT:    pextrw $4, %xmm1, 4(%rdi)
; X64-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; X64-NEXT:    movd %xmm1, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i16vec3, %i16vec3* %ap, align 16
  %b = load %i16vec3, %i16vec3* %bp, align 16
  %x = add %i16vec3 %a, %b
  store %i16vec3 %x, %i16vec3* %ret, align 16
  ret void
}

%i16vec4 = type <4 x i16>
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
; X86-LABEL: add4i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT:    paddw %xmm0, %xmm1
; X86-NEXT:    movq %xmm1, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: add4i16:
; X64:       # %bb.0:
; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X64-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X64-NEXT:    paddw %xmm0, %xmm1
; X64-NEXT:    movq %xmm1, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i16vec4, %i16vec4* %ap, align 16
  %b = load %i16vec4, %i16vec4* %bp, align 16
  %x = add %i16vec4 %a, %b
  store %i16vec4 %x, %i16vec4* %ret, align 16
  ret void
}

%i16vec12 = type <12 x i16>
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
; X86-LABEL: add12i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movdqa (%edx), %xmm0
; X86-NEXT:    movdqa 16(%edx), %xmm1
; X86-NEXT:    paddw (%ecx), %xmm0
; X86-NEXT:    paddw 16(%ecx), %xmm1
; X86-NEXT:    pextrd $1, %xmm1, 20(%eax)
; X86-NEXT:    movd %xmm1, 16(%eax)
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: add12i16:
; X64:       # %bb.0:
; X64-NEXT:    movdqa (%rsi), %xmm0
; X64-NEXT:    movdqa 16(%rsi), %xmm1
; X64-NEXT:    paddw (%rdx), %xmm0
; X64-NEXT:    paddw 16(%rdx), %xmm1
; X64-NEXT:    movq %xmm1, 16(%rdi)
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i16vec12, %i16vec12* %ap, align 16
  %b = load %i16vec12, %i16vec12* %bp, align 16
  %x = add %i16vec12 %a, %b
  store %i16vec12 %x, %i16vec12* %ret, align 16
  ret void
}

%i16vec18 = type <18 x i16>
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
; X86-LABEL: add18i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movdqa 32(%edx), %xmm0
; X86-NEXT:    movdqa (%edx), %xmm1
; X86-NEXT:    movdqa 16(%edx), %xmm2
; X86-NEXT:    paddw (%ecx), %xmm1
; X86-NEXT:    paddw 16(%ecx), %xmm2
; X86-NEXT:    paddw 32(%ecx), %xmm0
; X86-NEXT:    movd %xmm0, 32(%eax)
; X86-NEXT:    movdqa %xmm2, 16(%eax)
; X86-NEXT:    movdqa %xmm1, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: add18i16:
; X64:       # %bb.0:
; X64-NEXT:    movdqa (%rsi), %xmm0
; X64-NEXT:    movdqa 16(%rsi), %xmm1
; X64-NEXT:    movdqa 32(%rsi), %xmm2
; X64-NEXT:    paddw (%rdx), %xmm0
; X64-NEXT:    paddw 16(%rdx), %xmm1
; X64-NEXT:    paddw 32(%rdx), %xmm2
; X64-NEXT:    movd %xmm2, 32(%rdi)
; X64-NEXT:    movdqa %xmm1, 16(%rdi)
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i16vec18, %i16vec18* %ap, align 16
  %b = load %i16vec18, %i16vec18* %bp, align 16
  %x = add %i16vec18 %a, %b
  store %i16vec18 %x, %i16vec18* %ret, align 16
  ret void
}


%i8vec3 = type <3 x i8>
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
; X86-LABEL: add3i8:
; X86:       # %bb.0:
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-NEXT:    paddd %xmm0, %xmm1
; X86-NEXT:    pextrb $8, %xmm1, 2(%eax)
; X86-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; X86-NEXT:    pextrw $0, %xmm1, (%eax)
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl $4
;
; X64-LABEL: add3i8:
; X64:       # %bb.0:
; X64-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT:    paddd %xmm0, %xmm1
; X64-NEXT:    pextrb $8, %xmm1, 2(%rdi)
; X64-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; X64-NEXT:    pextrw $0, %xmm1, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i8vec3, %i8vec3* %ap, align 16
  %b = load %i8vec3, %i8vec3* %bp, align 16
  %x = add %i8vec3 %a, %b
  store %i8vec3 %x, %i8vec3* %ret, align 16
  ret void
}

%i8vec31 = type <31 x i8>
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
; X86-LABEL: add31i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movdqa (%edx), %xmm0
; X86-NEXT:    movdqa 16(%edx), %xmm1
; X86-NEXT:    paddb (%ecx), %xmm0
; X86-NEXT:    paddb 16(%ecx), %xmm1
; X86-NEXT:    pextrb $14, %xmm1, 30(%eax)
; X86-NEXT:    pextrw $6, %xmm1, 28(%eax)
; X86-NEXT:    pextrd $2, %xmm1, 24(%eax)
; X86-NEXT:    pextrd $1, %xmm1, 20(%eax)
; X86-NEXT:    movd %xmm1, 16(%eax)
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl $4
;
; X64-LABEL: add31i8:
; X64:       # %bb.0:
; X64-NEXT:    movdqa (%rsi), %xmm0
; X64-NEXT:    movdqa 16(%rsi), %xmm1
; X64-NEXT:    paddb (%rdx), %xmm0
; X64-NEXT:    paddb 16(%rdx), %xmm1
; X64-NEXT:    pextrb $14, %xmm1, 30(%rdi)
; X64-NEXT:    pextrw $6, %xmm1, 28(%rdi)
; X64-NEXT:    pextrd $2, %xmm1, 24(%rdi)
; X64-NEXT:    movq %xmm1, 16(%rdi)
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %a = load %i8vec31, %i8vec31* %ap, align 16
  %b = load %i8vec31, %i8vec31* %bp, align 16
  %x = add %i8vec31 %a, %b
  store %i8vec31 %x, %i8vec31* %ret, align 16
  ret void
}


%i8vec3pack = type { <3 x i8>, i8 }
define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pack* %rot) nounwind {
; X86-LABEL: rot:
; X86:       # %bb.0: # %entry
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movb $-98, 2(%edx)
; X86-NEXT:    movw $-24930, (%edx) # imm = 0x9E9E
; X86-NEXT:    movb $1, 2(%ecx)
; X86-NEXT:    movw $257, (%ecx) # imm = 0x101
; X86-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrld $1, %xmm1
; X86-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X86-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; X86-NEXT:    pextrb $8, %xmm1, 2(%eax)
; X86-NEXT:    pextrw $0, %xmm0, (%eax)
; X86-NEXT:    addl $16, %esp
; X86-NEXT:    retl $4
;
; X64-LABEL: rot:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movb $-98, 2(%rsi)
; X64-NEXT:    movw $-24930, (%rsi) # imm = 0x9E9E
; X64-NEXT:    movb $1, 2(%rdx)
; X64-NEXT:    movw $257, (%rdx) # imm = 0x101
; X64-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT:    movdqa %xmm0, %xmm1
; X64-NEXT:    psrld $1, %xmm1
; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X64-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; X64-NEXT:    pextrb $8, %xmm1, 2(%rdi)
; X64-NEXT:    pextrw $0, %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
entry:
  %storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
  store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
  %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
  store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
  %tmp = load %i8vec3pack, %i8vec3pack* %X
  %extractVec = extractvalue %i8vec3pack %tmp, 0
  %tmp2 = load %i8vec3pack, %i8vec3pack* %rot
  %extractVec3 = extractvalue %i8vec3pack %tmp2, 0
  %shr = lshr <3 x i8> %extractVec, %extractVec3
  %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
  store <3 x i8> %shr, <3 x i8>* %storetmp4
  ret void
}