; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BASELINE
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; https://bugs.llvm.org/show_bug.cgi?id=37104

; All the advanced stuff (negative tests, commutativity) is handled in the
; scalar version of the test only.

; Every function below spells the same bitwise select in its "out" form:
;   r = (x & mask) | (y & ~mask)
; with ~mask written as an xor of the mask against an all-ones vector.
; The four run configurations above differ only in the enabled vector
; features (no SSE, SSE1 only, SSE1+SSE2, XOP).

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

; Single-lane i8 select. One shared set of expectations covers all four run
; configurations: a plain and/not/and/or sequence on general-purpose
; registers, with the result left in %al.
define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: out_v1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    andl %edx, %edi
; CHECK-NEXT:    notb %al
; CHECK-NEXT:    andb %sil, %al
; CHECK-NEXT:    orb %dil, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %mx = and <1 x i8> %x, %mask
  %notmask = xor <1 x i8> %mask, <i8 -1>
  %my = and <1 x i8> %y, %notmask
  %r = or <1 x i8> %mx, %my
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

; Two-lane i8 select. The expectations split by configuration:
;   - no-SSE and SSE1-only runs: each lane is handled separately with byte
;     operations on general-purpose registers; results end up in %al and %edx.
;   - SSE2 run: a three-instruction andps / andnps / orps sequence on %xmm.
;   - XOP run: a single vpcmov.
define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i8:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movl %r8d, %eax
; CHECK-BASELINE-NEXT:    andl %r9d, %esi
; CHECK-BASELINE-NEXT:    andl %r8d, %edi
; CHECK-BASELINE-NEXT:    notb %al
; CHECK-BASELINE-NEXT:    notb %r9b
; CHECK-BASELINE-NEXT:    andb %cl, %r9b
; CHECK-BASELINE-NEXT:    andb %dl, %al
; CHECK-BASELINE-NEXT:    orb %dil, %al
; CHECK-BASELINE-NEXT:    orb %sil, %r9b
; CHECK-BASELINE-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-BASELINE-NEXT:    movl %r9d, %edx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v2i8:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movl %r8d, %eax
; CHECK-SSE1-NEXT:    andl %r9d, %esi
; CHECK-SSE1-NEXT:    andl %r8d, %edi
; CHECK-SSE1-NEXT:    notb %al
; CHECK-SSE1-NEXT:    notb %r9b
; CHECK-SSE1-NEXT:    andb %cl, %r9b
; CHECK-SSE1-NEXT:    andb %dl, %al
; CHECK-SSE1-NEXT:    orb %dil, %al
; CHECK-SSE1-NEXT:    orb %sil, %r9b
; CHECK-SSE1-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-SSE1-NEXT:    movl %r9d, %edx
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v2i8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v2i8:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <2 x i8> %x, %mask
  %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
  %my = and <2 x i8> %y, %notmask
  %r = or <2 x i8> %mx, %my
  ret <2 x i8> %r
}

; Single-lane i16 select. As with the single-lane i8 case, one shared set of
; expectations covers every run configuration; here the not/and/or are done
; at 32-bit width and only %ax is live on return.
define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: out_v1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    andl %edx, %edi
; CHECK-NEXT:    notl %eax
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    orl %edi, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %mx = and <1 x i16> %x, %mask
  %notmask = xor <1 x i16> %mask, <i16 -1>
  %my = and <1 x i16> %y, %notmask
  %r = or <1 x i16> %mx, %my
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

; Four-lane i8 select: r = (x & mask) | (y & ~mask), with ~mask written as an
; xor against all-ones.
;   - no-SSE and SSE1-only runs: fully scalarized byte code. Some operands
;     are read from the stack, %rbx is saved/restored around the body, and
;     the four result bytes are stored through the pointer copied from %rdi
;     into %rax (presumably the hidden return-slot pointer; the IR itself
;     does not show it).
;   - SSE2 run: andps / andnps / orps.
;   - XOP run: a single vpcmov.
define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i8:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT:    andb %bl, %r8b
; CHECK-BASELINE-NEXT:    andb %r11b, %cl
; CHECK-BASELINE-NEXT:    andb %r10b, %dl
; CHECK-BASELINE-NEXT:    andb %dil, %sil
; CHECK-BASELINE-NEXT:    notb %r10b
; CHECK-BASELINE-NEXT:    notb %r11b
; CHECK-BASELINE-NEXT:    notb %bl
; CHECK-BASELINE-NEXT:    notb %dil
; CHECK-BASELINE-NEXT:    andb %r9b, %dil
; CHECK-BASELINE-NEXT:    orb %sil, %dil
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT:    orb %r8b, %bl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT:    orb %cl, %r11b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT:    orb %dl, %r10b
; CHECK-BASELINE-NEXT:    movb %bl, 3(%rax)
; CHECK-BASELINE-NEXT:    movb %r11b, 2(%rax)
; CHECK-BASELINE-NEXT:    movb %r10b, 1(%rax)
; CHECK-BASELINE-NEXT:    movb %dil, (%rax)
; CHECK-BASELINE-NEXT:    popq %rbx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v4i8:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    pushq %rbx
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT:    andb %bl, %r8b
; CHECK-SSE1-NEXT:    andb %r11b, %cl
; CHECK-SSE1-NEXT:    andb %r10b, %dl
; CHECK-SSE1-NEXT:    andb %dil, %sil
; CHECK-SSE1-NEXT:    notb %r10b
; CHECK-SSE1-NEXT:    notb %r11b
; CHECK-SSE1-NEXT:    notb %bl
; CHECK-SSE1-NEXT:    notb %dil
; CHECK-SSE1-NEXT:    andb %r9b, %dil
; CHECK-SSE1-NEXT:    orb %sil, %dil
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT:    orb %r8b, %bl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT:    orb %cl, %r11b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT:    orb %dl, %r10b
; CHECK-SSE1-NEXT:    movb %bl, 3(%rax)
; CHECK-SSE1-NEXT:    movb %r11b, 2(%rax)
; CHECK-SSE1-NEXT:    movb %r10b, 1(%rax)
; CHECK-SSE1-NEXT:    movb %dil, (%rax)
; CHECK-SSE1-NEXT:    popq %rbx
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v4i8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v4i8:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; Same as the four-lane i8 select, except lane 2 of the all-ones xor constant
; is undef, so that lane's inverted mask is unconstrained.
;   - no-SSE and SSE1-only runs: lanes 0, 1 and 3 keep the full
;     and/not/and/or sequence. Lane 2 is expected to shrink to one
;     `andb <stack>, %cl` stored to 2(%rax), with no not/or (%cl presumably
;     carries lane 2 of %x; confirm against the calling convention). %rbx is
;     no longer needed, so there is no push/pop.
;   - SSE2 and XOP runs: expectations are identical to the non-undef variant.
define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i8_undef:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %dil
; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT:    andb %r11b, %r8b
; CHECK-BASELINE-NEXT:    andb %r10b, %dl
; CHECK-BASELINE-NEXT:    andb %dil, %sil
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT:    notb %r10b
; CHECK-BASELINE-NEXT:    notb %r11b
; CHECK-BASELINE-NEXT:    notb %dil
; CHECK-BASELINE-NEXT:    andb %r9b, %dil
; CHECK-BASELINE-NEXT:    orb %sil, %dil
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT:    orb %r8b, %r11b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT:    orb %dl, %r10b
; CHECK-BASELINE-NEXT:    movb %cl, 2(%rax)
; CHECK-BASELINE-NEXT:    movb %r11b, 3(%rax)
; CHECK-BASELINE-NEXT:    movb %r10b, 1(%rax)
; CHECK-BASELINE-NEXT:    movb %dil, (%rax)
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v4i8_undef:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT:    andb %r11b, %r8b
; CHECK-SSE1-NEXT:    andb %r10b, %dl
; CHECK-SSE1-NEXT:    andb %dil, %sil
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT:    notb %r10b
; CHECK-SSE1-NEXT:    notb %r11b
; CHECK-SSE1-NEXT:    notb %dil
; CHECK-SSE1-NEXT:    andb %r9b, %dil
; CHECK-SSE1-NEXT:    orb %sil, %dil
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT:    orb %r8b, %r11b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT:    orb %dl, %r10b
; CHECK-SSE1-NEXT:    movb %cl, 2(%rax)
; CHECK-SSE1-NEXT:    movb %r11b, 3(%rax)
; CHECK-SSE1-NEXT:    movb %r10b, 1(%rax)
; CHECK-SSE1-NEXT:    movb %dil, (%rax)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v4i8_undef:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v4i8_undef:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; Two-lane i16 select.
;   - no-SSE and SSE1-only runs: both lanes are computed with 32-bit
;     and/not/and/or on general-purpose registers; results end up in %ax and
;     %edx, with no stack traffic.
;   - SSE2 run: andps / andnps / orps.
;   - XOP run: a single vpcmov.
define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i16:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movl %r8d, %eax
; CHECK-BASELINE-NEXT:    andl %r9d, %esi
; CHECK-BASELINE-NEXT:    andl %r8d, %edi
; CHECK-BASELINE-NEXT:    notl %eax
; CHECK-BASELINE-NEXT:    notl %r9d
; CHECK-BASELINE-NEXT:    andl %ecx, %r9d
; CHECK-BASELINE-NEXT:    orl %esi, %r9d
; CHECK-BASELINE-NEXT:    andl %edx, %eax
; CHECK-BASELINE-NEXT:    orl %edi, %eax
; CHECK-BASELINE-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-BASELINE-NEXT:    movl %r9d, %edx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v2i16:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movl %r8d, %eax
; CHECK-SSE1-NEXT:    andl %r9d, %esi
; CHECK-SSE1-NEXT:    andl %r8d, %edi
; CHECK-SSE1-NEXT:    notl %eax
; CHECK-SSE1-NEXT:    notl %r9d
; CHECK-SSE1-NEXT:    andl %ecx, %r9d
; CHECK-SSE1-NEXT:    orl %esi, %r9d
; CHECK-SSE1-NEXT:    andl %edx, %eax
; CHECK-SSE1-NEXT:    orl %edi, %eax
; CHECK-SSE1-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-SSE1-NEXT:    movl %r9d, %edx
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v2i16:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v2i16:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <2 x i16> %x, %mask
  %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
  %my = and <2 x i16> %y, %notmask
  %r = or <2 x i16> %mx, %my
  ret <2 x i16> %r
}

; Single-lane i32 select. One shared set of expectations covers every run
; configuration: a 32-bit and/not/and/or sequence with the result in %eax.
define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: out_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    andl %edx, %edi
; CHECK-NEXT:    notl %eax
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    orl %edi, %eax
; CHECK-NEXT:    retq
  %mx = and <1 x i32> %x, %mask
  %notmask = xor <1 x i32> %mask, <i32 -1>
  %my = and <1 x i32> %y, %notmask
  %r = or <1 x i32> %mx, %my
  ret <1 x i32> %r
}

;
============================================================================ ; 323; 64-bit vector width 324; ============================================================================ ; 325 326define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { 327; CHECK-BASELINE-LABEL: out_v8i8: 328; CHECK-BASELINE: # %bb.0: 329; CHECK-BASELINE-NEXT: pushq %rbp 330; CHECK-BASELINE-NEXT: pushq %r15 331; CHECK-BASELINE-NEXT: pushq %r14 332; CHECK-BASELINE-NEXT: pushq %r13 333; CHECK-BASELINE-NEXT: pushq %r12 334; CHECK-BASELINE-NEXT: pushq %rbx 335; CHECK-BASELINE-NEXT: movq %rdi, %rax 336; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 337; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 338; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 339; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil 340; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 341; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 342; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 343; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 344; CHECK-BASELINE-NEXT: andb %bl, %r9b 345; CHECK-BASELINE-NEXT: andb %r15b, %r8b 346; CHECK-BASELINE-NEXT: andb %bpl, %cl 347; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 348; CHECK-BASELINE-NEXT: andb %r11b, %dl 349; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 350; CHECK-BASELINE-NEXT: andb %dil, %sil 351; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 352; CHECK-BASELINE-NEXT: andb %r12b, %r13b 353; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 354; CHECK-BASELINE-NEXT: andb %r14b, %cl 355; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl 356; CHECK-BASELINE-NEXT: andb %r10b, %dl 357; CHECK-BASELINE-NEXT: notb %dil 358; CHECK-BASELINE-NEXT: notb %r11b 359; CHECK-BASELINE-NEXT: notb %bpl 360; CHECK-BASELINE-NEXT: notb %r15b 361; CHECK-BASELINE-NEXT: notb %bl 362; CHECK-BASELINE-NEXT: notb %r10b 363; CHECK-BASELINE-NEXT: notb %r14b 364; CHECK-BASELINE-NEXT: notb %r12b 365; 
CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b 366; CHECK-BASELINE-NEXT: orb %r13b, %r12b 367; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b 368; CHECK-BASELINE-NEXT: orb %cl, %r14b 369; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 370; CHECK-BASELINE-NEXT: orb %dl, %r10b 371; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 372; CHECK-BASELINE-NEXT: orb %r9b, %bl 373; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b 374; CHECK-BASELINE-NEXT: orb %r8b, %r15b 375; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl 376; CHECK-BASELINE-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %bpl # 1-byte Folded Reload 377; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 378; CHECK-BASELINE-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload 379; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil 380; CHECK-BASELINE-NEXT: orb %sil, %dil 381; CHECK-BASELINE-NEXT: movb %r12b, 7(%rax) 382; CHECK-BASELINE-NEXT: movb %r14b, 6(%rax) 383; CHECK-BASELINE-NEXT: movb %r10b, 5(%rax) 384; CHECK-BASELINE-NEXT: movb %bl, 4(%rax) 385; CHECK-BASELINE-NEXT: movb %r15b, 3(%rax) 386; CHECK-BASELINE-NEXT: movb %bpl, 2(%rax) 387; CHECK-BASELINE-NEXT: movb %r11b, 1(%rax) 388; CHECK-BASELINE-NEXT: movb %dil, (%rax) 389; CHECK-BASELINE-NEXT: popq %rbx 390; CHECK-BASELINE-NEXT: popq %r12 391; CHECK-BASELINE-NEXT: popq %r13 392; CHECK-BASELINE-NEXT: popq %r14 393; CHECK-BASELINE-NEXT: popq %r15 394; CHECK-BASELINE-NEXT: popq %rbp 395; CHECK-BASELINE-NEXT: retq 396; 397; CHECK-SSE1-LABEL: out_v8i8: 398; CHECK-SSE1: # %bb.0: 399; CHECK-SSE1-NEXT: pushq %rbp 400; CHECK-SSE1-NEXT: pushq %r15 401; CHECK-SSE1-NEXT: pushq %r14 402; CHECK-SSE1-NEXT: pushq %r13 403; CHECK-SSE1-NEXT: pushq %r12 404; CHECK-SSE1-NEXT: pushq %rbx 405; CHECK-SSE1-NEXT: movq %rdi, %rax 406; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 407; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 408; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 409; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil 410; CHECK-SSE1-NEXT: movb 
{{[0-9]+}}(%rsp), %r11b 411; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 412; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 413; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 414; CHECK-SSE1-NEXT: andb %bl, %r9b 415; CHECK-SSE1-NEXT: andb %r15b, %r8b 416; CHECK-SSE1-NEXT: andb %bpl, %cl 417; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 418; CHECK-SSE1-NEXT: andb %r11b, %dl 419; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 420; CHECK-SSE1-NEXT: andb %dil, %sil 421; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 422; CHECK-SSE1-NEXT: andb %r12b, %r13b 423; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 424; CHECK-SSE1-NEXT: andb %r14b, %cl 425; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl 426; CHECK-SSE1-NEXT: andb %r10b, %dl 427; CHECK-SSE1-NEXT: notb %dil 428; CHECK-SSE1-NEXT: notb %r11b 429; CHECK-SSE1-NEXT: notb %bpl 430; CHECK-SSE1-NEXT: notb %r15b 431; CHECK-SSE1-NEXT: notb %bl 432; CHECK-SSE1-NEXT: notb %r10b 433; CHECK-SSE1-NEXT: notb %r14b 434; CHECK-SSE1-NEXT: notb %r12b 435; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b 436; CHECK-SSE1-NEXT: orb %r13b, %r12b 437; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b 438; CHECK-SSE1-NEXT: orb %cl, %r14b 439; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 440; CHECK-SSE1-NEXT: orb %dl, %r10b 441; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 442; CHECK-SSE1-NEXT: orb %r9b, %bl 443; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b 444; CHECK-SSE1-NEXT: orb %r8b, %r15b 445; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl 446; CHECK-SSE1-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %bpl # 1-byte Folded Reload 447; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 448; CHECK-SSE1-NEXT: orb {{[-0-9]+}}(%r{{[sb]}}p), %r11b # 1-byte Folded Reload 449; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil 450; CHECK-SSE1-NEXT: orb %sil, %dil 451; CHECK-SSE1-NEXT: movb %r12b, 7(%rax) 452; CHECK-SSE1-NEXT: movb %r14b, 6(%rax) 453; CHECK-SSE1-NEXT: movb %r10b, 5(%rax) 454; CHECK-SSE1-NEXT: movb %bl, 4(%rax) 455; 
CHECK-SSE1-NEXT: movb %r15b, 3(%rax) 456; CHECK-SSE1-NEXT: movb %bpl, 2(%rax) 457; CHECK-SSE1-NEXT: movb %r11b, 1(%rax) 458; CHECK-SSE1-NEXT: movb %dil, (%rax) 459; CHECK-SSE1-NEXT: popq %rbx 460; CHECK-SSE1-NEXT: popq %r12 461; CHECK-SSE1-NEXT: popq %r13 462; CHECK-SSE1-NEXT: popq %r14 463; CHECK-SSE1-NEXT: popq %r15 464; CHECK-SSE1-NEXT: popq %rbp 465; CHECK-SSE1-NEXT: retq 466; 467; CHECK-SSE2-LABEL: out_v8i8: 468; CHECK-SSE2: # %bb.0: 469; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 470; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 471; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 472; CHECK-SSE2-NEXT: retq 473; 474; CHECK-XOP-LABEL: out_v8i8: 475; CHECK-XOP: # %bb.0: 476; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 477; CHECK-XOP-NEXT: retq 478 %mx = and <8 x i8> %x, %mask 479 %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 480 %my = and <8 x i8> %y, %notmask 481 %r = or <8 x i8> %mx, %my 482 ret <8 x i8> %r 483} 484 485define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { 486; CHECK-BASELINE-LABEL: out_v4i16: 487; CHECK-BASELINE: # %bb.0: 488; CHECK-BASELINE-NEXT: pushq %rbx 489; CHECK-BASELINE-NEXT: movq %rdi, %rax 490; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d 491; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d 492; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi 493; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx 494; CHECK-BASELINE-NEXT: andl %ebx, %esi 495; CHECK-BASELINE-NEXT: andl %edi, %r8d 496; CHECK-BASELINE-NEXT: andl %r11d, %ecx 497; CHECK-BASELINE-NEXT: andl %r10d, %edx 498; CHECK-BASELINE-NEXT: notl %r10d 499; CHECK-BASELINE-NEXT: notl %r11d 500; CHECK-BASELINE-NEXT: notl %edi 501; CHECK-BASELINE-NEXT: notl %ebx 502; CHECK-BASELINE-NEXT: andl %r9d, %ebx 503; CHECK-BASELINE-NEXT: orl %esi, %ebx 504; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %di 505; CHECK-BASELINE-NEXT: orl %r8d, %edi 506; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w 507; CHECK-BASELINE-NEXT: orl %ecx, %r11d 
508; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w 509; CHECK-BASELINE-NEXT: orl %edx, %r10d 510; CHECK-BASELINE-NEXT: movw %bx, (%rax) 511; CHECK-BASELINE-NEXT: movw %di, 6(%rax) 512; CHECK-BASELINE-NEXT: movw %r11w, 4(%rax) 513; CHECK-BASELINE-NEXT: movw %r10w, 2(%rax) 514; CHECK-BASELINE-NEXT: popq %rbx 515; CHECK-BASELINE-NEXT: retq 516; 517; CHECK-SSE1-LABEL: out_v4i16: 518; CHECK-SSE1: # %bb.0: 519; CHECK-SSE1-NEXT: pushq %rbx 520; CHECK-SSE1-NEXT: movq %rdi, %rax 521; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d 522; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d 523; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi 524; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx 525; CHECK-SSE1-NEXT: andl %ebx, %esi 526; CHECK-SSE1-NEXT: andl %edi, %r8d 527; CHECK-SSE1-NEXT: andl %r11d, %ecx 528; CHECK-SSE1-NEXT: andl %r10d, %edx 529; CHECK-SSE1-NEXT: notl %r10d 530; CHECK-SSE1-NEXT: notl %r11d 531; CHECK-SSE1-NEXT: notl %edi 532; CHECK-SSE1-NEXT: notl %ebx 533; CHECK-SSE1-NEXT: andl %r9d, %ebx 534; CHECK-SSE1-NEXT: orl %esi, %ebx 535; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %di 536; CHECK-SSE1-NEXT: orl %r8d, %edi 537; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w 538; CHECK-SSE1-NEXT: orl %ecx, %r11d 539; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w 540; CHECK-SSE1-NEXT: orl %edx, %r10d 541; CHECK-SSE1-NEXT: movw %bx, (%rax) 542; CHECK-SSE1-NEXT: movw %di, 6(%rax) 543; CHECK-SSE1-NEXT: movw %r11w, 4(%rax) 544; CHECK-SSE1-NEXT: movw %r10w, 2(%rax) 545; CHECK-SSE1-NEXT: popq %rbx 546; CHECK-SSE1-NEXT: retq 547; 548; CHECK-SSE2-LABEL: out_v4i16: 549; CHECK-SSE2: # %bb.0: 550; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 551; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 552; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 553; CHECK-SSE2-NEXT: retq 554; 555; CHECK-XOP-LABEL: out_v4i16: 556; CHECK-XOP: # %bb.0: 557; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 558; CHECK-XOP-NEXT: retq 559 %mx = and <4 x i16> %x, %mask 560 %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1> 561 %my = 
and <4 x i16> %y, %notmask 562 %r = or <4 x i16> %mx, %my 563 ret <4 x i16> %r 564} 565 566define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { 567; CHECK-BASELINE-LABEL: out_v4i16_undef: 568; CHECK-BASELINE: # %bb.0: 569; CHECK-BASELINE-NEXT: movq %rdi, %rax 570; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d 571; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d 572; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi 573; CHECK-BASELINE-NEXT: andl %edi, %esi 574; CHECK-BASELINE-NEXT: andl %r11d, %r8d 575; CHECK-BASELINE-NEXT: andl %r10d, %edx 576; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 577; CHECK-BASELINE-NEXT: notl %r10d 578; CHECK-BASELINE-NEXT: notl %r11d 579; CHECK-BASELINE-NEXT: notl %edi 580; CHECK-BASELINE-NEXT: andl %r9d, %edi 581; CHECK-BASELINE-NEXT: orl %esi, %edi 582; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w 583; CHECK-BASELINE-NEXT: orl %r8d, %r11d 584; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w 585; CHECK-BASELINE-NEXT: orl %edx, %r10d 586; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) 587; CHECK-BASELINE-NEXT: movw %di, (%rax) 588; CHECK-BASELINE-NEXT: movw %r11w, 6(%rax) 589; CHECK-BASELINE-NEXT: movw %r10w, 2(%rax) 590; CHECK-BASELINE-NEXT: retq 591; 592; CHECK-SSE1-LABEL: out_v4i16_undef: 593; CHECK-SSE1: # %bb.0: 594; CHECK-SSE1-NEXT: movq %rdi, %rax 595; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d 596; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d 597; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi 598; CHECK-SSE1-NEXT: andl %edi, %esi 599; CHECK-SSE1-NEXT: andl %r11d, %r8d 600; CHECK-SSE1-NEXT: andl %r10d, %edx 601; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 602; CHECK-SSE1-NEXT: notl %r10d 603; CHECK-SSE1-NEXT: notl %r11d 604; CHECK-SSE1-NEXT: notl %edi 605; CHECK-SSE1-NEXT: andl %r9d, %edi 606; CHECK-SSE1-NEXT: orl %esi, %edi 607; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w 608; CHECK-SSE1-NEXT: orl %r8d, %r11d 609; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w 610; CHECK-SSE1-NEXT: 
orl %edx, %r10d 611; CHECK-SSE1-NEXT: movw %cx, 4(%rax) 612; CHECK-SSE1-NEXT: movw %di, (%rax) 613; CHECK-SSE1-NEXT: movw %r11w, 6(%rax) 614; CHECK-SSE1-NEXT: movw %r10w, 2(%rax) 615; CHECK-SSE1-NEXT: retq 616; 617; CHECK-SSE2-LABEL: out_v4i16_undef: 618; CHECK-SSE2: # %bb.0: 619; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 620; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 621; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 622; CHECK-SSE2-NEXT: retq 623; 624; CHECK-XOP-LABEL: out_v4i16_undef: 625; CHECK-XOP: # %bb.0: 626; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 627; CHECK-XOP-NEXT: retq 628 %mx = and <4 x i16> %x, %mask 629 %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1> 630 %my = and <4 x i16> %y, %notmask 631 %r = or <4 x i16> %mx, %my 632 ret <4 x i16> %r 633} 634 635define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { 636; CHECK-BASELINE-LABEL: out_v2i32: 637; CHECK-BASELINE: # %bb.0: 638; CHECK-BASELINE-NEXT: movl %r8d, %eax 639; CHECK-BASELINE-NEXT: andl %r9d, %esi 640; CHECK-BASELINE-NEXT: andl %r8d, %edi 641; CHECK-BASELINE-NEXT: notl %eax 642; CHECK-BASELINE-NEXT: notl %r9d 643; CHECK-BASELINE-NEXT: andl %ecx, %r9d 644; CHECK-BASELINE-NEXT: orl %esi, %r9d 645; CHECK-BASELINE-NEXT: andl %edx, %eax 646; CHECK-BASELINE-NEXT: orl %edi, %eax 647; CHECK-BASELINE-NEXT: movl %r9d, %edx 648; CHECK-BASELINE-NEXT: retq 649; 650; CHECK-SSE1-LABEL: out_v2i32: 651; CHECK-SSE1: # %bb.0: 652; CHECK-SSE1-NEXT: movl %r8d, %eax 653; CHECK-SSE1-NEXT: andl %r9d, %esi 654; CHECK-SSE1-NEXT: andl %r8d, %edi 655; CHECK-SSE1-NEXT: notl %eax 656; CHECK-SSE1-NEXT: notl %r9d 657; CHECK-SSE1-NEXT: andl %ecx, %r9d 658; CHECK-SSE1-NEXT: orl %esi, %r9d 659; CHECK-SSE1-NEXT: andl %edx, %eax 660; CHECK-SSE1-NEXT: orl %edi, %eax 661; CHECK-SSE1-NEXT: movl %r9d, %edx 662; CHECK-SSE1-NEXT: retq 663; 664; CHECK-SSE2-LABEL: out_v2i32: 665; CHECK-SSE2: # %bb.0: 666; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 667; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 668; 
CHECK-SSE2-NEXT: orps %xmm2, %xmm0 669; CHECK-SSE2-NEXT: retq 670; 671; CHECK-XOP-LABEL: out_v2i32: 672; CHECK-XOP: # %bb.0: 673; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 674; CHECK-XOP-NEXT: retq 675 %mx = and <2 x i32> %x, %mask 676 %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1> 677 %my = and <2 x i32> %y, %notmask 678 %r = or <2 x i32> %mx, %my 679 ret <2 x i32> %r 680} 681 682define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { 683; CHECK-LABEL: out_v1i64: 684; CHECK: # %bb.0: 685; CHECK-NEXT: movq %rdx, %rax 686; CHECK-NEXT: andq %rdx, %rdi 687; CHECK-NEXT: notq %rax 688; CHECK-NEXT: andq %rsi, %rax 689; CHECK-NEXT: orq %rdi, %rax 690; CHECK-NEXT: retq 691 %mx = and <1 x i64> %x, %mask 692 %notmask = xor <1 x i64> %mask, <i64 -1> 693 %my = and <1 x i64> %y, %notmask 694 %r = or <1 x i64> %mx, %my 695 ret <1 x i64> %r 696} 697 698; ============================================================================ ; 699; 128-bit vector width 700; ============================================================================ ; 701 702define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { 703; CHECK-BASELINE-LABEL: out_v16i8: 704; CHECK-BASELINE: # %bb.0: 705; CHECK-BASELINE-NEXT: pushq %rbp 706; CHECK-BASELINE-NEXT: pushq %r15 707; CHECK-BASELINE-NEXT: pushq %r14 708; CHECK-BASELINE-NEXT: pushq %r13 709; CHECK-BASELINE-NEXT: pushq %r12 710; CHECK-BASELINE-NEXT: pushq %rbx 711; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 712; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 713; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 714; CHECK-BASELINE-NEXT: movq %rdi, %rax 715; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil 716; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 717; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 718; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 719; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), 
%r14b 720; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 721; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 722; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 723; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 724; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl 725; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 726; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 727; CHECK-BASELINE-NEXT: andb %cl, %sil 728; CHECK-BASELINE-NEXT: notb %cl 729; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 730; CHECK-BASELINE-NEXT: orb %sil, %cl 731; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 732; CHECK-BASELINE-NEXT: andb %dl, %sil 733; CHECK-BASELINE-NEXT: notb %dl 734; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 735; CHECK-BASELINE-NEXT: orb %sil, %dl 736; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 737; CHECK-BASELINE-NEXT: andb %bl, %sil 738; CHECK-BASELINE-NEXT: notb %bl 739; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 740; CHECK-BASELINE-NEXT: orb %sil, %bl 741; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 742; CHECK-BASELINE-NEXT: andb %r13b, %sil 743; CHECK-BASELINE-NEXT: notb %r13b 744; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b 745; CHECK-BASELINE-NEXT: orb %sil, %r13b 746; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 747; CHECK-BASELINE-NEXT: andb %r12b, %sil 748; CHECK-BASELINE-NEXT: notb %r12b 749; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b 750; CHECK-BASELINE-NEXT: orb %sil, %r12b 751; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 752; CHECK-BASELINE-NEXT: andb %r15b, %sil 753; CHECK-BASELINE-NEXT: notb %r15b 754; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b 755; CHECK-BASELINE-NEXT: orb %sil, %r15b 756; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 757; CHECK-BASELINE-NEXT: andb %r14b, %sil 758; CHECK-BASELINE-NEXT: notb %r14b 759; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b 760; CHECK-BASELINE-NEXT: orb %sil, %r14b 761; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 762; 
CHECK-BASELINE-NEXT: andb %bpl, %sil 763; CHECK-BASELINE-NEXT: notb %bpl 764; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl 765; CHECK-BASELINE-NEXT: orb %sil, %bpl 766; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 767; CHECK-BASELINE-NEXT: andb %r11b, %sil 768; CHECK-BASELINE-NEXT: notb %r11b 769; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 770; CHECK-BASELINE-NEXT: orb %sil, %r11b 771; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 772; CHECK-BASELINE-NEXT: andb %r10b, %sil 773; CHECK-BASELINE-NEXT: notb %r10b 774; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 775; CHECK-BASELINE-NEXT: orb %sil, %r10b 776; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 777; CHECK-BASELINE-NEXT: andb %dil, %sil 778; CHECK-BASELINE-NEXT: notb %dil 779; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil 780; CHECK-BASELINE-NEXT: orb %sil, %dil 781; CHECK-BASELINE-NEXT: movb %cl, 15(%rax) 782; CHECK-BASELINE-NEXT: movb %dl, 14(%rax) 783; CHECK-BASELINE-NEXT: movb %bl, 13(%rax) 784; CHECK-BASELINE-NEXT: movb %r13b, 12(%rax) 785; CHECK-BASELINE-NEXT: movb %r12b, 11(%rax) 786; CHECK-BASELINE-NEXT: movb %r15b, 10(%rax) 787; CHECK-BASELINE-NEXT: movb %r14b, 9(%rax) 788; CHECK-BASELINE-NEXT: movb %bpl, 8(%rax) 789; CHECK-BASELINE-NEXT: movb %r11b, 7(%rax) 790; CHECK-BASELINE-NEXT: movb %r10b, 6(%rax) 791; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 792; CHECK-BASELINE-NEXT: andb %cl, %r9b 793; CHECK-BASELINE-NEXT: notb %cl 794; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 795; CHECK-BASELINE-NEXT: orb %r9b, %cl 796; CHECK-BASELINE-NEXT: movb %dil, 5(%rax) 797; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl 798; CHECK-BASELINE-NEXT: andb %dl, %r8b 799; CHECK-BASELINE-NEXT: notb %dl 800; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 801; CHECK-BASELINE-NEXT: orb %r8b, %dl 802; CHECK-BASELINE-NEXT: movb %cl, 4(%rax) 803; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 804; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload 805; 
CHECK-BASELINE-NEXT: andb %cl, %sil 806; CHECK-BASELINE-NEXT: notb %cl 807; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 808; CHECK-BASELINE-NEXT: orb %sil, %cl 809; CHECK-BASELINE-NEXT: movb %dl, 3(%rax) 810; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl 811; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload 812; CHECK-BASELINE-NEXT: andb %dl, %sil 813; CHECK-BASELINE-NEXT: notb %dl 814; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 815; CHECK-BASELINE-NEXT: orb %sil, %dl 816; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) 817; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 818; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload 819; CHECK-BASELINE-NEXT: andb %cl, %sil 820; CHECK-BASELINE-NEXT: notb %cl 821; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 822; CHECK-BASELINE-NEXT: orb %sil, %cl 823; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) 824; CHECK-BASELINE-NEXT: movb %cl, (%rax) 825; CHECK-BASELINE-NEXT: popq %rbx 826; CHECK-BASELINE-NEXT: popq %r12 827; CHECK-BASELINE-NEXT: popq %r13 828; CHECK-BASELINE-NEXT: popq %r14 829; CHECK-BASELINE-NEXT: popq %r15 830; CHECK-BASELINE-NEXT: popq %rbp 831; CHECK-BASELINE-NEXT: retq 832; 833; CHECK-SSE1-LABEL: out_v16i8: 834; CHECK-SSE1: # %bb.0: 835; CHECK-SSE1-NEXT: pushq %rbp 836; CHECK-SSE1-NEXT: pushq %r15 837; CHECK-SSE1-NEXT: pushq %r14 838; CHECK-SSE1-NEXT: pushq %r13 839; CHECK-SSE1-NEXT: pushq %r12 840; CHECK-SSE1-NEXT: pushq %rbx 841; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 842; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 843; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 844; CHECK-SSE1-NEXT: movq %rdi, %rax 845; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil 846; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 847; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 848; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 849; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 850; CHECK-SSE1-NEXT: movb 
{{[0-9]+}}(%rsp), %r15b 851; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 852; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 853; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 854; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl 855; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 856; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 857; CHECK-SSE1-NEXT: andb %cl, %sil 858; CHECK-SSE1-NEXT: notb %cl 859; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 860; CHECK-SSE1-NEXT: orb %sil, %cl 861; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 862; CHECK-SSE1-NEXT: andb %dl, %sil 863; CHECK-SSE1-NEXT: notb %dl 864; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 865; CHECK-SSE1-NEXT: orb %sil, %dl 866; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 867; CHECK-SSE1-NEXT: andb %bl, %sil 868; CHECK-SSE1-NEXT: notb %bl 869; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 870; CHECK-SSE1-NEXT: orb %sil, %bl 871; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 872; CHECK-SSE1-NEXT: andb %r13b, %sil 873; CHECK-SSE1-NEXT: notb %r13b 874; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b 875; CHECK-SSE1-NEXT: orb %sil, %r13b 876; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 877; CHECK-SSE1-NEXT: andb %r12b, %sil 878; CHECK-SSE1-NEXT: notb %r12b 879; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b 880; CHECK-SSE1-NEXT: orb %sil, %r12b 881; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 882; CHECK-SSE1-NEXT: andb %r15b, %sil 883; CHECK-SSE1-NEXT: notb %r15b 884; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b 885; CHECK-SSE1-NEXT: orb %sil, %r15b 886; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 887; CHECK-SSE1-NEXT: andb %r14b, %sil 888; CHECK-SSE1-NEXT: notb %r14b 889; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b 890; CHECK-SSE1-NEXT: orb %sil, %r14b 891; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 892; CHECK-SSE1-NEXT: andb %bpl, %sil 893; CHECK-SSE1-NEXT: notb %bpl 894; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl 895; CHECK-SSE1-NEXT: orb %sil, %bpl 896; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 897; 
CHECK-SSE1-NEXT: andb %r11b, %sil 898; CHECK-SSE1-NEXT: notb %r11b 899; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 900; CHECK-SSE1-NEXT: orb %sil, %r11b 901; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 902; CHECK-SSE1-NEXT: andb %r10b, %sil 903; CHECK-SSE1-NEXT: notb %r10b 904; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 905; CHECK-SSE1-NEXT: orb %sil, %r10b 906; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 907; CHECK-SSE1-NEXT: andb %dil, %sil 908; CHECK-SSE1-NEXT: notb %dil 909; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil 910; CHECK-SSE1-NEXT: orb %sil, %dil 911; CHECK-SSE1-NEXT: movb %cl, 15(%rax) 912; CHECK-SSE1-NEXT: movb %dl, 14(%rax) 913; CHECK-SSE1-NEXT: movb %bl, 13(%rax) 914; CHECK-SSE1-NEXT: movb %r13b, 12(%rax) 915; CHECK-SSE1-NEXT: movb %r12b, 11(%rax) 916; CHECK-SSE1-NEXT: movb %r15b, 10(%rax) 917; CHECK-SSE1-NEXT: movb %r14b, 9(%rax) 918; CHECK-SSE1-NEXT: movb %bpl, 8(%rax) 919; CHECK-SSE1-NEXT: movb %r11b, 7(%rax) 920; CHECK-SSE1-NEXT: movb %r10b, 6(%rax) 921; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 922; CHECK-SSE1-NEXT: andb %cl, %r9b 923; CHECK-SSE1-NEXT: notb %cl 924; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 925; CHECK-SSE1-NEXT: orb %r9b, %cl 926; CHECK-SSE1-NEXT: movb %dil, 5(%rax) 927; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl 928; CHECK-SSE1-NEXT: andb %dl, %r8b 929; CHECK-SSE1-NEXT: notb %dl 930; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 931; CHECK-SSE1-NEXT: orb %r8b, %dl 932; CHECK-SSE1-NEXT: movb %cl, 4(%rax) 933; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 934; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload 935; CHECK-SSE1-NEXT: andb %cl, %sil 936; CHECK-SSE1-NEXT: notb %cl 937; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 938; CHECK-SSE1-NEXT: orb %sil, %cl 939; CHECK-SSE1-NEXT: movb %dl, 3(%rax) 940; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl 941; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload 942; CHECK-SSE1-NEXT: andb %dl, %sil 943; CHECK-SSE1-NEXT: notb %dl 944; 
CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-SSE1-NEXT:    orb %sil, %dl
; CHECK-SSE1-NEXT:    movb %cl, 2(%rax)
; CHECK-SSE1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Reload
; CHECK-SSE1-NEXT:    andb %cl, %sil
; CHECK-SSE1-NEXT:    notb %cl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT:    orb %sil, %cl
; CHECK-SSE1-NEXT:    movb %dl, 1(%rax)
; CHECK-SSE1-NEXT:    movb %cl, (%rax)
; CHECK-SSE1-NEXT:    popq %rbx
; CHECK-SSE1-NEXT:    popq %r12
; CHECK-SSE1-NEXT:    popq %r13
; CHECK-SSE1-NEXT:    popq %r14
; CHECK-SSE1-NEXT:    popq %r15
; CHECK-SSE1-NEXT:    popq %rbp
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v16i8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v16i8:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <16 x i8> %x, %mask
  %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <16 x i8> %y, %notmask
  %r = or <16 x i8> %mx, %my
  ret <16 x i8> %r
}

; out_v8i16: r = (%x & %mask) | (%y & ~%mask) on <8 x i16>, i.e. a per-bit
; select between %x and %y; all three operands are passed by value.
; Asserted lowering:
;   BASELINE / SSE1 - identical scalar code: one and/not/and/or sequence per
;                     element, each result stored through the pointer in %rdi
;                     (which is also copied to %rax).
;   SSE2            - andps / andnps / orps on %xmm0..%xmm2.
;   XOP             - a single vpcmov.
; The CHECK lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v8i16:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbp
; CHECK-BASELINE-NEXT:    pushq %r14
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT:    andw %r14w, %bx
; CHECK-BASELINE-NEXT:    notl %r14d
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r14w
; CHECK-BASELINE-NEXT:    orl %ebx, %r14d
; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT:    andw %r11w, %bx
; CHECK-BASELINE-NEXT:    notl %r11d
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
; CHECK-BASELINE-NEXT:    orl %ebx, %r11d
; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT:    andw %r10w, %bx
; CHECK-BASELINE-NEXT:    notl %r10d
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
; CHECK-BASELINE-NEXT:    orl %ebx, %r10d
; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT:    andl %ebx, %r9d
; CHECK-BASELINE-NEXT:    notl %ebx
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bx
; CHECK-BASELINE-NEXT:    orl %r9d, %ebx
; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT:    andl %edi, %r8d
; CHECK-BASELINE-NEXT:    notl %edi
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %di
; CHECK-BASELINE-NEXT:    orl %r8d, %edi
; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT:    andl %ebp, %ecx
; CHECK-BASELINE-NEXT:    notl %ebp
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %bp
; CHECK-BASELINE-NEXT:    orl %ecx, %ebp
; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-BASELINE-NEXT:    andl %ecx, %edx
; CHECK-BASELINE-NEXT:    notl %ecx
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
; CHECK-BASELINE-NEXT:    orl %edx, %ecx
; CHECK-BASELINE-NEXT:    movl {{[0-9]+}}(%rsp), %edx
; CHECK-BASELINE-NEXT:    andl %edx, %esi
; CHECK-BASELINE-NEXT:    notl %edx
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
; CHECK-BASELINE-NEXT:    orl %esi, %edx
; CHECK-BASELINE-NEXT:    movw %r14w, 14(%rax)
; CHECK-BASELINE-NEXT:    movw %r11w, 12(%rax)
; CHECK-BASELINE-NEXT:    movw %r10w, 10(%rax)
; CHECK-BASELINE-NEXT:    movw %bx, 8(%rax)
; CHECK-BASELINE-NEXT:    movw %di, 6(%rax)
; CHECK-BASELINE-NEXT:    movw %bp, 4(%rax)
; CHECK-BASELINE-NEXT:    movw %cx, 2(%rax)
; CHECK-BASELINE-NEXT:    movw %dx, (%rax)
; CHECK-BASELINE-NEXT:    popq %rbx
; CHECK-BASELINE-NEXT:    popq %r14
; CHECK-BASELINE-NEXT:    popq %rbp
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v8i16:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    pushq %rbp
; CHECK-SSE1-NEXT:    pushq %r14
; CHECK-SSE1-NEXT:    pushq %rbx
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT:    andw %r14w, %bx
; CHECK-SSE1-NEXT:    notl %r14d
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r14w
; CHECK-SSE1-NEXT:    orl %ebx, %r14d
; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT:    andw %r11w, %bx
; CHECK-SSE1-NEXT:    notl %r11d
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r11w
; CHECK-SSE1-NEXT:    orl %ebx, %r11d
; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT:    andw %r10w, %bx
; CHECK-SSE1-NEXT:    notl %r10d
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r10w
; CHECK-SSE1-NEXT:    orl %ebx, %r10d
; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT:    andl %ebx, %r9d
; CHECK-SSE1-NEXT:    notl %ebx
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bx
; CHECK-SSE1-NEXT:    orl %r9d, %ebx
; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT:    andl %edi, %r8d
; CHECK-SSE1-NEXT:    notl %edi
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %di
; CHECK-SSE1-NEXT:    orl %r8d, %edi
; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT:    andl %ebp, %ecx
; CHECK-SSE1-NEXT:    notl %ebp
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %bp
; CHECK-SSE1-NEXT:    orl %ecx, %ebp
; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-SSE1-NEXT:    andl %ecx, %edx
; CHECK-SSE1-NEXT:    notl %ecx
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
; CHECK-SSE1-NEXT:    orl %edx, %ecx
; CHECK-SSE1-NEXT:    movl {{[0-9]+}}(%rsp), %edx
; CHECK-SSE1-NEXT:    andl %edx, %esi
; CHECK-SSE1-NEXT:    notl %edx
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
; CHECK-SSE1-NEXT:    orl %esi, %edx
; CHECK-SSE1-NEXT:    movw %r14w, 14(%rax)
; CHECK-SSE1-NEXT:    movw %r11w, 12(%rax)
; CHECK-SSE1-NEXT:    movw %r10w, 10(%rax)
; CHECK-SSE1-NEXT:    movw %bx, 8(%rax)
; CHECK-SSE1-NEXT:    movw %di, 6(%rax)
; CHECK-SSE1-NEXT:    movw %bp, 4(%rax)
; CHECK-SSE1-NEXT:    movw %cx, 2(%rax)
; CHECK-SSE1-NEXT:    movw %dx, (%rax)
; CHECK-SSE1-NEXT:    popq %rbx
; CHECK-SSE1-NEXT:    popq %r14
; CHECK-SSE1-NEXT:    popq %rbp
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v8i16:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v8i16:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <8 x i16> %x, %mask
  %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %my = and <8 x i16> %y, %notmask
  %r = or <8 x i16> %mx, %my
  ret <8 x i16> %r
}

; out_v4i32: r = (x & mask) | (y & ~mask) on <4 x i32>, where x, y and mask
; are loaded (align 16) from %px, %py and %pmask.
; Asserted lowering:
;   BASELINE - scalar 32-bit and/not/and/or per element; results are stored
;              through the pointer in %rdi (also copied to %rax).
;   SSE1     - movaps / andps / andnps / orps, then the result is stored with
;              movaps through %rdi.
;   SSE2     - movaps / andps / andnps / orps, result left in %xmm0.
;   XOP      - two vmovdqa loads and one vpcmov with a memory operand.
define <4 x i32> @out_v4i32(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i32:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movl (%rcx), %r8d
; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r9d
; CHECK-BASELINE-NEXT:    movl 8(%rcx), %edi
; CHECK-BASELINE-NEXT:    movl 12(%rcx), %ecx
; CHECK-BASELINE-NEXT:    movl 12(%rsi), %r10d
; CHECK-BASELINE-NEXT:    andl %ecx, %r10d
; CHECK-BASELINE-NEXT:    movl 8(%rsi), %r11d
; CHECK-BASELINE-NEXT:    andl %edi, %r11d
; CHECK-BASELINE-NEXT:    movl 4(%rsi), %ebx
; CHECK-BASELINE-NEXT:    andl %r9d, %ebx
; CHECK-BASELINE-NEXT:    movl (%rsi), %esi
; CHECK-BASELINE-NEXT:    andl %r8d, %esi
; CHECK-BASELINE-NEXT:    notl %r8d
; CHECK-BASELINE-NEXT:    notl %r9d
; CHECK-BASELINE-NEXT:    notl %edi
; CHECK-BASELINE-NEXT:    notl %ecx
; CHECK-BASELINE-NEXT:    andl 12(%rdx), %ecx
; CHECK-BASELINE-NEXT:    orl %r10d, %ecx
; CHECK-BASELINE-NEXT:    andl 8(%rdx), %edi
; CHECK-BASELINE-NEXT:    orl %r11d, %edi
; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r9d
; CHECK-BASELINE-NEXT:    orl %ebx, %r9d
; CHECK-BASELINE-NEXT:    andl (%rdx), %r8d
; CHECK-BASELINE-NEXT:    orl %esi, %r8d
; CHECK-BASELINE-NEXT:    movl %ecx, 12(%rax)
; CHECK-BASELINE-NEXT:    movl %edi, 8(%rax)
; CHECK-BASELINE-NEXT:    movl %r9d, 4(%rax)
; CHECK-BASELINE-NEXT:    movl %r8d, (%rax)
; CHECK-BASELINE-NEXT:    popq %rbx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v4i32:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v4i32:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v4i32:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %mx = and <4 x i32> %x, %mask
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %y, %notmask
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; out_v4i32_undef: same IR as out_v4i32 except that element 2 of the all-ones
; constant used to invert %mask is undef.
; Asserted lowering:
;   BASELINE          - element 2 (byte offset 8) is computed as x & mask only
;                       (no not/and/or for that element); the other three
;                       elements use the full and/not/and/or sequence.
;   SSE1 / SSE2 / XOP - the same instruction sequences as asserted for
;                       out_v4i32.
define <4 x i32> @out_v4i32_undef(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i32_undef:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movl 8(%rsi), %r8d
; CHECK-BASELINE-NEXT:    movl (%rcx), %r9d
; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r10d
; CHECK-BASELINE-NEXT:    movl 12(%rcx), %edi
; CHECK-BASELINE-NEXT:    andl 8(%rcx), %r8d
; CHECK-BASELINE-NEXT:    movl 12(%rsi), %ecx
; CHECK-BASELINE-NEXT:    andl %edi, %ecx
; CHECK-BASELINE-NEXT:    movl 4(%rsi), %r11d
; CHECK-BASELINE-NEXT:    andl %r10d, %r11d
; CHECK-BASELINE-NEXT:    movl (%rsi), %esi
; CHECK-BASELINE-NEXT:    andl %r9d, %esi
; CHECK-BASELINE-NEXT:    notl %r9d
; CHECK-BASELINE-NEXT:    notl %r10d
; CHECK-BASELINE-NEXT:    notl %edi
; CHECK-BASELINE-NEXT:    andl 12(%rdx), %edi
; CHECK-BASELINE-NEXT:    orl %ecx, %edi
; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r10d
; CHECK-BASELINE-NEXT:    orl %r11d, %r10d
; CHECK-BASELINE-NEXT:    andl (%rdx), %r9d
; CHECK-BASELINE-NEXT:    orl %esi, %r9d
; CHECK-BASELINE-NEXT:    movl %r8d, 8(%rax)
; CHECK-BASELINE-NEXT:    movl %edi, 12(%rax)
; CHECK-BASELINE-NEXT:    movl %r10d, 4(%rax)
; CHECK-BASELINE-NEXT:    movl %r9d, (%rax)
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v4i32_undef:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v4i32_undef:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v4i32_undef:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %mx = and <4 x i32> %x, %mask
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
  %my = and <4 x i32> %y, %notmask
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; out_v2i64: r = (%x & %mask) | (%y & ~%mask) on <2 x i64>, operands passed
; by value.
; Asserted lowering:
;   BASELINE / SSE1 - identical scalar code: two 64-bit and/not/and/or
;                     sequences, results left in %rax and %rdx.
;   SSE2            - andps / andnps / orps on %xmm0..%xmm2.
;   XOP             - a single vpcmov.
define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i64:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movq %r8, %rax
; CHECK-BASELINE-NEXT:    andq %r9, %rsi
; CHECK-BASELINE-NEXT:    andq %r8, %rdi
; CHECK-BASELINE-NEXT:    notq %rax
; CHECK-BASELINE-NEXT:    notq %r9
; CHECK-BASELINE-NEXT:    andq %rcx, %r9
; CHECK-BASELINE-NEXT:    orq %rsi, %r9
; CHECK-BASELINE-NEXT:    andq %rdx, %rax
; CHECK-BASELINE-NEXT:    orq %rdi, %rax
; CHECK-BASELINE-NEXT:    movq %r9, %rdx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v2i64:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %r8, %rax
; CHECK-SSE1-NEXT:    andq %r9, %rsi
; CHECK-SSE1-NEXT:    andq %r8, %rdi
; CHECK-SSE1-NEXT:    notq %rax
; CHECK-SSE1-NEXT:    notq %r9
; CHECK-SSE1-NEXT:    andq %rcx, %r9
; CHECK-SSE1-NEXT:    orq %rsi, %r9
; CHECK-SSE1-NEXT:    andq %rdx, %rax
; CHECK-SSE1-NEXT:    orq %rdi, %rax
; CHECK-SSE1-NEXT:    movq %r9, %rdx
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v2i64:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v2i64:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <2 x i64> %x, %mask
  %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1>
  %my = and <2 x i64> %y, %notmask
  %r = or <2 x i64> %mx, %my
  ret <2 x i64> %r
}

; ============================================================================ ;
; 256-bit vector width
; ============================================================================ ;

define <32 x i8> @out_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: out_v32i8:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbp
; CHECK-BASELINE-NEXT:    pushq %r15
; CHECK-BASELINE-NEXT:    pushq %r14
; CHECK-BASELINE-NEXT:    pushq %r13
; CHECK-BASELINE-NEXT:    pushq %r12
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movq %rcx, %r15
; CHECK-BASELINE-NEXT:    movq %rsi, %r14
; CHECK-BASELINE-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-BASELINE-NEXT:    movb 16(%rcx), %al
; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT:    movb 17(%rcx), %al
; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT:    movb 18(%rcx), %al
; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT:    movb 19(%rcx), %al
; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT:    movb 20(%rcx), %al
; CHECK-BASELINE-NEXT:    movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-BASELINE-NEXT:    movb 21(%rcx), %r12b
; CHECK-BASELINE-NEXT:    movb 22(%rcx), %r9b
; CHECK-BASELINE-NEXT:    movb 23(%rcx), 
%r10b 1338; CHECK-BASELINE-NEXT: movb 24(%rcx), %r11b 1339; CHECK-BASELINE-NEXT: movb 25(%rcx), %bpl 1340; CHECK-BASELINE-NEXT: movb 26(%rcx), %r13b 1341; CHECK-BASELINE-NEXT: movb 27(%rcx), %r8b 1342; CHECK-BASELINE-NEXT: movb 28(%rcx), %dil 1343; CHECK-BASELINE-NEXT: movb 29(%rcx), %sil 1344; CHECK-BASELINE-NEXT: movb 30(%rcx), %bl 1345; CHECK-BASELINE-NEXT: movb 31(%rcx), %al 1346; CHECK-BASELINE-NEXT: movb 31(%r14), %cl 1347; CHECK-BASELINE-NEXT: andb %al, %cl 1348; CHECK-BASELINE-NEXT: notb %al 1349; CHECK-BASELINE-NEXT: andb 31(%rdx), %al 1350; CHECK-BASELINE-NEXT: orb %cl, %al 1351; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1352; CHECK-BASELINE-NEXT: movb 30(%r14), %al 1353; CHECK-BASELINE-NEXT: andb %bl, %al 1354; CHECK-BASELINE-NEXT: notb %bl 1355; CHECK-BASELINE-NEXT: andb 30(%rdx), %bl 1356; CHECK-BASELINE-NEXT: orb %al, %bl 1357; CHECK-BASELINE-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1358; CHECK-BASELINE-NEXT: movb 29(%r14), %al 1359; CHECK-BASELINE-NEXT: andb %sil, %al 1360; CHECK-BASELINE-NEXT: notb %sil 1361; CHECK-BASELINE-NEXT: andb 29(%rdx), %sil 1362; CHECK-BASELINE-NEXT: orb %al, %sil 1363; CHECK-BASELINE-NEXT: movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1364; CHECK-BASELINE-NEXT: movb 28(%r14), %al 1365; CHECK-BASELINE-NEXT: andb %dil, %al 1366; CHECK-BASELINE-NEXT: notb %dil 1367; CHECK-BASELINE-NEXT: andb 28(%rdx), %dil 1368; CHECK-BASELINE-NEXT: orb %al, %dil 1369; CHECK-BASELINE-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1370; CHECK-BASELINE-NEXT: movb 27(%r14), %al 1371; CHECK-BASELINE-NEXT: andb %r8b, %al 1372; CHECK-BASELINE-NEXT: notb %r8b 1373; CHECK-BASELINE-NEXT: andb 27(%rdx), %r8b 1374; CHECK-BASELINE-NEXT: orb %al, %r8b 1375; CHECK-BASELINE-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1376; CHECK-BASELINE-NEXT: movb 26(%r14), %al 1377; CHECK-BASELINE-NEXT: andb %r13b, %al 1378; CHECK-BASELINE-NEXT: notb %r13b 1379; CHECK-BASELINE-NEXT: andb 26(%rdx), 
%r13b 1380; CHECK-BASELINE-NEXT: orb %al, %r13b 1381; CHECK-BASELINE-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1382; CHECK-BASELINE-NEXT: movb 25(%r14), %al 1383; CHECK-BASELINE-NEXT: andb %bpl, %al 1384; CHECK-BASELINE-NEXT: notb %bpl 1385; CHECK-BASELINE-NEXT: andb 25(%rdx), %bpl 1386; CHECK-BASELINE-NEXT: orb %al, %bpl 1387; CHECK-BASELINE-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1388; CHECK-BASELINE-NEXT: movb 24(%r14), %al 1389; CHECK-BASELINE-NEXT: andb %r11b, %al 1390; CHECK-BASELINE-NEXT: notb %r11b 1391; CHECK-BASELINE-NEXT: andb 24(%rdx), %r11b 1392; CHECK-BASELINE-NEXT: orb %al, %r11b 1393; CHECK-BASELINE-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1394; CHECK-BASELINE-NEXT: movb 23(%r14), %al 1395; CHECK-BASELINE-NEXT: andb %r10b, %al 1396; CHECK-BASELINE-NEXT: notb %r10b 1397; CHECK-BASELINE-NEXT: andb 23(%rdx), %r10b 1398; CHECK-BASELINE-NEXT: orb %al, %r10b 1399; CHECK-BASELINE-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1400; CHECK-BASELINE-NEXT: movb 22(%r14), %al 1401; CHECK-BASELINE-NEXT: andb %r9b, %al 1402; CHECK-BASELINE-NEXT: notb %r9b 1403; CHECK-BASELINE-NEXT: andb 22(%rdx), %r9b 1404; CHECK-BASELINE-NEXT: orb %al, %r9b 1405; CHECK-BASELINE-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1406; CHECK-BASELINE-NEXT: movb 21(%r14), %al 1407; CHECK-BASELINE-NEXT: andb %r12b, %al 1408; CHECK-BASELINE-NEXT: notb %r12b 1409; CHECK-BASELINE-NEXT: andb 21(%rdx), %r12b 1410; CHECK-BASELINE-NEXT: orb %al, %r12b 1411; CHECK-BASELINE-NEXT: movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1412; CHECK-BASELINE-NEXT: movb 20(%r14), %al 1413; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1414; CHECK-BASELINE-NEXT: andb %cl, %al 1415; CHECK-BASELINE-NEXT: notb %cl 1416; CHECK-BASELINE-NEXT: andb 20(%rdx), %cl 1417; CHECK-BASELINE-NEXT: orb %al, %cl 1418; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1419; CHECK-BASELINE-NEXT: movb 
19(%r14), %al 1420; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1421; CHECK-BASELINE-NEXT: andb %cl, %al 1422; CHECK-BASELINE-NEXT: notb %cl 1423; CHECK-BASELINE-NEXT: andb 19(%rdx), %cl 1424; CHECK-BASELINE-NEXT: orb %al, %cl 1425; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1426; CHECK-BASELINE-NEXT: movb 18(%r14), %al 1427; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1428; CHECK-BASELINE-NEXT: andb %cl, %al 1429; CHECK-BASELINE-NEXT: notb %cl 1430; CHECK-BASELINE-NEXT: andb 18(%rdx), %cl 1431; CHECK-BASELINE-NEXT: orb %al, %cl 1432; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1433; CHECK-BASELINE-NEXT: movb 17(%r14), %al 1434; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1435; CHECK-BASELINE-NEXT: andb %cl, %al 1436; CHECK-BASELINE-NEXT: notb %cl 1437; CHECK-BASELINE-NEXT: movq %rdx, %rbx 1438; CHECK-BASELINE-NEXT: andb 17(%rdx), %cl 1439; CHECK-BASELINE-NEXT: orb %al, %cl 1440; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1441; CHECK-BASELINE-NEXT: movb 16(%r14), %al 1442; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1443; CHECK-BASELINE-NEXT: andb %cl, %al 1444; CHECK-BASELINE-NEXT: notb %cl 1445; CHECK-BASELINE-NEXT: andb 16(%rdx), %cl 1446; CHECK-BASELINE-NEXT: orb %al, %cl 1447; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1448; CHECK-BASELINE-NEXT: movb 15(%r15), %cl 1449; CHECK-BASELINE-NEXT: movb 15(%r14), %al 1450; CHECK-BASELINE-NEXT: andb %cl, %al 1451; CHECK-BASELINE-NEXT: notb %cl 1452; CHECK-BASELINE-NEXT: andb 15(%rdx), %cl 1453; CHECK-BASELINE-NEXT: orb %al, %cl 1454; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1455; CHECK-BASELINE-NEXT: movb 14(%r15), %cl 1456; CHECK-BASELINE-NEXT: movb 14(%r14), %al 1457; CHECK-BASELINE-NEXT: andb %cl, %al 1458; CHECK-BASELINE-NEXT: notb %cl 1459; CHECK-BASELINE-NEXT: andb 
14(%rdx), %cl 1460; CHECK-BASELINE-NEXT: orb %al, %cl 1461; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1462; CHECK-BASELINE-NEXT: movb 13(%r15), %cl 1463; CHECK-BASELINE-NEXT: movb 13(%r14), %al 1464; CHECK-BASELINE-NEXT: andb %cl, %al 1465; CHECK-BASELINE-NEXT: notb %cl 1466; CHECK-BASELINE-NEXT: andb 13(%rdx), %cl 1467; CHECK-BASELINE-NEXT: orb %al, %cl 1468; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1469; CHECK-BASELINE-NEXT: movb 12(%r15), %cl 1470; CHECK-BASELINE-NEXT: movb 12(%r14), %al 1471; CHECK-BASELINE-NEXT: andb %cl, %al 1472; CHECK-BASELINE-NEXT: notb %cl 1473; CHECK-BASELINE-NEXT: andb 12(%rdx), %cl 1474; CHECK-BASELINE-NEXT: orb %al, %cl 1475; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1476; CHECK-BASELINE-NEXT: movb 11(%r15), %r13b 1477; CHECK-BASELINE-NEXT: movb 11(%r14), %al 1478; CHECK-BASELINE-NEXT: andb %r13b, %al 1479; CHECK-BASELINE-NEXT: notb %r13b 1480; CHECK-BASELINE-NEXT: andb 11(%rdx), %r13b 1481; CHECK-BASELINE-NEXT: orb %al, %r13b 1482; CHECK-BASELINE-NEXT: movb 10(%r15), %r12b 1483; CHECK-BASELINE-NEXT: movb 10(%r14), %al 1484; CHECK-BASELINE-NEXT: andb %r12b, %al 1485; CHECK-BASELINE-NEXT: notb %r12b 1486; CHECK-BASELINE-NEXT: andb 10(%rdx), %r12b 1487; CHECK-BASELINE-NEXT: orb %al, %r12b 1488; CHECK-BASELINE-NEXT: movb 9(%r15), %bpl 1489; CHECK-BASELINE-NEXT: movb 9(%r14), %al 1490; CHECK-BASELINE-NEXT: andb %bpl, %al 1491; CHECK-BASELINE-NEXT: notb %bpl 1492; CHECK-BASELINE-NEXT: andb 9(%rdx), %bpl 1493; CHECK-BASELINE-NEXT: orb %al, %bpl 1494; CHECK-BASELINE-NEXT: movb 8(%r15), %r11b 1495; CHECK-BASELINE-NEXT: movb 8(%r14), %al 1496; CHECK-BASELINE-NEXT: andb %r11b, %al 1497; CHECK-BASELINE-NEXT: notb %r11b 1498; CHECK-BASELINE-NEXT: andb 8(%rdx), %r11b 1499; CHECK-BASELINE-NEXT: orb %al, %r11b 1500; CHECK-BASELINE-NEXT: movb 7(%r15), %r10b 1501; CHECK-BASELINE-NEXT: movb 7(%r14), %al 1502; CHECK-BASELINE-NEXT: andb %r10b, %al 1503; 
CHECK-BASELINE-NEXT: notb %r10b 1504; CHECK-BASELINE-NEXT: andb 7(%rdx), %r10b 1505; CHECK-BASELINE-NEXT: orb %al, %r10b 1506; CHECK-BASELINE-NEXT: movb 6(%r15), %r9b 1507; CHECK-BASELINE-NEXT: movb 6(%r14), %al 1508; CHECK-BASELINE-NEXT: andb %r9b, %al 1509; CHECK-BASELINE-NEXT: notb %r9b 1510; CHECK-BASELINE-NEXT: andb 6(%rdx), %r9b 1511; CHECK-BASELINE-NEXT: orb %al, %r9b 1512; CHECK-BASELINE-NEXT: movb 5(%r15), %r8b 1513; CHECK-BASELINE-NEXT: movb 5(%r14), %al 1514; CHECK-BASELINE-NEXT: andb %r8b, %al 1515; CHECK-BASELINE-NEXT: notb %r8b 1516; CHECK-BASELINE-NEXT: andb 5(%rdx), %r8b 1517; CHECK-BASELINE-NEXT: orb %al, %r8b 1518; CHECK-BASELINE-NEXT: movb 4(%r15), %dil 1519; CHECK-BASELINE-NEXT: movb 4(%r14), %al 1520; CHECK-BASELINE-NEXT: andb %dil, %al 1521; CHECK-BASELINE-NEXT: notb %dil 1522; CHECK-BASELINE-NEXT: andb 4(%rdx), %dil 1523; CHECK-BASELINE-NEXT: orb %al, %dil 1524; CHECK-BASELINE-NEXT: movb 3(%r15), %sil 1525; CHECK-BASELINE-NEXT: movb 3(%r14), %al 1526; CHECK-BASELINE-NEXT: andb %sil, %al 1527; CHECK-BASELINE-NEXT: notb %sil 1528; CHECK-BASELINE-NEXT: andb 3(%rdx), %sil 1529; CHECK-BASELINE-NEXT: orb %al, %sil 1530; CHECK-BASELINE-NEXT: movb 2(%r15), %dl 1531; CHECK-BASELINE-NEXT: movb 2(%r14), %al 1532; CHECK-BASELINE-NEXT: andb %dl, %al 1533; CHECK-BASELINE-NEXT: notb %dl 1534; CHECK-BASELINE-NEXT: andb 2(%rbx), %dl 1535; CHECK-BASELINE-NEXT: orb %al, %dl 1536; CHECK-BASELINE-NEXT: movb 1(%r15), %al 1537; CHECK-BASELINE-NEXT: movb 1(%r14), %cl 1538; CHECK-BASELINE-NEXT: andb %al, %cl 1539; CHECK-BASELINE-NEXT: notb %al 1540; CHECK-BASELINE-NEXT: andb 1(%rbx), %al 1541; CHECK-BASELINE-NEXT: orb %cl, %al 1542; CHECK-BASELINE-NEXT: movb (%r15), %r15b 1543; CHECK-BASELINE-NEXT: movb (%r14), %r14b 1544; CHECK-BASELINE-NEXT: andb %r15b, %r14b 1545; CHECK-BASELINE-NEXT: notb %r15b 1546; CHECK-BASELINE-NEXT: andb (%rbx), %r15b 1547; CHECK-BASELINE-NEXT: orb %r14b, %r15b 1548; CHECK-BASELINE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 
1549; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1550; CHECK-BASELINE-NEXT: movb %bl, 31(%rcx) 1551; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1552; CHECK-BASELINE-NEXT: movb %bl, 30(%rcx) 1553; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1554; CHECK-BASELINE-NEXT: movb %bl, 29(%rcx) 1555; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1556; CHECK-BASELINE-NEXT: movb %bl, 28(%rcx) 1557; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1558; CHECK-BASELINE-NEXT: movb %bl, 27(%rcx) 1559; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1560; CHECK-BASELINE-NEXT: movb %bl, 26(%rcx) 1561; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1562; CHECK-BASELINE-NEXT: movb %bl, 25(%rcx) 1563; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1564; CHECK-BASELINE-NEXT: movb %bl, 24(%rcx) 1565; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1566; CHECK-BASELINE-NEXT: movb %bl, 23(%rcx) 1567; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1568; CHECK-BASELINE-NEXT: movb %bl, 22(%rcx) 1569; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1570; CHECK-BASELINE-NEXT: movb %bl, 21(%rcx) 1571; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1572; CHECK-BASELINE-NEXT: movb %bl, 20(%rcx) 1573; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1574; CHECK-BASELINE-NEXT: movb %bl, 19(%rcx) 1575; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1576; CHECK-BASELINE-NEXT: movb %bl, 18(%rcx) 1577; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1578; CHECK-BASELINE-NEXT: movb %bl, 17(%rcx) 1579; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1580; CHECK-BASELINE-NEXT: movb %bl, 16(%rcx) 1581; 
CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1582; CHECK-BASELINE-NEXT: movb %bl, 15(%rcx) 1583; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1584; CHECK-BASELINE-NEXT: movb %bl, 14(%rcx) 1585; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1586; CHECK-BASELINE-NEXT: movb %bl, 13(%rcx) 1587; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1588; CHECK-BASELINE-NEXT: movb %bl, 12(%rcx) 1589; CHECK-BASELINE-NEXT: movb %r13b, 11(%rcx) 1590; CHECK-BASELINE-NEXT: movb %r12b, 10(%rcx) 1591; CHECK-BASELINE-NEXT: movb %bpl, 9(%rcx) 1592; CHECK-BASELINE-NEXT: movb %r11b, 8(%rcx) 1593; CHECK-BASELINE-NEXT: movb %r10b, 7(%rcx) 1594; CHECK-BASELINE-NEXT: movb %r9b, 6(%rcx) 1595; CHECK-BASELINE-NEXT: movb %r8b, 5(%rcx) 1596; CHECK-BASELINE-NEXT: movb %dil, 4(%rcx) 1597; CHECK-BASELINE-NEXT: movb %sil, 3(%rcx) 1598; CHECK-BASELINE-NEXT: movb %dl, 2(%rcx) 1599; CHECK-BASELINE-NEXT: movb %al, 1(%rcx) 1600; CHECK-BASELINE-NEXT: movb %r15b, (%rcx) 1601; CHECK-BASELINE-NEXT: movq %rcx, %rax 1602; CHECK-BASELINE-NEXT: popq %rbx 1603; CHECK-BASELINE-NEXT: popq %r12 1604; CHECK-BASELINE-NEXT: popq %r13 1605; CHECK-BASELINE-NEXT: popq %r14 1606; CHECK-BASELINE-NEXT: popq %r15 1607; CHECK-BASELINE-NEXT: popq %rbp 1608; CHECK-BASELINE-NEXT: retq 1609; 1610; CHECK-SSE1-LABEL: out_v32i8: 1611; CHECK-SSE1: # %bb.0: 1612; CHECK-SSE1-NEXT: pushq %rbp 1613; CHECK-SSE1-NEXT: pushq %r15 1614; CHECK-SSE1-NEXT: pushq %r14 1615; CHECK-SSE1-NEXT: pushq %r13 1616; CHECK-SSE1-NEXT: pushq %r12 1617; CHECK-SSE1-NEXT: pushq %rbx 1618; CHECK-SSE1-NEXT: movq %rcx, %r15 1619; CHECK-SSE1-NEXT: movq %rsi, %r14 1620; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1621; CHECK-SSE1-NEXT: movb 16(%rcx), %al 1622; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1623; CHECK-SSE1-NEXT: movb 17(%rcx), %al 1624; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 
1625; CHECK-SSE1-NEXT: movb 18(%rcx), %al 1626; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1627; CHECK-SSE1-NEXT: movb 19(%rcx), %al 1628; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1629; CHECK-SSE1-NEXT: movb 20(%rcx), %al 1630; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1631; CHECK-SSE1-NEXT: movb 21(%rcx), %r12b 1632; CHECK-SSE1-NEXT: movb 22(%rcx), %r9b 1633; CHECK-SSE1-NEXT: movb 23(%rcx), %r10b 1634; CHECK-SSE1-NEXT: movb 24(%rcx), %r11b 1635; CHECK-SSE1-NEXT: movb 25(%rcx), %bpl 1636; CHECK-SSE1-NEXT: movb 26(%rcx), %r13b 1637; CHECK-SSE1-NEXT: movb 27(%rcx), %r8b 1638; CHECK-SSE1-NEXT: movb 28(%rcx), %dil 1639; CHECK-SSE1-NEXT: movb 29(%rcx), %sil 1640; CHECK-SSE1-NEXT: movb 30(%rcx), %bl 1641; CHECK-SSE1-NEXT: movb 31(%rcx), %al 1642; CHECK-SSE1-NEXT: movb 31(%r14), %cl 1643; CHECK-SSE1-NEXT: andb %al, %cl 1644; CHECK-SSE1-NEXT: notb %al 1645; CHECK-SSE1-NEXT: andb 31(%rdx), %al 1646; CHECK-SSE1-NEXT: orb %cl, %al 1647; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1648; CHECK-SSE1-NEXT: movb 30(%r14), %al 1649; CHECK-SSE1-NEXT: andb %bl, %al 1650; CHECK-SSE1-NEXT: notb %bl 1651; CHECK-SSE1-NEXT: andb 30(%rdx), %bl 1652; CHECK-SSE1-NEXT: orb %al, %bl 1653; CHECK-SSE1-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1654; CHECK-SSE1-NEXT: movb 29(%r14), %al 1655; CHECK-SSE1-NEXT: andb %sil, %al 1656; CHECK-SSE1-NEXT: notb %sil 1657; CHECK-SSE1-NEXT: andb 29(%rdx), %sil 1658; CHECK-SSE1-NEXT: orb %al, %sil 1659; CHECK-SSE1-NEXT: movb %sil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1660; CHECK-SSE1-NEXT: movb 28(%r14), %al 1661; CHECK-SSE1-NEXT: andb %dil, %al 1662; CHECK-SSE1-NEXT: notb %dil 1663; CHECK-SSE1-NEXT: andb 28(%rdx), %dil 1664; CHECK-SSE1-NEXT: orb %al, %dil 1665; CHECK-SSE1-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1666; CHECK-SSE1-NEXT: movb 27(%r14), %al 1667; CHECK-SSE1-NEXT: andb %r8b, %al 1668; CHECK-SSE1-NEXT: notb %r8b 1669; 
CHECK-SSE1-NEXT: andb 27(%rdx), %r8b 1670; CHECK-SSE1-NEXT: orb %al, %r8b 1671; CHECK-SSE1-NEXT: movb %r8b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1672; CHECK-SSE1-NEXT: movb 26(%r14), %al 1673; CHECK-SSE1-NEXT: andb %r13b, %al 1674; CHECK-SSE1-NEXT: notb %r13b 1675; CHECK-SSE1-NEXT: andb 26(%rdx), %r13b 1676; CHECK-SSE1-NEXT: orb %al, %r13b 1677; CHECK-SSE1-NEXT: movb %r13b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1678; CHECK-SSE1-NEXT: movb 25(%r14), %al 1679; CHECK-SSE1-NEXT: andb %bpl, %al 1680; CHECK-SSE1-NEXT: notb %bpl 1681; CHECK-SSE1-NEXT: andb 25(%rdx), %bpl 1682; CHECK-SSE1-NEXT: orb %al, %bpl 1683; CHECK-SSE1-NEXT: movb %bpl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1684; CHECK-SSE1-NEXT: movb 24(%r14), %al 1685; CHECK-SSE1-NEXT: andb %r11b, %al 1686; CHECK-SSE1-NEXT: notb %r11b 1687; CHECK-SSE1-NEXT: andb 24(%rdx), %r11b 1688; CHECK-SSE1-NEXT: orb %al, %r11b 1689; CHECK-SSE1-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1690; CHECK-SSE1-NEXT: movb 23(%r14), %al 1691; CHECK-SSE1-NEXT: andb %r10b, %al 1692; CHECK-SSE1-NEXT: notb %r10b 1693; CHECK-SSE1-NEXT: andb 23(%rdx), %r10b 1694; CHECK-SSE1-NEXT: orb %al, %r10b 1695; CHECK-SSE1-NEXT: movb %r10b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1696; CHECK-SSE1-NEXT: movb 22(%r14), %al 1697; CHECK-SSE1-NEXT: andb %r9b, %al 1698; CHECK-SSE1-NEXT: notb %r9b 1699; CHECK-SSE1-NEXT: andb 22(%rdx), %r9b 1700; CHECK-SSE1-NEXT: orb %al, %r9b 1701; CHECK-SSE1-NEXT: movb %r9b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1702; CHECK-SSE1-NEXT: movb 21(%r14), %al 1703; CHECK-SSE1-NEXT: andb %r12b, %al 1704; CHECK-SSE1-NEXT: notb %r12b 1705; CHECK-SSE1-NEXT: andb 21(%rdx), %r12b 1706; CHECK-SSE1-NEXT: orb %al, %r12b 1707; CHECK-SSE1-NEXT: movb %r12b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1708; CHECK-SSE1-NEXT: movb 20(%r14), %al 1709; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1710; CHECK-SSE1-NEXT: andb %cl, %al 1711; CHECK-SSE1-NEXT: notb %cl 1712; CHECK-SSE1-NEXT: andb 20(%rdx), %cl 
1713; CHECK-SSE1-NEXT: orb %al, %cl 1714; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1715; CHECK-SSE1-NEXT: movb 19(%r14), %al 1716; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1717; CHECK-SSE1-NEXT: andb %cl, %al 1718; CHECK-SSE1-NEXT: notb %cl 1719; CHECK-SSE1-NEXT: andb 19(%rdx), %cl 1720; CHECK-SSE1-NEXT: orb %al, %cl 1721; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1722; CHECK-SSE1-NEXT: movb 18(%r14), %al 1723; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1724; CHECK-SSE1-NEXT: andb %cl, %al 1725; CHECK-SSE1-NEXT: notb %cl 1726; CHECK-SSE1-NEXT: andb 18(%rdx), %cl 1727; CHECK-SSE1-NEXT: orb %al, %cl 1728; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1729; CHECK-SSE1-NEXT: movb 17(%r14), %al 1730; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1731; CHECK-SSE1-NEXT: andb %cl, %al 1732; CHECK-SSE1-NEXT: notb %cl 1733; CHECK-SSE1-NEXT: movq %rdx, %rbx 1734; CHECK-SSE1-NEXT: andb 17(%rdx), %cl 1735; CHECK-SSE1-NEXT: orb %al, %cl 1736; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1737; CHECK-SSE1-NEXT: movb 16(%r14), %al 1738; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload 1739; CHECK-SSE1-NEXT: andb %cl, %al 1740; CHECK-SSE1-NEXT: notb %cl 1741; CHECK-SSE1-NEXT: andb 16(%rdx), %cl 1742; CHECK-SSE1-NEXT: orb %al, %cl 1743; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1744; CHECK-SSE1-NEXT: movb 15(%r15), %cl 1745; CHECK-SSE1-NEXT: movb 15(%r14), %al 1746; CHECK-SSE1-NEXT: andb %cl, %al 1747; CHECK-SSE1-NEXT: notb %cl 1748; CHECK-SSE1-NEXT: andb 15(%rdx), %cl 1749; CHECK-SSE1-NEXT: orb %al, %cl 1750; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1751; CHECK-SSE1-NEXT: movb 14(%r15), %cl 1752; CHECK-SSE1-NEXT: movb 14(%r14), %al 1753; CHECK-SSE1-NEXT: andb %cl, %al 1754; CHECK-SSE1-NEXT: notb %cl 1755; CHECK-SSE1-NEXT: andb 14(%rdx), %cl 1756; 
CHECK-SSE1-NEXT: orb %al, %cl 1757; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1758; CHECK-SSE1-NEXT: movb 13(%r15), %cl 1759; CHECK-SSE1-NEXT: movb 13(%r14), %al 1760; CHECK-SSE1-NEXT: andb %cl, %al 1761; CHECK-SSE1-NEXT: notb %cl 1762; CHECK-SSE1-NEXT: andb 13(%rdx), %cl 1763; CHECK-SSE1-NEXT: orb %al, %cl 1764; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1765; CHECK-SSE1-NEXT: movb 12(%r15), %cl 1766; CHECK-SSE1-NEXT: movb 12(%r14), %al 1767; CHECK-SSE1-NEXT: andb %cl, %al 1768; CHECK-SSE1-NEXT: notb %cl 1769; CHECK-SSE1-NEXT: andb 12(%rdx), %cl 1770; CHECK-SSE1-NEXT: orb %al, %cl 1771; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1772; CHECK-SSE1-NEXT: movb 11(%r15), %r13b 1773; CHECK-SSE1-NEXT: movb 11(%r14), %al 1774; CHECK-SSE1-NEXT: andb %r13b, %al 1775; CHECK-SSE1-NEXT: notb %r13b 1776; CHECK-SSE1-NEXT: andb 11(%rdx), %r13b 1777; CHECK-SSE1-NEXT: orb %al, %r13b 1778; CHECK-SSE1-NEXT: movb 10(%r15), %r12b 1779; CHECK-SSE1-NEXT: movb 10(%r14), %al 1780; CHECK-SSE1-NEXT: andb %r12b, %al 1781; CHECK-SSE1-NEXT: notb %r12b 1782; CHECK-SSE1-NEXT: andb 10(%rdx), %r12b 1783; CHECK-SSE1-NEXT: orb %al, %r12b 1784; CHECK-SSE1-NEXT: movb 9(%r15), %bpl 1785; CHECK-SSE1-NEXT: movb 9(%r14), %al 1786; CHECK-SSE1-NEXT: andb %bpl, %al 1787; CHECK-SSE1-NEXT: notb %bpl 1788; CHECK-SSE1-NEXT: andb 9(%rdx), %bpl 1789; CHECK-SSE1-NEXT: orb %al, %bpl 1790; CHECK-SSE1-NEXT: movb 8(%r15), %r11b 1791; CHECK-SSE1-NEXT: movb 8(%r14), %al 1792; CHECK-SSE1-NEXT: andb %r11b, %al 1793; CHECK-SSE1-NEXT: notb %r11b 1794; CHECK-SSE1-NEXT: andb 8(%rdx), %r11b 1795; CHECK-SSE1-NEXT: orb %al, %r11b 1796; CHECK-SSE1-NEXT: movb 7(%r15), %r10b 1797; CHECK-SSE1-NEXT: movb 7(%r14), %al 1798; CHECK-SSE1-NEXT: andb %r10b, %al 1799; CHECK-SSE1-NEXT: notb %r10b 1800; CHECK-SSE1-NEXT: andb 7(%rdx), %r10b 1801; CHECK-SSE1-NEXT: orb %al, %r10b 1802; CHECK-SSE1-NEXT: movb 6(%r15), %r9b 1803; CHECK-SSE1-NEXT: movb 6(%r14), %al 1804; 
CHECK-SSE1-NEXT: andb %r9b, %al 1805; CHECK-SSE1-NEXT: notb %r9b 1806; CHECK-SSE1-NEXT: andb 6(%rdx), %r9b 1807; CHECK-SSE1-NEXT: orb %al, %r9b 1808; CHECK-SSE1-NEXT: movb 5(%r15), %r8b 1809; CHECK-SSE1-NEXT: movb 5(%r14), %al 1810; CHECK-SSE1-NEXT: andb %r8b, %al 1811; CHECK-SSE1-NEXT: notb %r8b 1812; CHECK-SSE1-NEXT: andb 5(%rdx), %r8b 1813; CHECK-SSE1-NEXT: orb %al, %r8b 1814; CHECK-SSE1-NEXT: movb 4(%r15), %dil 1815; CHECK-SSE1-NEXT: movb 4(%r14), %al 1816; CHECK-SSE1-NEXT: andb %dil, %al 1817; CHECK-SSE1-NEXT: notb %dil 1818; CHECK-SSE1-NEXT: andb 4(%rdx), %dil 1819; CHECK-SSE1-NEXT: orb %al, %dil 1820; CHECK-SSE1-NEXT: movb 3(%r15), %sil 1821; CHECK-SSE1-NEXT: movb 3(%r14), %al 1822; CHECK-SSE1-NEXT: andb %sil, %al 1823; CHECK-SSE1-NEXT: notb %sil 1824; CHECK-SSE1-NEXT: andb 3(%rdx), %sil 1825; CHECK-SSE1-NEXT: orb %al, %sil 1826; CHECK-SSE1-NEXT: movb 2(%r15), %dl 1827; CHECK-SSE1-NEXT: movb 2(%r14), %al 1828; CHECK-SSE1-NEXT: andb %dl, %al 1829; CHECK-SSE1-NEXT: notb %dl 1830; CHECK-SSE1-NEXT: andb 2(%rbx), %dl 1831; CHECK-SSE1-NEXT: orb %al, %dl 1832; CHECK-SSE1-NEXT: movb 1(%r15), %al 1833; CHECK-SSE1-NEXT: movb 1(%r14), %cl 1834; CHECK-SSE1-NEXT: andb %al, %cl 1835; CHECK-SSE1-NEXT: notb %al 1836; CHECK-SSE1-NEXT: andb 1(%rbx), %al 1837; CHECK-SSE1-NEXT: orb %cl, %al 1838; CHECK-SSE1-NEXT: movb (%r15), %r15b 1839; CHECK-SSE1-NEXT: movb (%r14), %r14b 1840; CHECK-SSE1-NEXT: andb %r15b, %r14b 1841; CHECK-SSE1-NEXT: notb %r15b 1842; CHECK-SSE1-NEXT: andb (%rbx), %r15b 1843; CHECK-SSE1-NEXT: orb %r14b, %r15b 1844; CHECK-SSE1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload 1845; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1846; CHECK-SSE1-NEXT: movb %bl, 31(%rcx) 1847; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1848; CHECK-SSE1-NEXT: movb %bl, 30(%rcx) 1849; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1850; CHECK-SSE1-NEXT: movb %bl, 29(%rcx) 1851; CHECK-SSE1-NEXT: movb 
{{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1852; CHECK-SSE1-NEXT: movb %bl, 28(%rcx) 1853; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1854; CHECK-SSE1-NEXT: movb %bl, 27(%rcx) 1855; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1856; CHECK-SSE1-NEXT: movb %bl, 26(%rcx) 1857; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1858; CHECK-SSE1-NEXT: movb %bl, 25(%rcx) 1859; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1860; CHECK-SSE1-NEXT: movb %bl, 24(%rcx) 1861; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1862; CHECK-SSE1-NEXT: movb %bl, 23(%rcx) 1863; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1864; CHECK-SSE1-NEXT: movb %bl, 22(%rcx) 1865; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1866; CHECK-SSE1-NEXT: movb %bl, 21(%rcx) 1867; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1868; CHECK-SSE1-NEXT: movb %bl, 20(%rcx) 1869; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1870; CHECK-SSE1-NEXT: movb %bl, 19(%rcx) 1871; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1872; CHECK-SSE1-NEXT: movb %bl, 18(%rcx) 1873; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1874; CHECK-SSE1-NEXT: movb %bl, 17(%rcx) 1875; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1876; CHECK-SSE1-NEXT: movb %bl, 16(%rcx) 1877; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1878; CHECK-SSE1-NEXT: movb %bl, 15(%rcx) 1879; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1880; CHECK-SSE1-NEXT: movb %bl, 14(%rcx) 1881; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1882; CHECK-SSE1-NEXT: movb %bl, 13(%rcx) 1883; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %bl # 1-byte Reload 1884; CHECK-SSE1-NEXT: movb %bl, 12(%rcx) 1885; CHECK-SSE1-NEXT: movb %r13b, 11(%rcx) 1886; 
CHECK-SSE1-NEXT: movb %r12b, 10(%rcx) 1887; CHECK-SSE1-NEXT: movb %bpl, 9(%rcx) 1888; CHECK-SSE1-NEXT: movb %r11b, 8(%rcx) 1889; CHECK-SSE1-NEXT: movb %r10b, 7(%rcx) 1890; CHECK-SSE1-NEXT: movb %r9b, 6(%rcx) 1891; CHECK-SSE1-NEXT: movb %r8b, 5(%rcx) 1892; CHECK-SSE1-NEXT: movb %dil, 4(%rcx) 1893; CHECK-SSE1-NEXT: movb %sil, 3(%rcx) 1894; CHECK-SSE1-NEXT: movb %dl, 2(%rcx) 1895; CHECK-SSE1-NEXT: movb %al, 1(%rcx) 1896; CHECK-SSE1-NEXT: movb %r15b, (%rcx) 1897; CHECK-SSE1-NEXT: movq %rcx, %rax 1898; CHECK-SSE1-NEXT: popq %rbx 1899; CHECK-SSE1-NEXT: popq %r12 1900; CHECK-SSE1-NEXT: popq %r13 1901; CHECK-SSE1-NEXT: popq %r14 1902; CHECK-SSE1-NEXT: popq %r15 1903; CHECK-SSE1-NEXT: popq %rbp 1904; CHECK-SSE1-NEXT: retq 1905; 1906; CHECK-SSE2-LABEL: out_v32i8: 1907; CHECK-SSE2: # %bb.0: 1908; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 1909; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 1910; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 1911; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 1912; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 1913; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 1914; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 1915; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 1916; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 1917; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 1918; CHECK-SSE2-NEXT: retq 1919; 1920; CHECK-XOP-LABEL: out_v32i8: 1921; CHECK-XOP: # %bb.0: 1922; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 1923; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 1924; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 1925; CHECK-XOP-NEXT: retq 1926 %x = load <32 x i8>, <32 x i8> *%px, align 32 1927 %y = load <32 x i8>, <32 x i8> *%py, align 32 1928 %mask = load <32 x i8>, <32 x i8> *%pmask, align 32 1929 %mx = and <32 x i8> %x, %mask 1930 %notmask = xor <32 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1931 %my = and <32 x i8> %y, %notmask 1932 %r = or 
<32 x i8> %mx, %my
  ret <32 x i8> %r
}

; Masked merge of two <16 x i16> vectors: every result bit is taken from the
; vector behind %px where the mask bit is set and from the vector behind %py
; where it is clear, i.e. (x & mask) | (y & ~mask). All three operands are
; loaded through pointers with 32-byte alignment.
; The assertions below are generated output; per those assertions the runs
; without SSE2 expand the merge into one scalar and/not/and/or group per
; element, the SSE2 run uses two 128-bit andps/andnps/orps groups, and the XOP
; run uses a single vpcmov.
define <16 x i16> @out_v16i16(<16 x i16> *%px, <16 x i16> *%py, <16 x i16> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: out_v16i16:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbp
; CHECK-BASELINE-NEXT:    pushq %r15
; CHECK-BASELINE-NEXT:    pushq %r14
; CHECK-BASELINE-NEXT:    pushq %r13
; CHECK-BASELINE-NEXT:    pushq %r12
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movq %rcx, %r9
; CHECK-BASELINE-NEXT:    movq %rdx, %r10
; CHECK-BASELINE-NEXT:    movq %rsi, %r8
; CHECK-BASELINE-NEXT:    movq %rdi, %r11
; CHECK-BASELINE-NEXT:    movl 12(%rcx), %eax
; CHECK-BASELINE-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    movzwl 14(%rcx), %edx
; CHECK-BASELINE-NEXT:    movl 16(%rcx), %esi
; CHECK-BASELINE-NEXT:    movzwl 18(%rcx), %edi
; CHECK-BASELINE-NEXT:    movl 20(%rcx), %ecx
; CHECK-BASELINE-NEXT:    movzwl 22(%r9), %r15d
; CHECK-BASELINE-NEXT:    movl 24(%r9), %r12d
; CHECK-BASELINE-NEXT:    movzwl 26(%r9), %r14d
; CHECK-BASELINE-NEXT:    movl 28(%r9), %ebx
; CHECK-BASELINE-NEXT:    movzwl 30(%r9), %ebp
; CHECK-BASELINE-NEXT:    movzwl 30(%r8), %r13d
; CHECK-BASELINE-NEXT:    andw %bp, %r13w
; CHECK-BASELINE-NEXT:    notl %ebp
; CHECK-BASELINE-NEXT:    andw 30(%r10), %bp
; CHECK-BASELINE-NEXT:    orl %r13d, %ebp
; CHECK-BASELINE-NEXT:    movzwl 28(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %bx, %ax
; CHECK-BASELINE-NEXT:    notl %ebx
; CHECK-BASELINE-NEXT:    andw 28(%r10), %bx
; CHECK-BASELINE-NEXT:    orl %eax, %ebx
; CHECK-BASELINE-NEXT:    movzwl 26(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %r14w, %ax
; CHECK-BASELINE-NEXT:    notl %r14d
; CHECK-BASELINE-NEXT:    andw 26(%r10), %r14w
; CHECK-BASELINE-NEXT:    orl %eax, %r14d
; CHECK-BASELINE-NEXT:    movzwl 24(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %r12w, %ax
; CHECK-BASELINE-NEXT:    notl %r12d
; CHECK-BASELINE-NEXT:    andw 24(%r10), %r12w
; CHECK-BASELINE-NEXT:    orl %eax, %r12d
; CHECK-BASELINE-NEXT:    movzwl 22(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %r15w, %ax
; CHECK-BASELINE-NEXT:    notl %r15d
; CHECK-BASELINE-NEXT:    andw 22(%r10), %r15w
; CHECK-BASELINE-NEXT:    orl %eax, %r15d
; CHECK-BASELINE-NEXT:    movzwl 20(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %cx, %ax
; CHECK-BASELINE-NEXT:    notl %ecx
; CHECK-BASELINE-NEXT:    andw 20(%r10), %cx
; CHECK-BASELINE-NEXT:    orl %eax, %ecx
; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    movzwl 18(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %di, %ax
; CHECK-BASELINE-NEXT:    notl %edi
; CHECK-BASELINE-NEXT:    andw 18(%r10), %di
; CHECK-BASELINE-NEXT:    orl %eax, %edi
; CHECK-BASELINE-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    movzwl 16(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %si, %ax
; CHECK-BASELINE-NEXT:    notl %esi
; CHECK-BASELINE-NEXT:    andw 16(%r10), %si
; CHECK-BASELINE-NEXT:    orl %eax, %esi
; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    movzwl 14(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %dx, %ax
; CHECK-BASELINE-NEXT:    notl %edx
; CHECK-BASELINE-NEXT:    andw 14(%r10), %dx
; CHECK-BASELINE-NEXT:    orl %eax, %edx
; CHECK-BASELINE-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    movzwl 12(%r8), %eax
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT:    andw %cx, %ax
; CHECK-BASELINE-NEXT:    notl %ecx
; CHECK-BASELINE-NEXT:    andw 12(%r10), %cx
; CHECK-BASELINE-NEXT:    orl %eax, %ecx
; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    movzwl 10(%r9), %r13d
; CHECK-BASELINE-NEXT:    movzwl 10(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %r13w, %ax
; CHECK-BASELINE-NEXT:    notl %r13d
; CHECK-BASELINE-NEXT:    andw 10(%r10), %r13w
; CHECK-BASELINE-NEXT:    orl %eax, %r13d
; CHECK-BASELINE-NEXT:    movl 8(%r9), %edi
; CHECK-BASELINE-NEXT:    movzwl 8(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %di, %ax
; CHECK-BASELINE-NEXT:    notl %edi
; CHECK-BASELINE-NEXT:    andw 8(%r10), %di
; CHECK-BASELINE-NEXT:    orl %eax, %edi
; CHECK-BASELINE-NEXT:    movzwl 6(%r9), %esi
; CHECK-BASELINE-NEXT:    movzwl 6(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %si, %ax
; CHECK-BASELINE-NEXT:    notl %esi
; CHECK-BASELINE-NEXT:    andw 6(%r10), %si
; CHECK-BASELINE-NEXT:    orl %eax, %esi
; CHECK-BASELINE-NEXT:    movl 4(%r9), %edx
; CHECK-BASELINE-NEXT:    movzwl 4(%r8), %eax
; CHECK-BASELINE-NEXT:    andw %dx, %ax
; CHECK-BASELINE-NEXT:    notl %edx
; CHECK-BASELINE-NEXT:    andw 4(%r10), %dx
; CHECK-BASELINE-NEXT:    orl %eax, %edx
; CHECK-BASELINE-NEXT:    movzwl 2(%r9), %eax
; CHECK-BASELINE-NEXT:    movzwl 2(%r8), %ecx
; CHECK-BASELINE-NEXT:    andw %ax, %cx
; CHECK-BASELINE-NEXT:    notl %eax
; CHECK-BASELINE-NEXT:    andw 2(%r10), %ax
; CHECK-BASELINE-NEXT:    orl %ecx, %eax
; CHECK-BASELINE-NEXT:    movl (%r9), %r9d
; CHECK-BASELINE-NEXT:    movzwl (%r8), %ecx
; CHECK-BASELINE-NEXT:    andw %r9w, %cx
; CHECK-BASELINE-NEXT:    notl %r9d
; CHECK-BASELINE-NEXT:    andw (%r10), %r9w
; CHECK-BASELINE-NEXT:    orl %ecx, %r9d
; CHECK-BASELINE-NEXT:    movw %bp, 30(%r11)
; CHECK-BASELINE-NEXT:    movw %bx, 28(%r11)
; CHECK-BASELINE-NEXT:    movw %r14w, 26(%r11)
; CHECK-BASELINE-NEXT:    movw %r12w, 24(%r11)
; CHECK-BASELINE-NEXT:    movw %r15w, 22(%r11)
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT:    movw %cx, 20(%r11)
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT:    movw %cx, 18(%r11)
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT:    movw %cx, 16(%r11)
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT:    movw %cx, 14(%r11)
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT:    movw %cx, 12(%r11)
; CHECK-BASELINE-NEXT:    movw %r13w, 10(%r11)
; CHECK-BASELINE-NEXT:    movw %di, 8(%r11)
; CHECK-BASELINE-NEXT:    movw %si, 6(%r11)
; CHECK-BASELINE-NEXT:    movw %dx, 4(%r11)
; CHECK-BASELINE-NEXT:    movw %ax, 2(%r11)
; CHECK-BASELINE-NEXT:    movw %r9w, (%r11)
; CHECK-BASELINE-NEXT:    movq %r11, %rax
; CHECK-BASELINE-NEXT:    popq %rbx
; CHECK-BASELINE-NEXT:    popq %r12
; CHECK-BASELINE-NEXT:    popq %r13
; CHECK-BASELINE-NEXT:    popq %r14
; CHECK-BASELINE-NEXT:    popq %r15
; CHECK-BASELINE-NEXT:    popq %rbp
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v16i16:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    pushq %rbp
; CHECK-SSE1-NEXT:    pushq %r15
; CHECK-SSE1-NEXT:    pushq %r14
; CHECK-SSE1-NEXT:    pushq %r13
; CHECK-SSE1-NEXT:    pushq %r12
; CHECK-SSE1-NEXT:    pushq %rbx
; CHECK-SSE1-NEXT:    movq %rcx, %r9
; CHECK-SSE1-NEXT:    movq %rdx, %r10
; CHECK-SSE1-NEXT:    movq %rsi, %r8
; CHECK-SSE1-NEXT:    movq %rdi, %r11
; CHECK-SSE1-NEXT:    movl 12(%rcx), %eax
; CHECK-SSE1-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    movzwl 14(%rcx), %edx
; CHECK-SSE1-NEXT:    movl 16(%rcx), %esi
; CHECK-SSE1-NEXT:    movzwl 18(%rcx), %edi
; CHECK-SSE1-NEXT:    movl 20(%rcx), %ecx
; CHECK-SSE1-NEXT:    movzwl 22(%r9), %r15d
; CHECK-SSE1-NEXT:    movl 24(%r9), %r12d
; CHECK-SSE1-NEXT:    movzwl 26(%r9), %r14d
; CHECK-SSE1-NEXT:    movl 28(%r9), %ebx
; CHECK-SSE1-NEXT:    movzwl 30(%r9), %ebp
; CHECK-SSE1-NEXT:    movzwl 30(%r8), %r13d
; CHECK-SSE1-NEXT:    andw %bp, %r13w
; CHECK-SSE1-NEXT:    notl %ebp
; CHECK-SSE1-NEXT:    andw 30(%r10), %bp
; CHECK-SSE1-NEXT:    orl %r13d, %ebp
; CHECK-SSE1-NEXT:    movzwl 28(%r8), %eax
; CHECK-SSE1-NEXT:    andw %bx, %ax
; CHECK-SSE1-NEXT:    notl %ebx
; CHECK-SSE1-NEXT:    andw 28(%r10), %bx
; CHECK-SSE1-NEXT:    orl %eax, %ebx
; CHECK-SSE1-NEXT:    movzwl 26(%r8), %eax
; CHECK-SSE1-NEXT:    andw %r14w, %ax
; CHECK-SSE1-NEXT:    notl %r14d
; CHECK-SSE1-NEXT:    andw 26(%r10), %r14w
; CHECK-SSE1-NEXT:    orl %eax, %r14d
; CHECK-SSE1-NEXT:    movzwl 24(%r8), %eax
; CHECK-SSE1-NEXT:    andw %r12w, %ax
; CHECK-SSE1-NEXT:    notl %r12d
; CHECK-SSE1-NEXT:    andw 24(%r10), %r12w
; CHECK-SSE1-NEXT:    orl %eax, %r12d
; CHECK-SSE1-NEXT:    movzwl 22(%r8), %eax
; CHECK-SSE1-NEXT:    andw %r15w, %ax
; CHECK-SSE1-NEXT:    notl %r15d
; CHECK-SSE1-NEXT:    andw 22(%r10), %r15w
; CHECK-SSE1-NEXT:    orl %eax, %r15d
; CHECK-SSE1-NEXT:    movzwl 20(%r8), %eax
; CHECK-SSE1-NEXT:    andw %cx, %ax
; CHECK-SSE1-NEXT:    notl %ecx
; CHECK-SSE1-NEXT:    andw 20(%r10), %cx
; CHECK-SSE1-NEXT:    orl %eax, %ecx
; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    movzwl 18(%r8), %eax
; CHECK-SSE1-NEXT:    andw %di, %ax
; CHECK-SSE1-NEXT:    notl %edi
; CHECK-SSE1-NEXT:    andw 18(%r10), %di
; CHECK-SSE1-NEXT:    orl %eax, %edi
; CHECK-SSE1-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    movzwl 16(%r8), %eax
; CHECK-SSE1-NEXT:    andw %si, %ax
; CHECK-SSE1-NEXT:    notl %esi
; CHECK-SSE1-NEXT:    andw 16(%r10), %si
; CHECK-SSE1-NEXT:    orl %eax, %esi
; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    movzwl 14(%r8), %eax
; CHECK-SSE1-NEXT:    andw %dx, %ax
; CHECK-SSE1-NEXT:    notl %edx
; CHECK-SSE1-NEXT:    andw 14(%r10), %dx
; CHECK-SSE1-NEXT:    orl %eax, %edx
; CHECK-SSE1-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    movzwl 12(%r8), %eax
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT:    andw %cx, %ax
; CHECK-SSE1-NEXT:    notl %ecx
; CHECK-SSE1-NEXT:    andw 12(%r10), %cx
; CHECK-SSE1-NEXT:    orl %eax, %ecx
; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    movzwl 10(%r9), %r13d
; CHECK-SSE1-NEXT:    movzwl 10(%r8), %eax
; CHECK-SSE1-NEXT:    andw %r13w, %ax
; CHECK-SSE1-NEXT:    notl %r13d
; CHECK-SSE1-NEXT:    andw 10(%r10), %r13w
; CHECK-SSE1-NEXT:    orl %eax, %r13d
; CHECK-SSE1-NEXT:    movl 8(%r9), %edi
; CHECK-SSE1-NEXT:    movzwl 8(%r8), %eax
; CHECK-SSE1-NEXT:    andw %di, %ax
; CHECK-SSE1-NEXT:    notl %edi
; CHECK-SSE1-NEXT:    andw 8(%r10), %di
; CHECK-SSE1-NEXT:    orl %eax, %edi
; CHECK-SSE1-NEXT:    movzwl 6(%r9), %esi
; CHECK-SSE1-NEXT:    movzwl 6(%r8), %eax
; CHECK-SSE1-NEXT:    andw %si, %ax
; CHECK-SSE1-NEXT:    notl %esi
; CHECK-SSE1-NEXT:    andw 6(%r10), %si
; CHECK-SSE1-NEXT:    orl %eax, %esi
; CHECK-SSE1-NEXT:    movl 4(%r9), %edx
; CHECK-SSE1-NEXT:    movzwl 4(%r8), %eax
; CHECK-SSE1-NEXT:    andw %dx, %ax
; CHECK-SSE1-NEXT:    notl %edx
; CHECK-SSE1-NEXT:    andw 4(%r10), %dx
; CHECK-SSE1-NEXT:    orl %eax, %edx
; CHECK-SSE1-NEXT:    movzwl 2(%r9), %eax
; CHECK-SSE1-NEXT:    movzwl 2(%r8), %ecx
; CHECK-SSE1-NEXT:    andw %ax, %cx
; CHECK-SSE1-NEXT:    notl %eax
; CHECK-SSE1-NEXT:    andw 2(%r10), %ax
; CHECK-SSE1-NEXT:    orl %ecx, %eax
; CHECK-SSE1-NEXT:    movl (%r9), %r9d
; CHECK-SSE1-NEXT:    movzwl (%r8), %ecx
; CHECK-SSE1-NEXT:    andw %r9w, %cx
; CHECK-SSE1-NEXT:    notl %r9d
; CHECK-SSE1-NEXT:    andw (%r10), %r9w
; CHECK-SSE1-NEXT:    orl %ecx, %r9d
; CHECK-SSE1-NEXT:    movw %bp, 30(%r11)
; CHECK-SSE1-NEXT:    movw %bx, 28(%r11)
; CHECK-SSE1-NEXT:    movw %r14w, 26(%r11)
; CHECK-SSE1-NEXT:    movw %r12w, 24(%r11)
; CHECK-SSE1-NEXT:    movw %r15w, 22(%r11)
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT:    movw %cx, 20(%r11)
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT:    movw %cx, 18(%r11)
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT:    movw %cx, 16(%r11)
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT:    movw %cx, 14(%r11)
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT:    movw %cx, 12(%r11)
; CHECK-SSE1-NEXT:    movw %r13w, 10(%r11)
; CHECK-SSE1-NEXT:    movw %di, 8(%r11)
; CHECK-SSE1-NEXT:    movw %si, 6(%r11)
; CHECK-SSE1-NEXT:    movw %dx, 4(%r11)
; CHECK-SSE1-NEXT:    movw %ax, 2(%r11)
; CHECK-SSE1-NEXT:    movw %r9w, (%r11)
; CHECK-SSE1-NEXT:    movq %r11, %rax
; CHECK-SSE1-NEXT:    popq %rbx
; CHECK-SSE1-NEXT:    popq %r12
; CHECK-SSE1-NEXT:    popq %r13
; CHECK-SSE1-NEXT:    popq %r14
; CHECK-SSE1-NEXT:    popq %r15
; CHECK-SSE1-NEXT:    popq %rbp
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v16i16:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v16i16:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; CHECK-XOP-NEXT:    retq
  %vx = load <16 x i16>, <16 x i16> *%px, align 32
  %vy = load <16 x i16>, <16 x i16> *%py, align 32
  %vmask = load <16 x i16>, <16 x i16> *%pmask, align 32
  ; Bits of x selected by the mask.
  %keep.x = and <16 x i16> %vx, %vmask
  ; xor with all-ones inverts every mask bit.
  %inv.mask = xor <16 x i16> %vmask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; Bits of y selected by the inverted mask.
  %keep.y = and <16 x i16> %vy, %inv.mask
  ; The two selections cover disjoint bit positions, so `or` merges them.
  %merged = or <16 x i16> %keep.x, %keep.y
  ret <16 x i16> %merged
}

; Masked merge of two <8 x i32> vectors: every result bit is taken from the
; vector behind %px where the mask bit is set and from the vector behind %py
; where it is clear, i.e. (x & mask) | (y & ~mask). All three operands are
; loaded through pointers with 32-byte alignment.
; The assertions below are generated output; per those assertions the runs
; without SSE2 expand the merge into one 32-bit and/not/and/or group per
; element, while the SSE2 and XOP runs match the other 256-bit cases.
define <8 x i32> @out_v8i32(<8 x i32> *%px, <8 x i32> *%py, <8 x i32> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: out_v8i32:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbp
; CHECK-BASELINE-NEXT:    pushq %r15
; CHECK-BASELINE-NEXT:    pushq %r14
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movl 4(%rcx), %r8d
; CHECK-BASELINE-NEXT:    movl 8(%rcx), %r9d
; CHECK-BASELINE-NEXT:    movl 12(%rcx), %r10d
; CHECK-BASELINE-NEXT:    movl 16(%rcx), %r11d
; CHECK-BASELINE-NEXT:    movl 20(%rcx), %r15d
; CHECK-BASELINE-NEXT:    movl 24(%rcx), %ebx
; CHECK-BASELINE-NEXT:    movl 28(%rcx), %ebp
; CHECK-BASELINE-NEXT:    movl 28(%rsi), %r14d
; CHECK-BASELINE-NEXT:    andl %ebp, %r14d
; CHECK-BASELINE-NEXT:    notl %ebp
; CHECK-BASELINE-NEXT:    andl 28(%rdx), %ebp
; CHECK-BASELINE-NEXT:    orl %r14d, %ebp
; CHECK-BASELINE-NEXT:    movl 24(%rsi), %edi
; CHECK-BASELINE-NEXT:    andl %ebx, %edi
; CHECK-BASELINE-NEXT:    notl %ebx
; CHECK-BASELINE-NEXT:    andl 24(%rdx), %ebx
; CHECK-BASELINE-NEXT:    orl %edi, %ebx
; CHECK-BASELINE-NEXT:    movl 20(%rsi), %edi
; CHECK-BASELINE-NEXT:    andl %r15d, %edi
; CHECK-BASELINE-NEXT:    notl %r15d
; CHECK-BASELINE-NEXT:    andl 20(%rdx), %r15d
; CHECK-BASELINE-NEXT:    orl %edi, %r15d
; CHECK-BASELINE-NEXT:    movl 16(%rsi), %edi
; CHECK-BASELINE-NEXT:    andl %r11d, %edi
; CHECK-BASELINE-NEXT:    notl %r11d
; CHECK-BASELINE-NEXT:    andl 16(%rdx), %r11d
; CHECK-BASELINE-NEXT:    orl %edi, %r11d
; CHECK-BASELINE-NEXT:    movl 12(%rsi), %edi
; CHECK-BASELINE-NEXT:    andl %r10d, %edi
; CHECK-BASELINE-NEXT:    notl %r10d
; CHECK-BASELINE-NEXT:    andl 12(%rdx), %r10d
; CHECK-BASELINE-NEXT:    orl %edi, %r10d
; CHECK-BASELINE-NEXT:    movl 8(%rsi), %edi
; CHECK-BASELINE-NEXT:    andl %r9d, %edi
; CHECK-BASELINE-NEXT:    notl %r9d
; CHECK-BASELINE-NEXT:    andl 8(%rdx), %r9d
; CHECK-BASELINE-NEXT:    orl %edi, %r9d
; CHECK-BASELINE-NEXT:    movl 4(%rsi), %edi
; CHECK-BASELINE-NEXT:    andl %r8d, %edi
; CHECK-BASELINE-NEXT:    notl %r8d
; CHECK-BASELINE-NEXT:    andl 4(%rdx), %r8d
; CHECK-BASELINE-NEXT:    orl %edi, %r8d
; CHECK-BASELINE-NEXT:    movl (%rcx), %ecx
; CHECK-BASELINE-NEXT:    movl (%rsi), %esi
; CHECK-BASELINE-NEXT:    andl %ecx, %esi
; CHECK-BASELINE-NEXT:    notl %ecx
; CHECK-BASELINE-NEXT:    andl (%rdx), %ecx
; CHECK-BASELINE-NEXT:    orl %esi, %ecx
; CHECK-BASELINE-NEXT:    movl %ebp, 28(%rax)
; CHECK-BASELINE-NEXT:    movl %ebx, 24(%rax)
; CHECK-BASELINE-NEXT:    movl %r15d, 20(%rax)
; CHECK-BASELINE-NEXT:    movl %r11d, 16(%rax)
; CHECK-BASELINE-NEXT:    movl %r10d, 12(%rax)
; CHECK-BASELINE-NEXT:    movl %r9d, 8(%rax)
; CHECK-BASELINE-NEXT:    movl %r8d, 4(%rax)
; CHECK-BASELINE-NEXT:    movl %ecx, (%rax)
; CHECK-BASELINE-NEXT:    popq %rbx
; CHECK-BASELINE-NEXT:    popq %r14
; CHECK-BASELINE-NEXT:    popq %r15
; CHECK-BASELINE-NEXT:    popq %rbp
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v8i32:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    pushq %rbp
; CHECK-SSE1-NEXT:    pushq %r15
; CHECK-SSE1-NEXT:    pushq %r14
; CHECK-SSE1-NEXT:    pushq %rbx
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movl 4(%rcx), %r8d
; CHECK-SSE1-NEXT:    movl 8(%rcx), %r9d
; CHECK-SSE1-NEXT:    movl 12(%rcx), %r10d
; CHECK-SSE1-NEXT:    movl 16(%rcx), %r11d
; CHECK-SSE1-NEXT:    movl 20(%rcx), %r15d
; CHECK-SSE1-NEXT:    movl 24(%rcx), %ebx
; CHECK-SSE1-NEXT:    movl 28(%rcx), %ebp
; CHECK-SSE1-NEXT:    movl 28(%rsi), %r14d
; CHECK-SSE1-NEXT:    andl %ebp, %r14d
; CHECK-SSE1-NEXT:    notl %ebp
; CHECK-SSE1-NEXT:    andl 28(%rdx), %ebp
; CHECK-SSE1-NEXT:    orl %r14d, %ebp
; CHECK-SSE1-NEXT:    movl 24(%rsi), %edi
; CHECK-SSE1-NEXT:    andl %ebx, %edi
; CHECK-SSE1-NEXT:    notl %ebx
; CHECK-SSE1-NEXT:    andl 24(%rdx), %ebx
; CHECK-SSE1-NEXT:    orl %edi, %ebx
; CHECK-SSE1-NEXT:    movl 20(%rsi), %edi
; CHECK-SSE1-NEXT:    andl %r15d, %edi
; CHECK-SSE1-NEXT:    notl %r15d
; CHECK-SSE1-NEXT:    andl 20(%rdx), %r15d
; CHECK-SSE1-NEXT:    orl %edi, %r15d
; CHECK-SSE1-NEXT:    movl 16(%rsi), %edi
; CHECK-SSE1-NEXT:    andl %r11d, %edi
; CHECK-SSE1-NEXT:    notl %r11d
; CHECK-SSE1-NEXT:    andl 16(%rdx), %r11d
; CHECK-SSE1-NEXT:    orl %edi, %r11d
; CHECK-SSE1-NEXT:    movl 12(%rsi), %edi
; CHECK-SSE1-NEXT:    andl %r10d, %edi
; CHECK-SSE1-NEXT:    notl %r10d
; CHECK-SSE1-NEXT:    andl 12(%rdx), %r10d
; CHECK-SSE1-NEXT:    orl %edi, %r10d
; CHECK-SSE1-NEXT:    movl 8(%rsi), %edi
; CHECK-SSE1-NEXT:    andl %r9d, %edi
; CHECK-SSE1-NEXT:    notl %r9d
; CHECK-SSE1-NEXT:    andl 8(%rdx), %r9d
; CHECK-SSE1-NEXT:    orl %edi, %r9d
; CHECK-SSE1-NEXT:    movl 4(%rsi), %edi
; CHECK-SSE1-NEXT:    andl %r8d, %edi
; CHECK-SSE1-NEXT:    notl %r8d
; CHECK-SSE1-NEXT:    andl 4(%rdx), %r8d
; CHECK-SSE1-NEXT:    orl %edi, %r8d
; CHECK-SSE1-NEXT:    movl (%rcx), %ecx
; CHECK-SSE1-NEXT:    movl (%rsi), %esi
; CHECK-SSE1-NEXT:    andl %ecx, %esi
; CHECK-SSE1-NEXT:    notl %ecx
; CHECK-SSE1-NEXT:    andl (%rdx), %ecx
; CHECK-SSE1-NEXT:    orl %esi, %ecx
; CHECK-SSE1-NEXT:    movl %ebp, 28(%rax)
; CHECK-SSE1-NEXT:    movl %ebx, 24(%rax)
; CHECK-SSE1-NEXT:    movl %r15d, 20(%rax)
; CHECK-SSE1-NEXT:    movl %r11d, 16(%rax)
; CHECK-SSE1-NEXT:    movl %r10d, 12(%rax)
; CHECK-SSE1-NEXT:    movl %r9d, 8(%rax)
; CHECK-SSE1-NEXT:    movl %r8d, 4(%rax)
; CHECK-SSE1-NEXT:    movl %ecx, (%rax)
; CHECK-SSE1-NEXT:    popq %rbx
; CHECK-SSE1-NEXT:    popq %r14
; CHECK-SSE1-NEXT:    popq %r15
; CHECK-SSE1-NEXT:    popq %rbp
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v8i32:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps 16(%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps 16(%rdi), %xmm2
; CHECK-SSE2-NEXT:    andps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm3
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm3
; CHECK-SSE2-NEXT:    andnps 16(%rsi), %xmm1
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm3, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v8i32:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %ymm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %ymm1
; CHECK-XOP-NEXT:    vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; CHECK-XOP-NEXT:    retq
  %vx = load <8 x i32>, <8 x i32> *%px, align 32
  %vy = load <8 x i32>, <8 x i32> *%py, align 32
  %vmask = load <8 x i32>, <8 x i32> *%pmask, align 32
  ; Bits of x selected by the mask.
  %keep.x = and <8 x i32> %vx, %vmask
  ; xor with all-ones inverts every mask bit.
  %inv.mask = xor <8 x i32> %vmask, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  ; Bits of y selected by the inverted mask.
  %keep.y = and <8 x i32> %vy, %inv.mask
  ; The two selections cover disjoint bit positions, so `or` merges them.
  %merged = or <8 x i32> %keep.x, %keep.y
  ret <8 x i32> %merged
}

define <4 x i64> @out_v4i64(<4 x i64> *%px, <4 x i64> *%py, <4 x i64> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i64:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movq (%rcx), %r8
; CHECK-BASELINE-NEXT:    movq 8(%rcx), %r9
; CHECK-BASELINE-NEXT:    movq 16(%rcx), %rdi
; CHECK-BASELINE-NEXT:    movq 24(%rcx), %rcx
; CHECK-BASELINE-NEXT:    movq 24(%rsi), %r10
; CHECK-BASELINE-NEXT:    andq %rcx, %r10
; CHECK-BASELINE-NEXT:    movq 16(%rsi), %r11
; CHECK-BASELINE-NEXT:    andq %rdi, %r11
; CHECK-BASELINE-NEXT:    movq 8(%rsi), %rbx
; CHECK-BASELINE-NEXT:    andq %r9, %rbx
; CHECK-BASELINE-NEXT:    movq (%rsi), %rsi
; CHECK-BASELINE-NEXT:    andq %r8, %rsi
; CHECK-BASELINE-NEXT:    notq %r8
; CHECK-BASELINE-NEXT:    notq %r9
; CHECK-BASELINE-NEXT:    notq %rdi
; CHECK-BASELINE-NEXT:    notq %rcx
; CHECK-BASELINE-NEXT:    andq 24(%rdx),
%rcx 2448; CHECK-BASELINE-NEXT: orq %r10, %rcx 2449; CHECK-BASELINE-NEXT: andq 16(%rdx), %rdi 2450; CHECK-BASELINE-NEXT: orq %r11, %rdi 2451; CHECK-BASELINE-NEXT: andq 8(%rdx), %r9 2452; CHECK-BASELINE-NEXT: orq %rbx, %r9 2453; CHECK-BASELINE-NEXT: andq (%rdx), %r8 2454; CHECK-BASELINE-NEXT: orq %rsi, %r8 2455; CHECK-BASELINE-NEXT: movq %rcx, 24(%rax) 2456; CHECK-BASELINE-NEXT: movq %rdi, 16(%rax) 2457; CHECK-BASELINE-NEXT: movq %r9, 8(%rax) 2458; CHECK-BASELINE-NEXT: movq %r8, (%rax) 2459; CHECK-BASELINE-NEXT: popq %rbx 2460; CHECK-BASELINE-NEXT: retq 2461; 2462; CHECK-SSE1-LABEL: out_v4i64: 2463; CHECK-SSE1: # %bb.0: 2464; CHECK-SSE1-NEXT: pushq %rbx 2465; CHECK-SSE1-NEXT: movq %rdi, %rax 2466; CHECK-SSE1-NEXT: movq (%rcx), %r8 2467; CHECK-SSE1-NEXT: movq 8(%rcx), %r9 2468; CHECK-SSE1-NEXT: movq 16(%rcx), %rdi 2469; CHECK-SSE1-NEXT: movq 24(%rcx), %rcx 2470; CHECK-SSE1-NEXT: movq 24(%rsi), %r10 2471; CHECK-SSE1-NEXT: andq %rcx, %r10 2472; CHECK-SSE1-NEXT: movq 16(%rsi), %r11 2473; CHECK-SSE1-NEXT: andq %rdi, %r11 2474; CHECK-SSE1-NEXT: movq 8(%rsi), %rbx 2475; CHECK-SSE1-NEXT: andq %r9, %rbx 2476; CHECK-SSE1-NEXT: movq (%rsi), %rsi 2477; CHECK-SSE1-NEXT: andq %r8, %rsi 2478; CHECK-SSE1-NEXT: notq %r8 2479; CHECK-SSE1-NEXT: notq %r9 2480; CHECK-SSE1-NEXT: notq %rdi 2481; CHECK-SSE1-NEXT: notq %rcx 2482; CHECK-SSE1-NEXT: andq 24(%rdx), %rcx 2483; CHECK-SSE1-NEXT: orq %r10, %rcx 2484; CHECK-SSE1-NEXT: andq 16(%rdx), %rdi 2485; CHECK-SSE1-NEXT: orq %r11, %rdi 2486; CHECK-SSE1-NEXT: andq 8(%rdx), %r9 2487; CHECK-SSE1-NEXT: orq %rbx, %r9 2488; CHECK-SSE1-NEXT: andq (%rdx), %r8 2489; CHECK-SSE1-NEXT: orq %rsi, %r8 2490; CHECK-SSE1-NEXT: movq %rcx, 24(%rax) 2491; CHECK-SSE1-NEXT: movq %rdi, 16(%rax) 2492; CHECK-SSE1-NEXT: movq %r9, 8(%rax) 2493; CHECK-SSE1-NEXT: movq %r8, (%rax) 2494; CHECK-SSE1-NEXT: popq %rbx 2495; CHECK-SSE1-NEXT: retq 2496; 2497; CHECK-SSE2-LABEL: out_v4i64: 2498; CHECK-SSE2: # %bb.0: 2499; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 2500; CHECK-SSE2-NEXT: 
movaps 16(%rdx), %xmm1 2501; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 2502; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 2503; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 2504; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 2505; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 2506; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 2507; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 2508; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 2509; CHECK-SSE2-NEXT: retq 2510; 2511; CHECK-XOP-LABEL: out_v4i64: 2512; CHECK-XOP: # %bb.0: 2513; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 2514; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 2515; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 2516; CHECK-XOP-NEXT: retq 2517 %x = load <4 x i64>, <4 x i64> *%px, align 32 2518 %y = load <4 x i64>, <4 x i64> *%py, align 32 2519 %mask = load <4 x i64>, <4 x i64> *%pmask, align 32 2520 %mx = and <4 x i64> %x, %mask 2521 %notmask = xor <4 x i64> %mask, <i64 -1, i64 -1, i64 -1, i64 -1> 2522 %my = and <4 x i64> %y, %notmask 2523 %r = or <4 x i64> %mx, %my 2524 ret <4 x i64> %r 2525} 2526 2527;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2528; Should be the same as the previous (out_*) tests: the in_* tests below express the same masked merge as ((x ^ y) & mask) ^ y instead of (x & mask) | (y & ~mask). 
2529;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2530 2531; ============================================================================ ; 2532; 8-bit vector width 2533; ============================================================================ ; 2534; in_v1i8: masked merge written as r = ((x ^ y) & mask) ^ y on <1 x i8>; one shared CHECK body covers every RUN line. 2535define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { 2536; CHECK-LABEL: in_v1i8: 2537; CHECK: # %bb.0: 2538; CHECK-NEXT: movl %edi, %eax 2539; CHECK-NEXT: xorl %esi, %eax 2540; CHECK-NEXT: andl %edx, %eax 2541; CHECK-NEXT: xorl %esi, %eax 2542; CHECK-NEXT: # kill: def $al killed $al killed $eax 2543; CHECK-NEXT: retq 2544 %n0 = xor <1 x i8> %x, %y 2545 %n1 = and <1 x i8> %n0, %mask 2546 %r = xor <1 x i8> %n1, %y 2547 ret <1 x i8> %r 2548} 2549 2550; ============================================================================ ; 2551; 16-bit vector width 2552; ============================================================================ ; 2553; in_v2i8: same xor/and/xor merge on <2 x i8>. BASELINE and SSE1 expect scalar xor/and/xor per element, SSE2 expects andps/andnps/orps, XOP expects a single vpcmov. 2554define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { 2555; CHECK-BASELINE-LABEL: in_v2i8: 2556; CHECK-BASELINE: # %bb.0: 2557; CHECK-BASELINE-NEXT: movl %edi, %eax 2558; CHECK-BASELINE-NEXT: xorl %edx, %eax 2559; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2560; CHECK-BASELINE-NEXT: andl %r9d, %esi 2561; CHECK-BASELINE-NEXT: andl %r8d, %eax 2562; CHECK-BASELINE-NEXT: xorl %edx, %eax 2563; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2564; CHECK-BASELINE-NEXT: # kill: def $al killed $al killed $eax 2565; CHECK-BASELINE-NEXT: movl %esi, %edx 2566; CHECK-BASELINE-NEXT: retq 2567; 2568; CHECK-SSE1-LABEL: in_v2i8: 2569; CHECK-SSE1: # %bb.0: 2570; CHECK-SSE1-NEXT: movl %edi, %eax 2571; CHECK-SSE1-NEXT: xorl %edx, %eax 2572; CHECK-SSE1-NEXT: xorl %ecx, %esi 2573; CHECK-SSE1-NEXT: andl %r9d, %esi 2574; CHECK-SSE1-NEXT: andl %r8d, %eax 2575; CHECK-SSE1-NEXT: xorl %edx, %eax 2576; CHECK-SSE1-NEXT: xorl %ecx, %esi 2577; CHECK-SSE1-NEXT: # kill: def $al killed $al killed $eax 2578; CHECK-SSE1-NEXT: movl %esi, %edx 
2579; CHECK-SSE1-NEXT: retq 2580; 2581; CHECK-SSE2-LABEL: in_v2i8: 2582; CHECK-SSE2: # %bb.0: 2583; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2584; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2585; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2586; CHECK-SSE2-NEXT: retq 2587; 2588; CHECK-XOP-LABEL: in_v2i8: 2589; CHECK-XOP: # %bb.0: 2590; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2591; CHECK-XOP-NEXT: retq 2592 %n0 = xor <2 x i8> %x, %y 2593 %n1 = and <2 x i8> %n0, %mask 2594 %r = xor <2 x i8> %n1, %y 2595 ret <2 x i8> %r 2596} 2597; in_v1i16: same xor/and/xor merge on <1 x i16>; one shared CHECK body covers every RUN line. 2598define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { 2599; CHECK-LABEL: in_v1i16: 2600; CHECK: # %bb.0: 2601; CHECK-NEXT: movl %edi, %eax 2602; CHECK-NEXT: xorl %esi, %eax 2603; CHECK-NEXT: andl %edx, %eax 2604; CHECK-NEXT: xorl %esi, %eax 2605; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 2606; CHECK-NEXT: retq 2607 %n0 = xor <1 x i16> %x, %y 2608 %n1 = and <1 x i16> %n0, %mask 2609 %r = xor <1 x i16> %n1, %y 2610 ret <1 x i16> %r 2611} 2612 2613; ============================================================================ ; 2614; 32-bit vector width 2615; ============================================================================ ; 2616; in_v4i8: same xor/and/xor merge on <4 x i8>. BASELINE and SSE1 expect byte-wise xor/and/xor with the four result bytes stored through the pointer copied from rdi into rax; SSE2 expects andps/andnps/orps, XOP expects a single vpcmov. 2617define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { 2618; CHECK-BASELINE-LABEL: in_v4i8: 2619; CHECK-BASELINE: # %bb.0: 2620; CHECK-BASELINE-NEXT: movq %rdi, %rax 2621; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil 2622; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 2623; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 2624; CHECK-BASELINE-NEXT: xorl %r9d, %esi 2625; CHECK-BASELINE-NEXT: xorb %r11b, %dl 2626; CHECK-BASELINE-NEXT: xorb %r10b, %cl 2627; CHECK-BASELINE-NEXT: xorb %dil, %r8b 2628; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2629; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 2630; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 2631; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 2632; CHECK-BASELINE-NEXT: xorb %r9b, 
%sil 2633; CHECK-BASELINE-NEXT: xorb %r11b, %dl 2634; CHECK-BASELINE-NEXT: xorb %r10b, %cl 2635; CHECK-BASELINE-NEXT: xorb %dil, %r8b 2636; CHECK-BASELINE-NEXT: movb %r8b, 3(%rax) 2637; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) 2638; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) 2639; CHECK-BASELINE-NEXT: movb %sil, (%rax) 2640; CHECK-BASELINE-NEXT: retq 2641; 2642; CHECK-SSE1-LABEL: in_v4i8: 2643; CHECK-SSE1: # %bb.0: 2644; CHECK-SSE1-NEXT: movq %rdi, %rax 2645; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil 2646; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 2647; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 2648; CHECK-SSE1-NEXT: xorl %r9d, %esi 2649; CHECK-SSE1-NEXT: xorb %r11b, %dl 2650; CHECK-SSE1-NEXT: xorb %r10b, %cl 2651; CHECK-SSE1-NEXT: xorb %dil, %r8b 2652; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2653; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 2654; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 2655; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 2656; CHECK-SSE1-NEXT: xorb %r9b, %sil 2657; CHECK-SSE1-NEXT: xorb %r11b, %dl 2658; CHECK-SSE1-NEXT: xorb %r10b, %cl 2659; CHECK-SSE1-NEXT: xorb %dil, %r8b 2660; CHECK-SSE1-NEXT: movb %r8b, 3(%rax) 2661; CHECK-SSE1-NEXT: movb %cl, 2(%rax) 2662; CHECK-SSE1-NEXT: movb %dl, 1(%rax) 2663; CHECK-SSE1-NEXT: movb %sil, (%rax) 2664; CHECK-SSE1-NEXT: retq 2665; 2666; CHECK-SSE2-LABEL: in_v4i8: 2667; CHECK-SSE2: # %bb.0: 2668; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2669; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2670; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2671; CHECK-SSE2-NEXT: retq 2672; 2673; CHECK-XOP-LABEL: in_v4i8: 2674; CHECK-XOP: # %bb.0: 2675; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2676; CHECK-XOP-NEXT: retq 2677 %n0 = xor <4 x i8> %x, %y 2678 %n1 = and <4 x i8> %n0, %mask 2679 %r = xor <4 x i8> %n1, %y 2680 ret <4 x i8> %r 2681} 2682; in_v2i16: same xor/and/xor merge on <2 x i16>. BASELINE and SSE1 expect scalar xor/and/xor per element, SSE2 expects andps/andnps/orps, XOP expects a single vpcmov. 2683define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { 2684; CHECK-BASELINE-LABEL: in_v2i16: 2685; CHECK-BASELINE: # %bb.0: 2686; CHECK-BASELINE-NEXT: movl 
%edi, %eax 2687; CHECK-BASELINE-NEXT: xorl %edx, %eax 2688; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2689; CHECK-BASELINE-NEXT: andl %r9d, %esi 2690; CHECK-BASELINE-NEXT: andl %r8d, %eax 2691; CHECK-BASELINE-NEXT: xorl %edx, %eax 2692; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2693; CHECK-BASELINE-NEXT: # kill: def $ax killed $ax killed $eax 2694; CHECK-BASELINE-NEXT: movl %esi, %edx 2695; CHECK-BASELINE-NEXT: retq 2696; 2697; CHECK-SSE1-LABEL: in_v2i16: 2698; CHECK-SSE1: # %bb.0: 2699; CHECK-SSE1-NEXT: movl %edi, %eax 2700; CHECK-SSE1-NEXT: xorl %edx, %eax 2701; CHECK-SSE1-NEXT: xorl %ecx, %esi 2702; CHECK-SSE1-NEXT: andl %r9d, %esi 2703; CHECK-SSE1-NEXT: andl %r8d, %eax 2704; CHECK-SSE1-NEXT: xorl %edx, %eax 2705; CHECK-SSE1-NEXT: xorl %ecx, %esi 2706; CHECK-SSE1-NEXT: # kill: def $ax killed $ax killed $eax 2707; CHECK-SSE1-NEXT: movl %esi, %edx 2708; CHECK-SSE1-NEXT: retq 2709; 2710; CHECK-SSE2-LABEL: in_v2i16: 2711; CHECK-SSE2: # %bb.0: 2712; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2713; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2714; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2715; CHECK-SSE2-NEXT: retq 2716; 2717; CHECK-XOP-LABEL: in_v2i16: 2718; CHECK-XOP: # %bb.0: 2719; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2720; CHECK-XOP-NEXT: retq 2721 %n0 = xor <2 x i16> %x, %y 2722 %n1 = and <2 x i16> %n0, %mask 2723 %r = xor <2 x i16> %n1, %y 2724 ret <2 x i16> %r 2725} 2726; in_v1i32: same xor/and/xor merge on <1 x i32>; one shared CHECK body covers every RUN line. 2727define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { 2728; CHECK-LABEL: in_v1i32: 2729; CHECK: # %bb.0: 2730; CHECK-NEXT: movl %edi, %eax 2731; CHECK-NEXT: xorl %esi, %eax 2732; CHECK-NEXT: andl %edx, %eax 2733; CHECK-NEXT: xorl %esi, %eax 2734; CHECK-NEXT: retq 2735 %n0 = xor <1 x i32> %x, %y 2736 %n1 = and <1 x i32> %n0, %mask 2737 %r = xor <1 x i32> %n1, %y 2738 ret <1 x i32> %r 2739} 2740 2741; ============================================================================ ; 2742; 64-bit vector width 2743; 
============================================================================ ; 2744 2745define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { 2746; CHECK-BASELINE-LABEL: in_v8i8: 2747; CHECK-BASELINE: # %bb.0: 2748; CHECK-BASELINE-NEXT: pushq %rbp 2749; CHECK-BASELINE-NEXT: pushq %r15 2750; CHECK-BASELINE-NEXT: pushq %r14 2751; CHECK-BASELINE-NEXT: pushq %r13 2752; CHECK-BASELINE-NEXT: pushq %r12 2753; CHECK-BASELINE-NEXT: pushq %rbx 2754; CHECK-BASELINE-NEXT: movl %ecx, %r10d 2755; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 2756; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 2757; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 2758; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 2759; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 2760; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 2761; CHECK-BASELINE-NEXT: xorb %r13b, %sil 2762; CHECK-BASELINE-NEXT: xorb %r12b, %dl 2763; CHECK-BASELINE-NEXT: xorb %r15b, %r10b 2764; CHECK-BASELINE-NEXT: xorb %r14b, %r8b 2765; CHECK-BASELINE-NEXT: xorb %bpl, %r9b 2766; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 2767; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %bl 2768; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 2769; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %cl 2770; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 2771; CHECK-BASELINE-NEXT: xorb %r11b, %al 2772; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b 2773; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2774; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 2775; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 2776; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 2777; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 2778; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 2779; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 2780; CHECK-BASELINE-NEXT: xorb %r13b, %sil 2781; CHECK-BASELINE-NEXT: xorb %r12b, %dl 2782; CHECK-BASELINE-NEXT: xorb %r15b, %r10b 2783; CHECK-BASELINE-NEXT: xorb %r14b, %r8b 2784; 
CHECK-BASELINE-NEXT: xorb %bpl, %r9b 2785; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %bl 2786; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %cl 2787; CHECK-BASELINE-NEXT: xorb %r11b, %al 2788; CHECK-BASELINE-NEXT: movb %al, 7(%rdi) 2789; CHECK-BASELINE-NEXT: movb %cl, 6(%rdi) 2790; CHECK-BASELINE-NEXT: movb %bl, 5(%rdi) 2791; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdi) 2792; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi) 2793; CHECK-BASELINE-NEXT: movb %r10b, 2(%rdi) 2794; CHECK-BASELINE-NEXT: movb %dl, 1(%rdi) 2795; CHECK-BASELINE-NEXT: movb %sil, (%rdi) 2796; CHECK-BASELINE-NEXT: movq %rdi, %rax 2797; CHECK-BASELINE-NEXT: popq %rbx 2798; CHECK-BASELINE-NEXT: popq %r12 2799; CHECK-BASELINE-NEXT: popq %r13 2800; CHECK-BASELINE-NEXT: popq %r14 2801; CHECK-BASELINE-NEXT: popq %r15 2802; CHECK-BASELINE-NEXT: popq %rbp 2803; CHECK-BASELINE-NEXT: retq 2804; 2805; CHECK-SSE1-LABEL: in_v8i8: 2806; CHECK-SSE1: # %bb.0: 2807; CHECK-SSE1-NEXT: pushq %rbp 2808; CHECK-SSE1-NEXT: pushq %r15 2809; CHECK-SSE1-NEXT: pushq %r14 2810; CHECK-SSE1-NEXT: pushq %r13 2811; CHECK-SSE1-NEXT: pushq %r12 2812; CHECK-SSE1-NEXT: pushq %rbx 2813; CHECK-SSE1-NEXT: movl %ecx, %r10d 2814; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 2815; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 2816; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 2817; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 2818; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 2819; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 2820; CHECK-SSE1-NEXT: xorb %r13b, %sil 2821; CHECK-SSE1-NEXT: xorb %r12b, %dl 2822; CHECK-SSE1-NEXT: xorb %r15b, %r10b 2823; CHECK-SSE1-NEXT: xorb %r14b, %r8b 2824; CHECK-SSE1-NEXT: xorb %bpl, %r9b 2825; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 2826; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %bl 2827; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 2828; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %cl 2829; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 2830; CHECK-SSE1-NEXT: xorb %r11b, %al 2831; CHECK-SSE1-NEXT: andb 
{{[0-9]+}}(%rsp), %r9b 2832; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2833; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 2834; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 2835; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 2836; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 2837; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 2838; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 2839; CHECK-SSE1-NEXT: xorb %r13b, %sil 2840; CHECK-SSE1-NEXT: xorb %r12b, %dl 2841; CHECK-SSE1-NEXT: xorb %r15b, %r10b 2842; CHECK-SSE1-NEXT: xorb %r14b, %r8b 2843; CHECK-SSE1-NEXT: xorb %bpl, %r9b 2844; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %bl 2845; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %cl 2846; CHECK-SSE1-NEXT: xorb %r11b, %al 2847; CHECK-SSE1-NEXT: movb %al, 7(%rdi) 2848; CHECK-SSE1-NEXT: movb %cl, 6(%rdi) 2849; CHECK-SSE1-NEXT: movb %bl, 5(%rdi) 2850; CHECK-SSE1-NEXT: movb %r9b, 4(%rdi) 2851; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi) 2852; CHECK-SSE1-NEXT: movb %r10b, 2(%rdi) 2853; CHECK-SSE1-NEXT: movb %dl, 1(%rdi) 2854; CHECK-SSE1-NEXT: movb %sil, (%rdi) 2855; CHECK-SSE1-NEXT: movq %rdi, %rax 2856; CHECK-SSE1-NEXT: popq %rbx 2857; CHECK-SSE1-NEXT: popq %r12 2858; CHECK-SSE1-NEXT: popq %r13 2859; CHECK-SSE1-NEXT: popq %r14 2860; CHECK-SSE1-NEXT: popq %r15 2861; CHECK-SSE1-NEXT: popq %rbp 2862; CHECK-SSE1-NEXT: retq 2863; 2864; CHECK-SSE2-LABEL: in_v8i8: 2865; CHECK-SSE2: # %bb.0: 2866; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2867; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2868; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2869; CHECK-SSE2-NEXT: retq 2870; 2871; CHECK-XOP-LABEL: in_v8i8: 2872; CHECK-XOP: # %bb.0: 2873; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2874; CHECK-XOP-NEXT: retq 2875 %n0 = xor <8 x i8> %x, %y 2876 %n1 = and <8 x i8> %n0, %mask 2877 %r = xor <8 x i8> %n1, %y 2878 ret <8 x i8> %r 2879} 2880 2881define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { 2882; CHECK-BASELINE-LABEL: in_v4i16: 2883; CHECK-BASELINE: # %bb.0: 2884; CHECK-BASELINE-NEXT: movq 
%rdi, %rax 2885; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d 2886; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d 2887; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi 2888; CHECK-BASELINE-NEXT: xorl %r9d, %esi 2889; CHECK-BASELINE-NEXT: xorl %edi, %edx 2890; CHECK-BASELINE-NEXT: xorl %r11d, %ecx 2891; CHECK-BASELINE-NEXT: xorl %r10d, %r8d 2892; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w 2893; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 2894; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx 2895; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si 2896; CHECK-BASELINE-NEXT: xorl %r9d, %esi 2897; CHECK-BASELINE-NEXT: xorl %edi, %edx 2898; CHECK-BASELINE-NEXT: xorl %r11d, %ecx 2899; CHECK-BASELINE-NEXT: xorl %r10d, %r8d 2900; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax) 2901; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) 2902; CHECK-BASELINE-NEXT: movw %dx, 2(%rax) 2903; CHECK-BASELINE-NEXT: movw %si, (%rax) 2904; CHECK-BASELINE-NEXT: retq 2905; 2906; CHECK-SSE1-LABEL: in_v4i16: 2907; CHECK-SSE1: # %bb.0: 2908; CHECK-SSE1-NEXT: movq %rdi, %rax 2909; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d 2910; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d 2911; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi 2912; CHECK-SSE1-NEXT: xorl %r9d, %esi 2913; CHECK-SSE1-NEXT: xorl %edi, %edx 2914; CHECK-SSE1-NEXT: xorl %r11d, %ecx 2915; CHECK-SSE1-NEXT: xorl %r10d, %r8d 2916; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w 2917; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 2918; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx 2919; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si 2920; CHECK-SSE1-NEXT: xorl %r9d, %esi 2921; CHECK-SSE1-NEXT: xorl %edi, %edx 2922; CHECK-SSE1-NEXT: xorl %r11d, %ecx 2923; CHECK-SSE1-NEXT: xorl %r10d, %r8d 2924; CHECK-SSE1-NEXT: movw %r8w, 6(%rax) 2925; CHECK-SSE1-NEXT: movw %cx, 4(%rax) 2926; CHECK-SSE1-NEXT: movw %dx, 2(%rax) 2927; CHECK-SSE1-NEXT: movw %si, (%rax) 2928; CHECK-SSE1-NEXT: retq 2929; 2930; CHECK-SSE2-LABEL: in_v4i16: 2931; CHECK-SSE2: # %bb.0: 2932; 
CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2933; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2934; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2935; CHECK-SSE2-NEXT: retq 2936; 2937; CHECK-XOP-LABEL: in_v4i16: 2938; CHECK-XOP: # %bb.0: 2939; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2940; CHECK-XOP-NEXT: retq 2941 %n0 = xor <4 x i16> %x, %y 2942 %n1 = and <4 x i16> %n0, %mask 2943 %r = xor <4 x i16> %n1, %y 2944 ret <4 x i16> %r 2945} 2946 2947define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { 2948; CHECK-BASELINE-LABEL: in_v2i32: 2949; CHECK-BASELINE: # %bb.0: 2950; CHECK-BASELINE-NEXT: movl %edi, %eax 2951; CHECK-BASELINE-NEXT: xorl %edx, %eax 2952; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2953; CHECK-BASELINE-NEXT: andl %r9d, %esi 2954; CHECK-BASELINE-NEXT: andl %r8d, %eax 2955; CHECK-BASELINE-NEXT: xorl %edx, %eax 2956; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2957; CHECK-BASELINE-NEXT: movl %esi, %edx 2958; CHECK-BASELINE-NEXT: retq 2959; 2960; CHECK-SSE1-LABEL: in_v2i32: 2961; CHECK-SSE1: # %bb.0: 2962; CHECK-SSE1-NEXT: movl %edi, %eax 2963; CHECK-SSE1-NEXT: xorl %edx, %eax 2964; CHECK-SSE1-NEXT: xorl %ecx, %esi 2965; CHECK-SSE1-NEXT: andl %r9d, %esi 2966; CHECK-SSE1-NEXT: andl %r8d, %eax 2967; CHECK-SSE1-NEXT: xorl %edx, %eax 2968; CHECK-SSE1-NEXT: xorl %ecx, %esi 2969; CHECK-SSE1-NEXT: movl %esi, %edx 2970; CHECK-SSE1-NEXT: retq 2971; 2972; CHECK-SSE2-LABEL: in_v2i32: 2973; CHECK-SSE2: # %bb.0: 2974; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2975; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2976; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2977; CHECK-SSE2-NEXT: retq 2978; 2979; CHECK-XOP-LABEL: in_v2i32: 2980; CHECK-XOP: # %bb.0: 2981; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2982; CHECK-XOP-NEXT: retq 2983 %n0 = xor <2 x i32> %x, %y 2984 %n1 = and <2 x i32> %n0, %mask 2985 %r = xor <2 x i32> %n1, %y 2986 ret <2 x i32> %r 2987} 2988 2989define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { 2990; CHECK-LABEL: in_v1i64: 2991; CHECK: 
# %bb.0: 2992; CHECK-NEXT: movq %rdi, %rax 2993; CHECK-NEXT: xorq %rsi, %rax 2994; CHECK-NEXT: andq %rdx, %rax 2995; CHECK-NEXT: xorq %rsi, %rax 2996; CHECK-NEXT: retq 2997 %n0 = xor <1 x i64> %x, %y 2998 %n1 = and <1 x i64> %n0, %mask 2999 %r = xor <1 x i64> %n1, %y 3000 ret <1 x i64> %r 3001} 3002 3003; ============================================================================ ; 3004; 128-bit vector width 3005; ============================================================================ ; 3006 3007define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { 3008; CHECK-BASELINE-LABEL: in_v16i8: 3009; CHECK-BASELINE: # %bb.0: 3010; CHECK-BASELINE-NEXT: pushq %rbp 3011; CHECK-BASELINE-NEXT: pushq %r15 3012; CHECK-BASELINE-NEXT: pushq %r14 3013; CHECK-BASELINE-NEXT: pushq %r13 3014; CHECK-BASELINE-NEXT: pushq %r12 3015; CHECK-BASELINE-NEXT: pushq %rbx 3016; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3017; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3018; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3019; CHECK-BASELINE-NEXT: movq %rdi, %rdx 3020; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil 3021; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl 3022; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3023; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 3024; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 3025; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 3026; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 3027; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 3028; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 3029; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 3030; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 3031; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil 3032; CHECK-BASELINE-NEXT: xorb %dil, %r9b 3033; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b 3034; CHECK-BASELINE-NEXT: xorb %dil, %r9b 3035; CHECK-BASELINE-NEXT: movb 
{{[0-9]+}}(%rsp), %dil 3036; CHECK-BASELINE-NEXT: xorb %r10b, %dil 3037; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil 3038; CHECK-BASELINE-NEXT: xorb %r10b, %dil 3039; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b 3040; CHECK-BASELINE-NEXT: xorb %r11b, %r10b 3041; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 3042; CHECK-BASELINE-NEXT: xorb %r11b, %r10b 3043; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b 3044; CHECK-BASELINE-NEXT: xorb %bl, %r11b 3045; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 3046; CHECK-BASELINE-NEXT: xorb %bl, %r11b 3047; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl 3048; CHECK-BASELINE-NEXT: xorb %r13b, %bl 3049; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 3050; CHECK-BASELINE-NEXT: xorb %r13b, %bl 3051; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b 3052; CHECK-BASELINE-NEXT: xorb %r12b, %r13b 3053; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b 3054; CHECK-BASELINE-NEXT: xorb %r12b, %r13b 3055; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b 3056; CHECK-BASELINE-NEXT: xorb %r15b, %r12b 3057; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b 3058; CHECK-BASELINE-NEXT: xorb %r15b, %r12b 3059; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b 3060; CHECK-BASELINE-NEXT: xorb %r14b, %r15b 3061; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b 3062; CHECK-BASELINE-NEXT: xorb %r14b, %r15b 3063; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b 3064; CHECK-BASELINE-NEXT: xorb %bpl, %r14b 3065; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b 3066; CHECK-BASELINE-NEXT: xorb %bpl, %r14b 3067; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl 3068; CHECK-BASELINE-NEXT: xorb %al, %bpl 3069; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl 3070; CHECK-BASELINE-NEXT: xorb %al, %bpl 3071; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3072; CHECK-BASELINE-NEXT: xorb %cl, %al 3073; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 3074; CHECK-BASELINE-NEXT: xorb %cl, %al 3075; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), 
%cl 3076; CHECK-BASELINE-NEXT: xorb %sil, %cl 3077; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 3078; CHECK-BASELINE-NEXT: xorb %sil, %cl 3079; CHECK-BASELINE-NEXT: movb %cl, 15(%rdx) 3080; CHECK-BASELINE-NEXT: movb %al, 14(%rdx) 3081; CHECK-BASELINE-NEXT: movb %bpl, 13(%rdx) 3082; CHECK-BASELINE-NEXT: movb %r14b, 12(%rdx) 3083; CHECK-BASELINE-NEXT: movb %r15b, 11(%rdx) 3084; CHECK-BASELINE-NEXT: movb %r12b, 10(%rdx) 3085; CHECK-BASELINE-NEXT: movb %r13b, 9(%rdx) 3086; CHECK-BASELINE-NEXT: movb %bl, 8(%rdx) 3087; CHECK-BASELINE-NEXT: movb %r11b, 7(%rdx) 3088; CHECK-BASELINE-NEXT: movb %r10b, 6(%rdx) 3089; CHECK-BASELINE-NEXT: movb %dil, 5(%rdx) 3090; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdx) 3091; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3092; CHECK-BASELINE-NEXT: xorb %al, %r8b 3093; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 3094; CHECK-BASELINE-NEXT: xorb %al, %r8b 3095; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdx) 3096; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3097; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3098; CHECK-BASELINE-NEXT: xorb %al, %cl 3099; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 3100; CHECK-BASELINE-NEXT: xorb %al, %cl 3101; CHECK-BASELINE-NEXT: movb %cl, 2(%rdx) 3102; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3103; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3104; CHECK-BASELINE-NEXT: xorb %al, %cl 3105; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 3106; CHECK-BASELINE-NEXT: xorb %al, %cl 3107; CHECK-BASELINE-NEXT: movb %cl, 1(%rdx) 3108; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al 3109; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3110; CHECK-BASELINE-NEXT: xorb %al, %cl 3111; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 3112; CHECK-BASELINE-NEXT: xorb %al, %cl 3113; CHECK-BASELINE-NEXT: movb %cl, (%rdx) 3114; CHECK-BASELINE-NEXT: movq %rdx, %rax 3115; CHECK-BASELINE-NEXT: popq %rbx 3116; CHECK-BASELINE-NEXT: popq %r12 
3117; CHECK-BASELINE-NEXT: popq %r13 3118; CHECK-BASELINE-NEXT: popq %r14 3119; CHECK-BASELINE-NEXT: popq %r15 3120; CHECK-BASELINE-NEXT: popq %rbp 3121; CHECK-BASELINE-NEXT: retq 3122; 3123; CHECK-SSE1-LABEL: in_v16i8: 3124; CHECK-SSE1: # %bb.0: 3125; CHECK-SSE1-NEXT: pushq %rbp 3126; CHECK-SSE1-NEXT: pushq %r15 3127; CHECK-SSE1-NEXT: pushq %r14 3128; CHECK-SSE1-NEXT: pushq %r13 3129; CHECK-SSE1-NEXT: pushq %r12 3130; CHECK-SSE1-NEXT: pushq %rbx 3131; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3132; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3133; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3134; CHECK-SSE1-NEXT: movq %rdi, %rdx 3135; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil 3136; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 3137; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3138; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 3139; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 3140; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 3141; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 3142; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 3143; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 3144; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 3145; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 3146; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil 3147; CHECK-SSE1-NEXT: xorb %dil, %r9b 3148; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b 3149; CHECK-SSE1-NEXT: xorb %dil, %r9b 3150; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil 3151; CHECK-SSE1-NEXT: xorb %r10b, %dil 3152; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil 3153; CHECK-SSE1-NEXT: xorb %r10b, %dil 3154; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b 3155; CHECK-SSE1-NEXT: xorb %r11b, %r10b 3156; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 3157; CHECK-SSE1-NEXT: xorb %r11b, %r10b 3158; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b 3159; CHECK-SSE1-NEXT: xorb %bl, %r11b 3160; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 3161; CHECK-SSE1-NEXT: xorb %bl, 
%r11b 3162; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl 3163; CHECK-SSE1-NEXT: xorb %r13b, %bl 3164; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 3165; CHECK-SSE1-NEXT: xorb %r13b, %bl 3166; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b 3167; CHECK-SSE1-NEXT: xorb %r12b, %r13b 3168; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b 3169; CHECK-SSE1-NEXT: xorb %r12b, %r13b 3170; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b 3171; CHECK-SSE1-NEXT: xorb %r15b, %r12b 3172; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b 3173; CHECK-SSE1-NEXT: xorb %r15b, %r12b 3174; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b 3175; CHECK-SSE1-NEXT: xorb %r14b, %r15b 3176; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b 3177; CHECK-SSE1-NEXT: xorb %r14b, %r15b 3178; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b 3179; CHECK-SSE1-NEXT: xorb %bpl, %r14b 3180; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b 3181; CHECK-SSE1-NEXT: xorb %bpl, %r14b 3182; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl 3183; CHECK-SSE1-NEXT: xorb %al, %bpl 3184; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl 3185; CHECK-SSE1-NEXT: xorb %al, %bpl 3186; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3187; CHECK-SSE1-NEXT: xorb %cl, %al 3188; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 3189; CHECK-SSE1-NEXT: xorb %cl, %al 3190; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl 3191; CHECK-SSE1-NEXT: xorb %sil, %cl 3192; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 3193; CHECK-SSE1-NEXT: xorb %sil, %cl 3194; CHECK-SSE1-NEXT: movb %cl, 15(%rdx) 3195; CHECK-SSE1-NEXT: movb %al, 14(%rdx) 3196; CHECK-SSE1-NEXT: movb %bpl, 13(%rdx) 3197; CHECK-SSE1-NEXT: movb %r14b, 12(%rdx) 3198; CHECK-SSE1-NEXT: movb %r15b, 11(%rdx) 3199; CHECK-SSE1-NEXT: movb %r12b, 10(%rdx) 3200; CHECK-SSE1-NEXT: movb %r13b, 9(%rdx) 3201; CHECK-SSE1-NEXT: movb %bl, 8(%rdx) 3202; CHECK-SSE1-NEXT: movb %r11b, 7(%rdx) 3203; CHECK-SSE1-NEXT: movb %r10b, 6(%rdx) 3204; CHECK-SSE1-NEXT: movb %dil, 5(%rdx) 3205; CHECK-SSE1-NEXT: movb %r9b, 4(%rdx) 3206; CHECK-SSE1-NEXT: movb 
{{[0-9]+}}(%rsp), %al 3207; CHECK-SSE1-NEXT: xorb %al, %r8b 3208; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 3209; CHECK-SSE1-NEXT: xorb %al, %r8b 3210; CHECK-SSE1-NEXT: movb %r8b, 3(%rdx) 3211; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3212; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3213; CHECK-SSE1-NEXT: xorb %al, %cl 3214; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 3215; CHECK-SSE1-NEXT: xorb %al, %cl 3216; CHECK-SSE1-NEXT: movb %cl, 2(%rdx) 3217; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3218; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3219; CHECK-SSE1-NEXT: xorb %al, %cl 3220; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 3221; CHECK-SSE1-NEXT: xorb %al, %cl 3222; CHECK-SSE1-NEXT: movb %cl, 1(%rdx) 3223; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al 3224; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3225; CHECK-SSE1-NEXT: xorb %al, %cl 3226; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 3227; CHECK-SSE1-NEXT: xorb %al, %cl 3228; CHECK-SSE1-NEXT: movb %cl, (%rdx) 3229; CHECK-SSE1-NEXT: movq %rdx, %rax 3230; CHECK-SSE1-NEXT: popq %rbx 3231; CHECK-SSE1-NEXT: popq %r12 3232; CHECK-SSE1-NEXT: popq %r13 3233; CHECK-SSE1-NEXT: popq %r14 3234; CHECK-SSE1-NEXT: popq %r15 3235; CHECK-SSE1-NEXT: popq %rbp 3236; CHECK-SSE1-NEXT: retq 3237; 3238; CHECK-SSE2-LABEL: in_v16i8: 3239; CHECK-SSE2: # %bb.0: 3240; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 3241; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 3242; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 3243; CHECK-SSE2-NEXT: retq 3244; 3245; CHECK-XOP-LABEL: in_v16i8: 3246; CHECK-XOP: # %bb.0: 3247; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 3248; CHECK-XOP-NEXT: retq 3249 %n0 = xor <16 x i8> %x, %y 3250 %n1 = and <16 x i8> %n0, %mask 3251 %r = xor <16 x i8> %n1, %y 3252 ret <16 x i8> %r 3253} 3254 3255define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind { 3256; CHECK-BASELINE-LABEL: in_v8i16: 3257; CHECK-BASELINE: # %bb.0: 3258; 
CHECK-BASELINE-NEXT: pushq %rbp 3259; CHECK-BASELINE-NEXT: pushq %rbx 3260; CHECK-BASELINE-NEXT: movq %rdi, %rax 3261; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d 3262; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d 3263; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi 3264; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3265; CHECK-BASELINE-NEXT: xorl %ebx, %esi 3266; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si 3267; CHECK-BASELINE-NEXT: xorl %ebx, %esi 3268; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3269; CHECK-BASELINE-NEXT: xorl %ebx, %edx 3270; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx 3271; CHECK-BASELINE-NEXT: xorl %ebx, %edx 3272; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3273; CHECK-BASELINE-NEXT: xorl %ebx, %ecx 3274; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 3275; CHECK-BASELINE-NEXT: xorl %ebx, %ecx 3276; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3277; CHECK-BASELINE-NEXT: xorl %ebx, %r8d 3278; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w 3279; CHECK-BASELINE-NEXT: xorl %ebx, %r8d 3280; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3281; CHECK-BASELINE-NEXT: xorl %ebx, %r9d 3282; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r9w 3283; CHECK-BASELINE-NEXT: xorl %ebx, %r9d 3284; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp 3285; CHECK-BASELINE-NEXT: xorw %di, %bp 3286; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bp 3287; CHECK-BASELINE-NEXT: xorl %edi, %ebp 3288; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi 3289; CHECK-BASELINE-NEXT: xorw %r11w, %di 3290; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %di 3291; CHECK-BASELINE-NEXT: xorl %r11d, %edi 3292; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 3293; CHECK-BASELINE-NEXT: xorw %r10w, %bx 3294; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx 3295; CHECK-BASELINE-NEXT: xorl %r10d, %ebx 3296; CHECK-BASELINE-NEXT: movw %bx, 14(%rax) 3297; CHECK-BASELINE-NEXT: movw %di, 12(%rax) 3298; CHECK-BASELINE-NEXT: movw %bp, 10(%rax) 3299; 
CHECK-BASELINE-NEXT: movw %r9w, 8(%rax) 3300; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax) 3301; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) 3302; CHECK-BASELINE-NEXT: movw %dx, 2(%rax) 3303; CHECK-BASELINE-NEXT: movw %si, (%rax) 3304; CHECK-BASELINE-NEXT: popq %rbx 3305; CHECK-BASELINE-NEXT: popq %rbp 3306; CHECK-BASELINE-NEXT: retq 3307; 3308; CHECK-SSE1-LABEL: in_v8i16: 3309; CHECK-SSE1: # %bb.0: 3310; CHECK-SSE1-NEXT: pushq %rbp 3311; CHECK-SSE1-NEXT: pushq %rbx 3312; CHECK-SSE1-NEXT: movq %rdi, %rax 3313; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d 3314; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d 3315; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi 3316; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3317; CHECK-SSE1-NEXT: xorl %ebx, %esi 3318; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si 3319; CHECK-SSE1-NEXT: xorl %ebx, %esi 3320; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3321; CHECK-SSE1-NEXT: xorl %ebx, %edx 3322; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx 3323; CHECK-SSE1-NEXT: xorl %ebx, %edx 3324; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3325; CHECK-SSE1-NEXT: xorl %ebx, %ecx 3326; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 3327; CHECK-SSE1-NEXT: xorl %ebx, %ecx 3328; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3329; CHECK-SSE1-NEXT: xorl %ebx, %r8d 3330; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w 3331; CHECK-SSE1-NEXT: xorl %ebx, %r8d 3332; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx 3333; CHECK-SSE1-NEXT: xorl %ebx, %r9d 3334; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r9w 3335; CHECK-SSE1-NEXT: xorl %ebx, %r9d 3336; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp 3337; CHECK-SSE1-NEXT: xorw %di, %bp 3338; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bp 3339; CHECK-SSE1-NEXT: xorl %edi, %ebp 3340; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi 3341; CHECK-SSE1-NEXT: xorw %r11w, %di 3342; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %di 3343; CHECK-SSE1-NEXT: xorl %r11d, %edi 3344; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 3345; CHECK-SSE1-NEXT: xorw 
%r10w, %bx 3346; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx 3347; CHECK-SSE1-NEXT: xorl %r10d, %ebx 3348; CHECK-SSE1-NEXT: movw %bx, 14(%rax) 3349; CHECK-SSE1-NEXT: movw %di, 12(%rax) 3350; CHECK-SSE1-NEXT: movw %bp, 10(%rax) 3351; CHECK-SSE1-NEXT: movw %r9w, 8(%rax) 3352; CHECK-SSE1-NEXT: movw %r8w, 6(%rax) 3353; CHECK-SSE1-NEXT: movw %cx, 4(%rax) 3354; CHECK-SSE1-NEXT: movw %dx, 2(%rax) 3355; CHECK-SSE1-NEXT: movw %si, (%rax) 3356; CHECK-SSE1-NEXT: popq %rbx 3357; CHECK-SSE1-NEXT: popq %rbp 3358; CHECK-SSE1-NEXT: retq 3359; 3360; CHECK-SSE2-LABEL: in_v8i16: 3361; CHECK-SSE2: # %bb.0: 3362; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 3363; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 3364; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 3365; CHECK-SSE2-NEXT: retq 3366; 3367; CHECK-XOP-LABEL: in_v8i16: 3368; CHECK-XOP: # %bb.0: 3369; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 3370; CHECK-XOP-NEXT: retq 3371 %n0 = xor <8 x i16> %x, %y 3372 %n1 = and <8 x i16> %n0, %mask 3373 %r = xor <8 x i16> %n1, %y 3374 ret <8 x i16> %r 3375} 3376 3377define <4 x i32> @in_v4i32(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) nounwind { 3378; CHECK-BASELINE-LABEL: in_v4i32: 3379; CHECK-BASELINE: # %bb.0: 3380; CHECK-BASELINE-NEXT: pushq %rbx 3381; CHECK-BASELINE-NEXT: movq %rdi, %rax 3382; CHECK-BASELINE-NEXT: movl 12(%rdx), %r8d 3383; CHECK-BASELINE-NEXT: movl 8(%rdx), %r9d 3384; CHECK-BASELINE-NEXT: movl (%rdx), %r11d 3385; CHECK-BASELINE-NEXT: movl 4(%rdx), %r10d 3386; CHECK-BASELINE-NEXT: movl (%rsi), %edx 3387; CHECK-BASELINE-NEXT: xorl %r11d, %edx 3388; CHECK-BASELINE-NEXT: movl 4(%rsi), %edi 3389; CHECK-BASELINE-NEXT: xorl %r10d, %edi 3390; CHECK-BASELINE-NEXT: movl 8(%rsi), %ebx 3391; CHECK-BASELINE-NEXT: xorl %r9d, %ebx 3392; CHECK-BASELINE-NEXT: movl 12(%rsi), %esi 3393; CHECK-BASELINE-NEXT: xorl %r8d, %esi 3394; CHECK-BASELINE-NEXT: andl 12(%rcx), %esi 3395; CHECK-BASELINE-NEXT: andl 8(%rcx), %ebx 3396; CHECK-BASELINE-NEXT: andl 4(%rcx), %edi 3397; CHECK-BASELINE-NEXT: andl (%rcx), %edx 
3398; CHECK-BASELINE-NEXT: xorl %r11d, %edx 3399; CHECK-BASELINE-NEXT: xorl %r10d, %edi 3400; CHECK-BASELINE-NEXT: xorl %r9d, %ebx 3401; CHECK-BASELINE-NEXT: xorl %r8d, %esi 3402; CHECK-BASELINE-NEXT: movl %esi, 12(%rax) 3403; CHECK-BASELINE-NEXT: movl %ebx, 8(%rax) 3404; CHECK-BASELINE-NEXT: movl %edi, 4(%rax) 3405; CHECK-BASELINE-NEXT: movl %edx, (%rax) 3406; CHECK-BASELINE-NEXT: popq %rbx 3407; CHECK-BASELINE-NEXT: retq 3408; 3409; CHECK-SSE1-LABEL: in_v4i32: 3410; CHECK-SSE1: # %bb.0: 3411; CHECK-SSE1-NEXT: movq %rdi, %rax 3412; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 3413; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 3414; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1 3415; CHECK-SSE1-NEXT: andps (%rsi), %xmm0 3416; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 3417; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) 3418; CHECK-SSE1-NEXT: retq 3419; 3420; CHECK-SSE2-LABEL: in_v4i32: 3421; CHECK-SSE2: # %bb.0: 3422; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 3423; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1 3424; CHECK-SSE2-NEXT: andnps (%rsi), %xmm1 3425; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 3426; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 3427; CHECK-SSE2-NEXT: retq 3428; 3429; CHECK-XOP-LABEL: in_v4i32: 3430; CHECK-XOP: # %bb.0: 3431; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 3432; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1 3433; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0 3434; CHECK-XOP-NEXT: retq 3435 %x = load <4 x i32>, <4 x i32> *%px, align 16 3436 %y = load <4 x i32>, <4 x i32> *%py, align 16 3437 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16 3438 %n0 = xor <4 x i32> %x, %y 3439 %n1 = and <4 x i32> %n0, %mask 3440 %r = xor <4 x i32> %n1, %y 3441 ret <4 x i32> %r 3442} 3443 3444define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind { 3445; CHECK-BASELINE-LABEL: in_v2i64: 3446; CHECK-BASELINE: # %bb.0: 3447; CHECK-BASELINE-NEXT: movq %rdi, %rax 3448; CHECK-BASELINE-NEXT: xorq %rdx, %rax 3449; CHECK-BASELINE-NEXT: xorq %rcx, %rsi 3450; CHECK-BASELINE-NEXT: andq %r9, %rsi 3451; 
CHECK-BASELINE-NEXT: andq %r8, %rax 3452; CHECK-BASELINE-NEXT: xorq %rdx, %rax 3453; CHECK-BASELINE-NEXT: xorq %rcx, %rsi 3454; CHECK-BASELINE-NEXT: movq %rsi, %rdx 3455; CHECK-BASELINE-NEXT: retq 3456; 3457; CHECK-SSE1-LABEL: in_v2i64: 3458; CHECK-SSE1: # %bb.0: 3459; CHECK-SSE1-NEXT: movq %rdi, %rax 3460; CHECK-SSE1-NEXT: xorq %rdx, %rax 3461; CHECK-SSE1-NEXT: xorq %rcx, %rsi 3462; CHECK-SSE1-NEXT: andq %r9, %rsi 3463; CHECK-SSE1-NEXT: andq %r8, %rax 3464; CHECK-SSE1-NEXT: xorq %rdx, %rax 3465; CHECK-SSE1-NEXT: xorq %rcx, %rsi 3466; CHECK-SSE1-NEXT: movq %rsi, %rdx 3467; CHECK-SSE1-NEXT: retq 3468; 3469; CHECK-SSE2-LABEL: in_v2i64: 3470; CHECK-SSE2: # %bb.0: 3471; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 3472; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 3473; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 3474; CHECK-SSE2-NEXT: retq 3475; 3476; CHECK-XOP-LABEL: in_v2i64: 3477; CHECK-XOP: # %bb.0: 3478; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 3479; CHECK-XOP-NEXT: retq 3480 %n0 = xor <2 x i64> %x, %y 3481 %n1 = and <2 x i64> %n0, %mask 3482 %r = xor <2 x i64> %n1, %y 3483 ret <2 x i64> %r 3484} 3485 3486; ============================================================================ ; 3487; 256-bit vector width 3488; ============================================================================ ; 3489 3490define <32 x i8> @in_v32i8(<32 x i8> *%px, <32 x i8> *%py, <32 x i8> *%pmask) nounwind { 3491; CHECK-BASELINE-LABEL: in_v32i8: 3492; CHECK-BASELINE: # %bb.0: 3493; CHECK-BASELINE-NEXT: pushq %rbp 3494; CHECK-BASELINE-NEXT: pushq %r15 3495; CHECK-BASELINE-NEXT: pushq %r14 3496; CHECK-BASELINE-NEXT: pushq %r13 3497; CHECK-BASELINE-NEXT: pushq %r12 3498; CHECK-BASELINE-NEXT: pushq %rbx 3499; CHECK-BASELINE-NEXT: movq %rdx, %r13 3500; CHECK-BASELINE-NEXT: movq %rsi, %rbx 3501; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3502; CHECK-BASELINE-NEXT: movb 15(%rdx), %r12b 3503; CHECK-BASELINE-NEXT: movb 14(%rdx), %al 3504; CHECK-BASELINE-NEXT: movb %al, 
{{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3505; CHECK-BASELINE-NEXT: movb 13(%rdx), %al 3506; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3507; CHECK-BASELINE-NEXT: movb 12(%rdx), %al 3508; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3509; CHECK-BASELINE-NEXT: movb 11(%rdx), %al 3510; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3511; CHECK-BASELINE-NEXT: movb 10(%rdx), %al 3512; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3513; CHECK-BASELINE-NEXT: movb 9(%rdx), %r9b 3514; CHECK-BASELINE-NEXT: movb 8(%rdx), %r10b 3515; CHECK-BASELINE-NEXT: movb 7(%rdx), %r11b 3516; CHECK-BASELINE-NEXT: movb 6(%rdx), %r8b 3517; CHECK-BASELINE-NEXT: movb 5(%rdx), %bpl 3518; CHECK-BASELINE-NEXT: movb 4(%rdx), %sil 3519; CHECK-BASELINE-NEXT: movb 3(%rdx), %dil 3520; CHECK-BASELINE-NEXT: movb 2(%rdx), %r14b 3521; CHECK-BASELINE-NEXT: movb (%rdx), %al 3522; CHECK-BASELINE-NEXT: movb 1(%rdx), %r15b 3523; CHECK-BASELINE-NEXT: movb (%rbx), %dl 3524; CHECK-BASELINE-NEXT: xorb %al, %dl 3525; CHECK-BASELINE-NEXT: andb (%rcx), %dl 3526; CHECK-BASELINE-NEXT: xorb %al, %dl 3527; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3528; CHECK-BASELINE-NEXT: movb 1(%rbx), %al 3529; CHECK-BASELINE-NEXT: xorb %r15b, %al 3530; CHECK-BASELINE-NEXT: andb 1(%rcx), %al 3531; CHECK-BASELINE-NEXT: xorb %r15b, %al 3532; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3533; CHECK-BASELINE-NEXT: movb 2(%rbx), %al 3534; CHECK-BASELINE-NEXT: xorb %r14b, %al 3535; CHECK-BASELINE-NEXT: andb 2(%rcx), %al 3536; CHECK-BASELINE-NEXT: xorb %r14b, %al 3537; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3538; CHECK-BASELINE-NEXT: movb 3(%rbx), %al 3539; CHECK-BASELINE-NEXT: xorb %dil, %al 3540; CHECK-BASELINE-NEXT: andb 3(%rcx), %al 3541; CHECK-BASELINE-NEXT: xorb %dil, %al 3542; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 
3543; CHECK-BASELINE-NEXT: movb 4(%rbx), %al 3544; CHECK-BASELINE-NEXT: xorb %sil, %al 3545; CHECK-BASELINE-NEXT: andb 4(%rcx), %al 3546; CHECK-BASELINE-NEXT: xorb %sil, %al 3547; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3548; CHECK-BASELINE-NEXT: movb 5(%rbx), %al 3549; CHECK-BASELINE-NEXT: xorb %bpl, %al 3550; CHECK-BASELINE-NEXT: andb 5(%rcx), %al 3551; CHECK-BASELINE-NEXT: xorb %bpl, %al 3552; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3553; CHECK-BASELINE-NEXT: movb 6(%rbx), %al 3554; CHECK-BASELINE-NEXT: xorb %r8b, %al 3555; CHECK-BASELINE-NEXT: andb 6(%rcx), %al 3556; CHECK-BASELINE-NEXT: xorb %r8b, %al 3557; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3558; CHECK-BASELINE-NEXT: movb 7(%rbx), %al 3559; CHECK-BASELINE-NEXT: xorb %r11b, %al 3560; CHECK-BASELINE-NEXT: andb 7(%rcx), %al 3561; CHECK-BASELINE-NEXT: xorb %r11b, %al 3562; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3563; CHECK-BASELINE-NEXT: movb 8(%rbx), %al 3564; CHECK-BASELINE-NEXT: xorb %r10b, %al 3565; CHECK-BASELINE-NEXT: andb 8(%rcx), %al 3566; CHECK-BASELINE-NEXT: xorb %r10b, %al 3567; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3568; CHECK-BASELINE-NEXT: movb 9(%rbx), %al 3569; CHECK-BASELINE-NEXT: xorb %r9b, %al 3570; CHECK-BASELINE-NEXT: andb 9(%rcx), %al 3571; CHECK-BASELINE-NEXT: xorb %r9b, %al 3572; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3573; CHECK-BASELINE-NEXT: movb 10(%rbx), %dl 3574; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3575; CHECK-BASELINE-NEXT: xorb %al, %dl 3576; CHECK-BASELINE-NEXT: andb 10(%rcx), %dl 3577; CHECK-BASELINE-NEXT: xorb %al, %dl 3578; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3579; CHECK-BASELINE-NEXT: movb 11(%rbx), %dl 3580; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3581; CHECK-BASELINE-NEXT: xorb %al, 
%dl 3582; CHECK-BASELINE-NEXT: andb 11(%rcx), %dl 3583; CHECK-BASELINE-NEXT: xorb %al, %dl 3584; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3585; CHECK-BASELINE-NEXT: movb 12(%rbx), %dl 3586; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3587; CHECK-BASELINE-NEXT: xorb %al, %dl 3588; CHECK-BASELINE-NEXT: andb 12(%rcx), %dl 3589; CHECK-BASELINE-NEXT: xorb %al, %dl 3590; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3591; CHECK-BASELINE-NEXT: movb 13(%rbx), %dl 3592; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3593; CHECK-BASELINE-NEXT: xorb %al, %dl 3594; CHECK-BASELINE-NEXT: andb 13(%rcx), %dl 3595; CHECK-BASELINE-NEXT: xorb %al, %dl 3596; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3597; CHECK-BASELINE-NEXT: movb 14(%rbx), %dl 3598; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3599; CHECK-BASELINE-NEXT: xorb %al, %dl 3600; CHECK-BASELINE-NEXT: andb 14(%rcx), %dl 3601; CHECK-BASELINE-NEXT: xorb %al, %dl 3602; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3603; CHECK-BASELINE-NEXT: movb 15(%rbx), %al 3604; CHECK-BASELINE-NEXT: xorb %r12b, %al 3605; CHECK-BASELINE-NEXT: andb 15(%rcx), %al 3606; CHECK-BASELINE-NEXT: xorb %r12b, %al 3607; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3608; CHECK-BASELINE-NEXT: movb 16(%r13), %al 3609; CHECK-BASELINE-NEXT: movb 16(%rbx), %dl 3610; CHECK-BASELINE-NEXT: xorb %al, %dl 3611; CHECK-BASELINE-NEXT: andb 16(%rcx), %dl 3612; CHECK-BASELINE-NEXT: xorb %al, %dl 3613; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3614; CHECK-BASELINE-NEXT: movb 17(%r13), %al 3615; CHECK-BASELINE-NEXT: movb 17(%rbx), %dl 3616; CHECK-BASELINE-NEXT: xorb %al, %dl 3617; CHECK-BASELINE-NEXT: andb 17(%rcx), %dl 3618; CHECK-BASELINE-NEXT: xorb %al, %dl 3619; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte 
Spill 3620; CHECK-BASELINE-NEXT: movb 18(%r13), %al 3621; CHECK-BASELINE-NEXT: movb 18(%rbx), %dl 3622; CHECK-BASELINE-NEXT: xorb %al, %dl 3623; CHECK-BASELINE-NEXT: andb 18(%rcx), %dl 3624; CHECK-BASELINE-NEXT: xorb %al, %dl 3625; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3626; CHECK-BASELINE-NEXT: movb 19(%r13), %al 3627; CHECK-BASELINE-NEXT: movb 19(%rbx), %r12b 3628; CHECK-BASELINE-NEXT: xorb %al, %r12b 3629; CHECK-BASELINE-NEXT: andb 19(%rcx), %r12b 3630; CHECK-BASELINE-NEXT: xorb %al, %r12b 3631; CHECK-BASELINE-NEXT: movb 20(%r13), %al 3632; CHECK-BASELINE-NEXT: movb 20(%rbx), %r15b 3633; CHECK-BASELINE-NEXT: xorb %al, %r15b 3634; CHECK-BASELINE-NEXT: andb 20(%rcx), %r15b 3635; CHECK-BASELINE-NEXT: movq %rcx, %rsi 3636; CHECK-BASELINE-NEXT: xorb %al, %r15b 3637; CHECK-BASELINE-NEXT: movb 21(%r13), %al 3638; CHECK-BASELINE-NEXT: movb 21(%rbx), %r14b 3639; CHECK-BASELINE-NEXT: xorb %al, %r14b 3640; CHECK-BASELINE-NEXT: andb 21(%rcx), %r14b 3641; CHECK-BASELINE-NEXT: xorb %al, %r14b 3642; CHECK-BASELINE-NEXT: movb 22(%r13), %al 3643; CHECK-BASELINE-NEXT: movb 22(%rbx), %bpl 3644; CHECK-BASELINE-NEXT: xorb %al, %bpl 3645; CHECK-BASELINE-NEXT: andb 22(%rcx), %bpl 3646; CHECK-BASELINE-NEXT: xorb %al, %bpl 3647; CHECK-BASELINE-NEXT: movb 23(%r13), %al 3648; CHECK-BASELINE-NEXT: movb 23(%rbx), %r11b 3649; CHECK-BASELINE-NEXT: xorb %al, %r11b 3650; CHECK-BASELINE-NEXT: andb 23(%rcx), %r11b 3651; CHECK-BASELINE-NEXT: xorb %al, %r11b 3652; CHECK-BASELINE-NEXT: movb 24(%r13), %al 3653; CHECK-BASELINE-NEXT: movb 24(%rbx), %r10b 3654; CHECK-BASELINE-NEXT: xorb %al, %r10b 3655; CHECK-BASELINE-NEXT: andb 24(%rcx), %r10b 3656; CHECK-BASELINE-NEXT: xorb %al, %r10b 3657; CHECK-BASELINE-NEXT: movb 25(%r13), %al 3658; CHECK-BASELINE-NEXT: movb 25(%rbx), %r9b 3659; CHECK-BASELINE-NEXT: xorb %al, %r9b 3660; CHECK-BASELINE-NEXT: andb 25(%rcx), %r9b 3661; CHECK-BASELINE-NEXT: xorb %al, %r9b 3662; CHECK-BASELINE-NEXT: movb 26(%r13), %al 3663; 
CHECK-BASELINE-NEXT: movb 26(%rbx), %r8b 3664; CHECK-BASELINE-NEXT: xorb %al, %r8b 3665; CHECK-BASELINE-NEXT: andb 26(%rcx), %r8b 3666; CHECK-BASELINE-NEXT: xorb %al, %r8b 3667; CHECK-BASELINE-NEXT: movb 27(%r13), %al 3668; CHECK-BASELINE-NEXT: movb 27(%rbx), %dil 3669; CHECK-BASELINE-NEXT: xorb %al, %dil 3670; CHECK-BASELINE-NEXT: andb 27(%rcx), %dil 3671; CHECK-BASELINE-NEXT: xorb %al, %dil 3672; CHECK-BASELINE-NEXT: movb 28(%r13), %al 3673; CHECK-BASELINE-NEXT: movb 28(%rbx), %dl 3674; CHECK-BASELINE-NEXT: xorb %al, %dl 3675; CHECK-BASELINE-NEXT: andb 28(%rcx), %dl 3676; CHECK-BASELINE-NEXT: xorb %al, %dl 3677; CHECK-BASELINE-NEXT: movb 29(%r13), %al 3678; CHECK-BASELINE-NEXT: movb 29(%rbx), %cl 3679; CHECK-BASELINE-NEXT: xorb %al, %cl 3680; CHECK-BASELINE-NEXT: andb 29(%rsi), %cl 3681; CHECK-BASELINE-NEXT: xorb %al, %cl 3682; CHECK-BASELINE-NEXT: movb 30(%r13), %al 3683; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3684; CHECK-BASELINE-NEXT: movb 30(%rbx), %al 3685; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload 3686; CHECK-BASELINE-NEXT: andb 30(%rsi), %al 3687; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload 3688; CHECK-BASELINE-NEXT: movb 31(%r13), %r13b 3689; CHECK-BASELINE-NEXT: movb 31(%rbx), %bl 3690; CHECK-BASELINE-NEXT: xorb %r13b, %bl 3691; CHECK-BASELINE-NEXT: andb 31(%rsi), %bl 3692; CHECK-BASELINE-NEXT: xorb %r13b, %bl 3693; CHECK-BASELINE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 3694; CHECK-BASELINE-NEXT: movb %bl, 31(%r13) 3695; CHECK-BASELINE-NEXT: movb %al, 30(%r13) 3696; CHECK-BASELINE-NEXT: movb %cl, 29(%r13) 3697; CHECK-BASELINE-NEXT: movb %dl, 28(%r13) 3698; CHECK-BASELINE-NEXT: movb %dil, 27(%r13) 3699; CHECK-BASELINE-NEXT: movb %r8b, 26(%r13) 3700; CHECK-BASELINE-NEXT: movb %r9b, 25(%r13) 3701; CHECK-BASELINE-NEXT: movb %r10b, 24(%r13) 3702; CHECK-BASELINE-NEXT: movb %r11b, 23(%r13) 3703; CHECK-BASELINE-NEXT: movb %bpl, 22(%r13) 3704; 
CHECK-BASELINE-NEXT: movb %r14b, 21(%r13) 3705; CHECK-BASELINE-NEXT: movb %r15b, 20(%r13) 3706; CHECK-BASELINE-NEXT: movb %r12b, 19(%r13) 3707; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3708; CHECK-BASELINE-NEXT: movb %al, 18(%r13) 3709; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3710; CHECK-BASELINE-NEXT: movb %al, 17(%r13) 3711; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3712; CHECK-BASELINE-NEXT: movb %al, 16(%r13) 3713; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3714; CHECK-BASELINE-NEXT: movb %al, 15(%r13) 3715; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3716; CHECK-BASELINE-NEXT: movb %al, 14(%r13) 3717; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3718; CHECK-BASELINE-NEXT: movb %al, 13(%r13) 3719; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3720; CHECK-BASELINE-NEXT: movb %al, 12(%r13) 3721; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3722; CHECK-BASELINE-NEXT: movb %al, 11(%r13) 3723; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3724; CHECK-BASELINE-NEXT: movb %al, 10(%r13) 3725; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3726; CHECK-BASELINE-NEXT: movb %al, 9(%r13) 3727; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3728; CHECK-BASELINE-NEXT: movb %al, 8(%r13) 3729; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3730; CHECK-BASELINE-NEXT: movb %al, 7(%r13) 3731; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3732; CHECK-BASELINE-NEXT: movb %al, 6(%r13) 3733; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3734; CHECK-BASELINE-NEXT: movb %al, 5(%r13) 3735; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3736; CHECK-BASELINE-NEXT: movb %al, 4(%r13) 3737; 
CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3738; CHECK-BASELINE-NEXT: movb %al, 3(%r13) 3739; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3740; CHECK-BASELINE-NEXT: movb %al, 2(%r13) 3741; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3742; CHECK-BASELINE-NEXT: movb %al, 1(%r13) 3743; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3744; CHECK-BASELINE-NEXT: movb %al, (%r13) 3745; CHECK-BASELINE-NEXT: movq %r13, %rax 3746; CHECK-BASELINE-NEXT: popq %rbx 3747; CHECK-BASELINE-NEXT: popq %r12 3748; CHECK-BASELINE-NEXT: popq %r13 3749; CHECK-BASELINE-NEXT: popq %r14 3750; CHECK-BASELINE-NEXT: popq %r15 3751; CHECK-BASELINE-NEXT: popq %rbp 3752; CHECK-BASELINE-NEXT: retq 3753; 3754; CHECK-SSE1-LABEL: in_v32i8: 3755; CHECK-SSE1: # %bb.0: 3756; CHECK-SSE1-NEXT: pushq %rbp 3757; CHECK-SSE1-NEXT: pushq %r15 3758; CHECK-SSE1-NEXT: pushq %r14 3759; CHECK-SSE1-NEXT: pushq %r13 3760; CHECK-SSE1-NEXT: pushq %r12 3761; CHECK-SSE1-NEXT: pushq %rbx 3762; CHECK-SSE1-NEXT: movq %rdx, %r13 3763; CHECK-SSE1-NEXT: movq %rsi, %rbx 3764; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3765; CHECK-SSE1-NEXT: movb 15(%rdx), %r12b 3766; CHECK-SSE1-NEXT: movb 14(%rdx), %al 3767; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3768; CHECK-SSE1-NEXT: movb 13(%rdx), %al 3769; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3770; CHECK-SSE1-NEXT: movb 12(%rdx), %al 3771; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3772; CHECK-SSE1-NEXT: movb 11(%rdx), %al 3773; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3774; CHECK-SSE1-NEXT: movb 10(%rdx), %al 3775; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3776; CHECK-SSE1-NEXT: movb 9(%rdx), %r9b 3777; CHECK-SSE1-NEXT: movb 8(%rdx), %r10b 3778; CHECK-SSE1-NEXT: movb 7(%rdx), %r11b 3779; CHECK-SSE1-NEXT: movb 6(%rdx), %r8b 3780; 
CHECK-SSE1-NEXT: movb 5(%rdx), %bpl 3781; CHECK-SSE1-NEXT: movb 4(%rdx), %sil 3782; CHECK-SSE1-NEXT: movb 3(%rdx), %dil 3783; CHECK-SSE1-NEXT: movb 2(%rdx), %r14b 3784; CHECK-SSE1-NEXT: movb (%rdx), %al 3785; CHECK-SSE1-NEXT: movb 1(%rdx), %r15b 3786; CHECK-SSE1-NEXT: movb (%rbx), %dl 3787; CHECK-SSE1-NEXT: xorb %al, %dl 3788; CHECK-SSE1-NEXT: andb (%rcx), %dl 3789; CHECK-SSE1-NEXT: xorb %al, %dl 3790; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3791; CHECK-SSE1-NEXT: movb 1(%rbx), %al 3792; CHECK-SSE1-NEXT: xorb %r15b, %al 3793; CHECK-SSE1-NEXT: andb 1(%rcx), %al 3794; CHECK-SSE1-NEXT: xorb %r15b, %al 3795; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3796; CHECK-SSE1-NEXT: movb 2(%rbx), %al 3797; CHECK-SSE1-NEXT: xorb %r14b, %al 3798; CHECK-SSE1-NEXT: andb 2(%rcx), %al 3799; CHECK-SSE1-NEXT: xorb %r14b, %al 3800; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3801; CHECK-SSE1-NEXT: movb 3(%rbx), %al 3802; CHECK-SSE1-NEXT: xorb %dil, %al 3803; CHECK-SSE1-NEXT: andb 3(%rcx), %al 3804; CHECK-SSE1-NEXT: xorb %dil, %al 3805; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3806; CHECK-SSE1-NEXT: movb 4(%rbx), %al 3807; CHECK-SSE1-NEXT: xorb %sil, %al 3808; CHECK-SSE1-NEXT: andb 4(%rcx), %al 3809; CHECK-SSE1-NEXT: xorb %sil, %al 3810; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3811; CHECK-SSE1-NEXT: movb 5(%rbx), %al 3812; CHECK-SSE1-NEXT: xorb %bpl, %al 3813; CHECK-SSE1-NEXT: andb 5(%rcx), %al 3814; CHECK-SSE1-NEXT: xorb %bpl, %al 3815; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3816; CHECK-SSE1-NEXT: movb 6(%rbx), %al 3817; CHECK-SSE1-NEXT: xorb %r8b, %al 3818; CHECK-SSE1-NEXT: andb 6(%rcx), %al 3819; CHECK-SSE1-NEXT: xorb %r8b, %al 3820; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3821; CHECK-SSE1-NEXT: movb 7(%rbx), %al 3822; CHECK-SSE1-NEXT: xorb %r11b, %al 3823; CHECK-SSE1-NEXT: andb 7(%rcx), %al 3824; 
CHECK-SSE1-NEXT: xorb %r11b, %al 3825; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3826; CHECK-SSE1-NEXT: movb 8(%rbx), %al 3827; CHECK-SSE1-NEXT: xorb %r10b, %al 3828; CHECK-SSE1-NEXT: andb 8(%rcx), %al 3829; CHECK-SSE1-NEXT: xorb %r10b, %al 3830; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3831; CHECK-SSE1-NEXT: movb 9(%rbx), %al 3832; CHECK-SSE1-NEXT: xorb %r9b, %al 3833; CHECK-SSE1-NEXT: andb 9(%rcx), %al 3834; CHECK-SSE1-NEXT: xorb %r9b, %al 3835; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3836; CHECK-SSE1-NEXT: movb 10(%rbx), %dl 3837; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3838; CHECK-SSE1-NEXT: xorb %al, %dl 3839; CHECK-SSE1-NEXT: andb 10(%rcx), %dl 3840; CHECK-SSE1-NEXT: xorb %al, %dl 3841; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3842; CHECK-SSE1-NEXT: movb 11(%rbx), %dl 3843; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3844; CHECK-SSE1-NEXT: xorb %al, %dl 3845; CHECK-SSE1-NEXT: andb 11(%rcx), %dl 3846; CHECK-SSE1-NEXT: xorb %al, %dl 3847; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3848; CHECK-SSE1-NEXT: movb 12(%rbx), %dl 3849; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3850; CHECK-SSE1-NEXT: xorb %al, %dl 3851; CHECK-SSE1-NEXT: andb 12(%rcx), %dl 3852; CHECK-SSE1-NEXT: xorb %al, %dl 3853; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3854; CHECK-SSE1-NEXT: movb 13(%rbx), %dl 3855; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3856; CHECK-SSE1-NEXT: xorb %al, %dl 3857; CHECK-SSE1-NEXT: andb 13(%rcx), %dl 3858; CHECK-SSE1-NEXT: xorb %al, %dl 3859; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3860; CHECK-SSE1-NEXT: movb 14(%rbx), %dl 3861; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3862; CHECK-SSE1-NEXT: xorb %al, %dl 3863; CHECK-SSE1-NEXT: andb 14(%rcx), %dl 3864; 
CHECK-SSE1-NEXT: xorb %al, %dl 3865; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3866; CHECK-SSE1-NEXT: movb 15(%rbx), %al 3867; CHECK-SSE1-NEXT: xorb %r12b, %al 3868; CHECK-SSE1-NEXT: andb 15(%rcx), %al 3869; CHECK-SSE1-NEXT: xorb %r12b, %al 3870; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3871; CHECK-SSE1-NEXT: movb 16(%r13), %al 3872; CHECK-SSE1-NEXT: movb 16(%rbx), %dl 3873; CHECK-SSE1-NEXT: xorb %al, %dl 3874; CHECK-SSE1-NEXT: andb 16(%rcx), %dl 3875; CHECK-SSE1-NEXT: xorb %al, %dl 3876; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3877; CHECK-SSE1-NEXT: movb 17(%r13), %al 3878; CHECK-SSE1-NEXT: movb 17(%rbx), %dl 3879; CHECK-SSE1-NEXT: xorb %al, %dl 3880; CHECK-SSE1-NEXT: andb 17(%rcx), %dl 3881; CHECK-SSE1-NEXT: xorb %al, %dl 3882; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3883; CHECK-SSE1-NEXT: movb 18(%r13), %al 3884; CHECK-SSE1-NEXT: movb 18(%rbx), %dl 3885; CHECK-SSE1-NEXT: xorb %al, %dl 3886; CHECK-SSE1-NEXT: andb 18(%rcx), %dl 3887; CHECK-SSE1-NEXT: xorb %al, %dl 3888; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3889; CHECK-SSE1-NEXT: movb 19(%r13), %al 3890; CHECK-SSE1-NEXT: movb 19(%rbx), %r12b 3891; CHECK-SSE1-NEXT: xorb %al, %r12b 3892; CHECK-SSE1-NEXT: andb 19(%rcx), %r12b 3893; CHECK-SSE1-NEXT: xorb %al, %r12b 3894; CHECK-SSE1-NEXT: movb 20(%r13), %al 3895; CHECK-SSE1-NEXT: movb 20(%rbx), %r15b 3896; CHECK-SSE1-NEXT: xorb %al, %r15b 3897; CHECK-SSE1-NEXT: andb 20(%rcx), %r15b 3898; CHECK-SSE1-NEXT: movq %rcx, %rsi 3899; CHECK-SSE1-NEXT: xorb %al, %r15b 3900; CHECK-SSE1-NEXT: movb 21(%r13), %al 3901; CHECK-SSE1-NEXT: movb 21(%rbx), %r14b 3902; CHECK-SSE1-NEXT: xorb %al, %r14b 3903; CHECK-SSE1-NEXT: andb 21(%rcx), %r14b 3904; CHECK-SSE1-NEXT: xorb %al, %r14b 3905; CHECK-SSE1-NEXT: movb 22(%r13), %al 3906; CHECK-SSE1-NEXT: movb 22(%rbx), %bpl 3907; CHECK-SSE1-NEXT: xorb %al, %bpl 3908; CHECK-SSE1-NEXT: andb 22(%rcx), %bpl 3909; 
CHECK-SSE1-NEXT: xorb %al, %bpl 3910; CHECK-SSE1-NEXT: movb 23(%r13), %al 3911; CHECK-SSE1-NEXT: movb 23(%rbx), %r11b 3912; CHECK-SSE1-NEXT: xorb %al, %r11b 3913; CHECK-SSE1-NEXT: andb 23(%rcx), %r11b 3914; CHECK-SSE1-NEXT: xorb %al, %r11b 3915; CHECK-SSE1-NEXT: movb 24(%r13), %al 3916; CHECK-SSE1-NEXT: movb 24(%rbx), %r10b 3917; CHECK-SSE1-NEXT: xorb %al, %r10b 3918; CHECK-SSE1-NEXT: andb 24(%rcx), %r10b 3919; CHECK-SSE1-NEXT: xorb %al, %r10b 3920; CHECK-SSE1-NEXT: movb 25(%r13), %al 3921; CHECK-SSE1-NEXT: movb 25(%rbx), %r9b 3922; CHECK-SSE1-NEXT: xorb %al, %r9b 3923; CHECK-SSE1-NEXT: andb 25(%rcx), %r9b 3924; CHECK-SSE1-NEXT: xorb %al, %r9b 3925; CHECK-SSE1-NEXT: movb 26(%r13), %al 3926; CHECK-SSE1-NEXT: movb 26(%rbx), %r8b 3927; CHECK-SSE1-NEXT: xorb %al, %r8b 3928; CHECK-SSE1-NEXT: andb 26(%rcx), %r8b 3929; CHECK-SSE1-NEXT: xorb %al, %r8b 3930; CHECK-SSE1-NEXT: movb 27(%r13), %al 3931; CHECK-SSE1-NEXT: movb 27(%rbx), %dil 3932; CHECK-SSE1-NEXT: xorb %al, %dil 3933; CHECK-SSE1-NEXT: andb 27(%rcx), %dil 3934; CHECK-SSE1-NEXT: xorb %al, %dil 3935; CHECK-SSE1-NEXT: movb 28(%r13), %al 3936; CHECK-SSE1-NEXT: movb 28(%rbx), %dl 3937; CHECK-SSE1-NEXT: xorb %al, %dl 3938; CHECK-SSE1-NEXT: andb 28(%rcx), %dl 3939; CHECK-SSE1-NEXT: xorb %al, %dl 3940; CHECK-SSE1-NEXT: movb 29(%r13), %al 3941; CHECK-SSE1-NEXT: movb 29(%rbx), %cl 3942; CHECK-SSE1-NEXT: xorb %al, %cl 3943; CHECK-SSE1-NEXT: andb 29(%rsi), %cl 3944; CHECK-SSE1-NEXT: xorb %al, %cl 3945; CHECK-SSE1-NEXT: movb 30(%r13), %al 3946; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3947; CHECK-SSE1-NEXT: movb 30(%rbx), %al 3948; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload 3949; CHECK-SSE1-NEXT: andb 30(%rsi), %al 3950; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload 3951; CHECK-SSE1-NEXT: movb 31(%r13), %r13b 3952; CHECK-SSE1-NEXT: movb 31(%rbx), %bl 3953; CHECK-SSE1-NEXT: xorb %r13b, %bl 3954; CHECK-SSE1-NEXT: andb 31(%rsi), %bl 3955; 
CHECK-SSE1-NEXT: xorb %r13b, %bl 3956; CHECK-SSE1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload 3957; CHECK-SSE1-NEXT: movb %bl, 31(%r13) 3958; CHECK-SSE1-NEXT: movb %al, 30(%r13) 3959; CHECK-SSE1-NEXT: movb %cl, 29(%r13) 3960; CHECK-SSE1-NEXT: movb %dl, 28(%r13) 3961; CHECK-SSE1-NEXT: movb %dil, 27(%r13) 3962; CHECK-SSE1-NEXT: movb %r8b, 26(%r13) 3963; CHECK-SSE1-NEXT: movb %r9b, 25(%r13) 3964; CHECK-SSE1-NEXT: movb %r10b, 24(%r13) 3965; CHECK-SSE1-NEXT: movb %r11b, 23(%r13) 3966; CHECK-SSE1-NEXT: movb %bpl, 22(%r13) 3967; CHECK-SSE1-NEXT: movb %r14b, 21(%r13) 3968; CHECK-SSE1-NEXT: movb %r15b, 20(%r13) 3969; CHECK-SSE1-NEXT: movb %r12b, 19(%r13) 3970; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3971; CHECK-SSE1-NEXT: movb %al, 18(%r13) 3972; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3973; CHECK-SSE1-NEXT: movb %al, 17(%r13) 3974; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3975; CHECK-SSE1-NEXT: movb %al, 16(%r13) 3976; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3977; CHECK-SSE1-NEXT: movb %al, 15(%r13) 3978; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3979; CHECK-SSE1-NEXT: movb %al, 14(%r13) 3980; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3981; CHECK-SSE1-NEXT: movb %al, 13(%r13) 3982; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3983; CHECK-SSE1-NEXT: movb %al, 12(%r13) 3984; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3985; CHECK-SSE1-NEXT: movb %al, 11(%r13) 3986; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3987; CHECK-SSE1-NEXT: movb %al, 10(%r13) 3988; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3989; CHECK-SSE1-NEXT: movb %al, 9(%r13) 3990; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3991; CHECK-SSE1-NEXT: movb %al, 8(%r13) 3992; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al 
# 1-byte Reload 3993; CHECK-SSE1-NEXT: movb %al, 7(%r13) 3994; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3995; CHECK-SSE1-NEXT: movb %al, 6(%r13) 3996; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3997; CHECK-SSE1-NEXT: movb %al, 5(%r13) 3998; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 3999; CHECK-SSE1-NEXT: movb %al, 4(%r13) 4000; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4001; CHECK-SSE1-NEXT: movb %al, 3(%r13) 4002; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4003; CHECK-SSE1-NEXT: movb %al, 2(%r13) 4004; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4005; CHECK-SSE1-NEXT: movb %al, 1(%r13) 4006; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload 4007; CHECK-SSE1-NEXT: movb %al, (%r13) 4008; CHECK-SSE1-NEXT: movq %r13, %rax 4009; CHECK-SSE1-NEXT: popq %rbx 4010; CHECK-SSE1-NEXT: popq %r12 4011; CHECK-SSE1-NEXT: popq %r13 4012; CHECK-SSE1-NEXT: popq %r14 4013; CHECK-SSE1-NEXT: popq %r15 4014; CHECK-SSE1-NEXT: popq %rbp 4015; CHECK-SSE1-NEXT: retq 4016; 4017; CHECK-SSE2-LABEL: in_v32i8: 4018; CHECK-SSE2: # %bb.0: 4019; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 4020; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 4021; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 4022; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 4023; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 4024; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 4025; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 4026; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 4027; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 4028; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 4029; CHECK-SSE2-NEXT: retq 4030; 4031; CHECK-XOP-LABEL: in_v32i8: 4032; CHECK-XOP: # %bb.0: 4033; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 4034; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 4035; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 4036; CHECK-XOP-NEXT: retq 4037 %x = load <32 x i8>, <32 x i8> *%px, align 32 4038 %y = load <32 x i8>, <32 x i8> *%py, align 32 
%mask = load <32 x i8>, <32 x i8> *%pmask, align 32
%n0 = xor <32 x i8> %x, %y
%n1 = and <32 x i8> %n0, %mask
%r = xor <32 x i8> %n1, %y
ret <32 x i8> %r
}

; @in_v16i16: loads %x, %y and %mask as <16 x i16> from %px, %py and %pmask
; (each load is align 32) and returns ((%x ^ %y) & %mask) ^ %y. Bit for bit,
; that result equals %x where the mask bit is 1 and %y where it is 0.
; The assertions inside the function are autogenerated (see the NOTE on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i16> @in_v16i16(<16 x i16> *%px, <16 x i16> *%py, <16 x i16> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: in_v16i16:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbp
; CHECK-BASELINE-NEXT: pushq %r15
; CHECK-BASELINE-NEXT: pushq %r14
; CHECK-BASELINE-NEXT: pushq %r13
; CHECK-BASELINE-NEXT: pushq %r12
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movq %rcx, %r9
; CHECK-BASELINE-NEXT: movq %rdi, %r10
; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %edi
; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 28(%rdx), %edi
; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %edi
; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 24(%rdx), %eax
; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 22(%rdx), %eax
; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 20(%rdx), %r11d
; CHECK-BASELINE-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r14d
; CHECK-BASELINE-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 16(%rdx), %r15d
; CHECK-BASELINE-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %r12d
; CHECK-BASELINE-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 12(%rdx), %r13d
; CHECK-BASELINE-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r8d
; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebx
; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %ebp
; CHECK-BASELINE-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl (%rdx), %ecx
; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 4(%rdx), %edi
; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %eax
; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl (%rsi), %edx
; CHECK-BASELINE-NEXT: xorw %cx, %dx
; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %ecx
; CHECK-BASELINE-NEXT: xorw %ax, %cx
; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 4(%rsi), %eax
; CHECK-BASELINE-NEXT: xorw %di, %ax
; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movzwl 6(%rsi), %edx
; CHECK-BASELINE-NEXT: xorw %bp, %dx
; CHECK-BASELINE-NEXT: movl %edx, %eax
; CHECK-BASELINE-NEXT: movzwl 8(%rsi), %ecx
; CHECK-BASELINE-NEXT: xorw %bx, %cx
; CHECK-BASELINE-NEXT: movzwl 10(%rsi), %edx
; CHECK-BASELINE-NEXT: xorw %r8w, %dx
; CHECK-BASELINE-NEXT: movl %edx, %r8d
; CHECK-BASELINE-NEXT: movzwl 12(%rsi), %edx
; CHECK-BASELINE-NEXT: xorw %r13w, %dx
; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %r13d
; CHECK-BASELINE-NEXT: xorw %r12w, %r13w
; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %r12d
; CHECK-BASELINE-NEXT: xorw %r15w, %r12w
; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %r15d
; CHECK-BASELINE-NEXT: xorw %r14w, %r15w
; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %r14d
; CHECK-BASELINE-NEXT: xorw %r11w, %r14w
; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %ebp
; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload
; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %ebx
; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload
; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r11d
; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload
; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %edi
; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload
; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %esi
; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload
; CHECK-BASELINE-NEXT: andw 30(%r9), %si
; CHECK-BASELINE-NEXT: andw 28(%r9), %di
; CHECK-BASELINE-NEXT: andw 26(%r9), %r11w
; CHECK-BASELINE-NEXT: andw 24(%r9), %bx
; CHECK-BASELINE-NEXT: andw 22(%r9), %bp
; CHECK-BASELINE-NEXT: andw 20(%r9), %r14w
; CHECK-BASELINE-NEXT: andw 18(%r9), %r15w
; CHECK-BASELINE-NEXT: andw 16(%r9), %r12w
; CHECK-BASELINE-NEXT: andw 14(%r9), %r13w
; CHECK-BASELINE-NEXT: andw 12(%r9), %dx
; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: andw 10(%r9), %r8w
; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl %ecx, %edx
; CHECK-BASELINE-NEXT: andw 8(%r9), %dx
; CHECK-BASELINE-NEXT: andw 6(%r9), %ax
; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload
; CHECK-BASELINE-NEXT: andw 4(%r9), %r8w
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-BASELINE-NEXT: andw 2(%r9), %ax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: andw (%r9), %cx
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: movl %edx, %ecx
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: movw %si, 30(%r10)
; CHECK-BASELINE-NEXT: movw %di, 28(%r10)
; CHECK-BASELINE-NEXT: movw %r11w, 26(%r10)
; CHECK-BASELINE-NEXT: movw %bx, 24(%r10)
; CHECK-BASELINE-NEXT: movw %bp, 22(%r10)
; CHECK-BASELINE-NEXT: movw %r14w, 20(%r10)
; CHECK-BASELINE-NEXT: movw %r15w, 18(%r10)
; CHECK-BASELINE-NEXT: movw %r12w, 16(%r10)
; CHECK-BASELINE-NEXT: movw %r13w, 14(%r10)
; CHECK-BASELINE-NEXT: movw %ax, 12(%r10)
; CHECK-BASELINE-NEXT: movw %dx, 10(%r10)
; CHECK-BASELINE-NEXT: movw %cx, 8(%r10)
; CHECK-BASELINE-NEXT: movw %r9w, 6(%r10)
; CHECK-BASELINE-NEXT: movw %r8w, 4(%r10)
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-BASELINE-NEXT: movw %ax, 2(%r10)
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-BASELINE-NEXT: movw %ax, (%r10)
; CHECK-BASELINE-NEXT: movq %r10, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: popq %r12
; CHECK-BASELINE-NEXT: popq %r13
; CHECK-BASELINE-NEXT: popq %r14
; CHECK-BASELINE-NEXT: popq %r15
; CHECK-BASELINE-NEXT: popq %rbp
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v16i16:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbp
; CHECK-SSE1-NEXT: pushq %r15
; CHECK-SSE1-NEXT: pushq %r14
; CHECK-SSE1-NEXT: pushq %r13
; CHECK-SSE1-NEXT: pushq %r12
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movq %rcx, %r9
; CHECK-SSE1-NEXT: movq %rdi, %r10
; CHECK-SSE1-NEXT: movzwl 30(%rdx), %edi
; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 28(%rdx), %edi
; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 26(%rdx), %edi
; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 24(%rdx), %eax
; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 22(%rdx), %eax
; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 20(%rdx), %r11d
; CHECK-SSE1-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r14d
; CHECK-SSE1-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 16(%rdx), %r15d
; CHECK-SSE1-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 14(%rdx), %r12d
; CHECK-SSE1-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 12(%rdx), %r13d
; CHECK-SSE1-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r8d
; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 8(%rdx), %ebx
; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 6(%rdx), %ebp
; CHECK-SSE1-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl (%rdx), %ecx
; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 4(%rdx), %edi
; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 2(%rdx), %eax
; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl (%rsi), %edx
; CHECK-SSE1-NEXT: xorw %cx, %dx
; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 2(%rsi), %ecx
; CHECK-SSE1-NEXT: xorw %ax, %cx
; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 4(%rsi), %eax
; CHECK-SSE1-NEXT: xorw %di, %ax
; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movzwl 6(%rsi), %edx
; CHECK-SSE1-NEXT: xorw %bp, %dx
; CHECK-SSE1-NEXT: movl %edx, %eax
; CHECK-SSE1-NEXT: movzwl 8(%rsi), %ecx
; CHECK-SSE1-NEXT: xorw %bx, %cx
; CHECK-SSE1-NEXT: movzwl 10(%rsi), %edx
; CHECK-SSE1-NEXT: xorw %r8w, %dx
; CHECK-SSE1-NEXT: movl %edx, %r8d
; CHECK-SSE1-NEXT: movzwl 12(%rsi), %edx
; CHECK-SSE1-NEXT: xorw %r13w, %dx
; CHECK-SSE1-NEXT: movzwl 14(%rsi), %r13d
; CHECK-SSE1-NEXT: xorw %r12w, %r13w
; CHECK-SSE1-NEXT: movzwl 16(%rsi), %r12d
; CHECK-SSE1-NEXT: xorw %r15w, %r12w
; CHECK-SSE1-NEXT: movzwl 18(%rsi), %r15d
; CHECK-SSE1-NEXT: xorw %r14w, %r15w
; CHECK-SSE1-NEXT: movzwl 20(%rsi), %r14d
; CHECK-SSE1-NEXT: xorw %r11w, %r14w
; CHECK-SSE1-NEXT: movzwl 22(%rsi), %ebp
; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload
; CHECK-SSE1-NEXT: movzwl 24(%rsi), %ebx
; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload
; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r11d
; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload
; CHECK-SSE1-NEXT: movzwl 28(%rsi), %edi
; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload
; CHECK-SSE1-NEXT: movzwl 30(%rsi), %esi
; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload
; CHECK-SSE1-NEXT: andw 30(%r9), %si
; CHECK-SSE1-NEXT: andw 28(%r9), %di
; CHECK-SSE1-NEXT: andw 26(%r9), %r11w
; CHECK-SSE1-NEXT: andw 24(%r9), %bx
; CHECK-SSE1-NEXT: andw 22(%r9), %bp
; CHECK-SSE1-NEXT: andw 20(%r9), %r14w
; CHECK-SSE1-NEXT: andw 18(%r9), %r15w
; CHECK-SSE1-NEXT: andw 16(%r9), %r12w
; CHECK-SSE1-NEXT: andw 14(%r9), %r13w
; CHECK-SSE1-NEXT: andw 12(%r9), %dx
; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: andw 10(%r9), %r8w
; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl %ecx, %edx
; CHECK-SSE1-NEXT: andw 8(%r9), %dx
; CHECK-SSE1-NEXT: andw 6(%r9), %ax
; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload
; CHECK-SSE1-NEXT: andw 4(%r9), %r8w
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-SSE1-NEXT: andw 2(%r9), %ax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: andw (%r9), %cx
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload
; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-SSE1-NEXT: movl %edx, %ecx
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload
; CHECK-SSE1-NEXT: movw %si, 30(%r10)
; CHECK-SSE1-NEXT: movw %di, 28(%r10)
; CHECK-SSE1-NEXT: movw %r11w, 26(%r10)
; CHECK-SSE1-NEXT: movw %bx, 24(%r10)
; CHECK-SSE1-NEXT: movw %bp, 22(%r10)
; CHECK-SSE1-NEXT: movw %r14w, 20(%r10)
; CHECK-SSE1-NEXT: movw %r15w, 18(%r10)
; CHECK-SSE1-NEXT: movw %r12w, 16(%r10)
; CHECK-SSE1-NEXT: movw %r13w, 14(%r10)
; CHECK-SSE1-NEXT: movw %ax, 12(%r10)
; CHECK-SSE1-NEXT: movw %dx, 10(%r10)
; CHECK-SSE1-NEXT: movw %cx, 8(%r10)
; CHECK-SSE1-NEXT: movw %r9w, 6(%r10)
; CHECK-SSE1-NEXT: movw %r8w, 4(%r10)
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-SSE1-NEXT: movw %ax, 2(%r10)
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-SSE1-NEXT: movw %ax, (%r10)
; CHECK-SSE1-NEXT: movq %r10, %rax
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: popq %r12
; CHECK-SSE1-NEXT: popq %r13
; CHECK-SSE1-NEXT: popq %r14
; CHECK-SSE1-NEXT: popq %r15
; CHECK-SSE1-NEXT: popq %rbp
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v16i16:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2
; CHECK-SSE2-NEXT: andps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2
; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2
; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1
; CHECK-SSE2-NEXT: orps %xmm2, %xmm1
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v16i16:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1
; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; CHECK-XOP-NEXT: retq
  %x = load <16 x i16>, <16 x i16> *%px, align 32
  %y = load <16 x i16>, <16 x i16> *%py, align 32
  %mask = load <16 x i16>, <16 x i16> *%pmask, align 32
  %n0 = xor <16 x i16> %x, %y
  %n1 = and <16 x i16> %n0, %mask
  %r = xor <16 x i16> %n1, %y
  ret <16 x i16> %r
}

; @in_v8i32: loads %x, %y and %mask as <8 x i32> from %px, %py and %pmask
; (each load is align 32) and returns ((%x ^ %y) & %mask) ^ %y. Bit for bit,
; that result equals %x where the mask bit is 1 and %y where it is 0.
; The assertions inside the function are autogenerated (see the NOTE on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <8 x i32> @in_v8i32(<8 x i32> *%px, <8 x i32> *%py, <8 x i32> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: in_v8i32:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbp
; CHECK-BASELINE-NEXT: pushq %r15
; CHECK-BASELINE-NEXT: pushq %r14
; CHECK-BASELINE-NEXT: pushq %r13
; CHECK-BASELINE-NEXT: pushq %r12
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movl 28(%rdx), %r15d
; CHECK-BASELINE-NEXT: movl 24(%rdx), %r14d
; CHECK-BASELINE-NEXT: movl 20(%rdx), %r10d
; CHECK-BASELINE-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 16(%rdx), %eax
; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 12(%rdx), %ebp
; CHECK-BASELINE-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebx
; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl (%rdx), %r12d
; CHECK-BASELINE-NEXT: movl 4(%rdx), %r13d
; CHECK-BASELINE-NEXT: movl (%rsi), %r11d
; CHECK-BASELINE-NEXT: xorl %r12d, %r11d
; CHECK-BASELINE-NEXT: movl 4(%rsi), %r9d
; CHECK-BASELINE-NEXT: xorl %r13d, %r9d
; CHECK-BASELINE-NEXT: movl 8(%rsi), %r8d
; CHECK-BASELINE-NEXT: xorl %ebx, %r8d
; CHECK-BASELINE-NEXT: movl 12(%rsi), %ebx
; CHECK-BASELINE-NEXT: xorl %ebp, %ebx
; CHECK-BASELINE-NEXT: movl 16(%rsi), %ebp
; CHECK-BASELINE-NEXT: xorl %eax, %ebp
; CHECK-BASELINE-NEXT: movl 20(%rsi), %edx
; CHECK-BASELINE-NEXT: xorl %r10d, %edx
; CHECK-BASELINE-NEXT: movl 24(%rsi), %eax
; CHECK-BASELINE-NEXT: xorl %r14d, %eax
; CHECK-BASELINE-NEXT: movl 28(%rsi), %esi
; CHECK-BASELINE-NEXT: xorl %r15d, %esi
; CHECK-BASELINE-NEXT: andl 28(%rcx), %esi
; CHECK-BASELINE-NEXT: andl 24(%rcx), %eax
; CHECK-BASELINE-NEXT: andl 20(%rcx), %edx
; CHECK-BASELINE-NEXT: andl 16(%rcx), %ebp
; CHECK-BASELINE-NEXT: andl 12(%rcx), %ebx
; CHECK-BASELINE-NEXT: andl 8(%rcx), %r8d
; CHECK-BASELINE-NEXT: andl 4(%rcx), %r9d
; CHECK-BASELINE-NEXT: andl (%rcx), %r11d
; CHECK-BASELINE-NEXT: xorl %r12d, %r11d
; CHECK-BASELINE-NEXT: xorl %r13d, %r9d
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-BASELINE-NEXT: xorl %r14d, %eax
; CHECK-BASELINE-NEXT: xorl %r15d, %esi
; CHECK-BASELINE-NEXT: movl %esi, 28(%rdi)
; CHECK-BASELINE-NEXT: movl %eax, 24(%rdi)
; CHECK-BASELINE-NEXT: movl %edx, 20(%rdi)
; CHECK-BASELINE-NEXT: movl %ebp, 16(%rdi)
; CHECK-BASELINE-NEXT: movl %ebx, 12(%rdi)
; CHECK-BASELINE-NEXT: movl %r8d, 8(%rdi)
; CHECK-BASELINE-NEXT: movl %r9d, 4(%rdi)
; CHECK-BASELINE-NEXT: movl %r11d, (%rdi)
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: popq %r12
; CHECK-BASELINE-NEXT: popq %r13
; CHECK-BASELINE-NEXT: popq %r14
; CHECK-BASELINE-NEXT: popq %r15
; CHECK-BASELINE-NEXT: popq %rbp
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v8i32:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbp
; CHECK-SSE1-NEXT: pushq %r15
; CHECK-SSE1-NEXT: pushq %r14
; CHECK-SSE1-NEXT: pushq %r13
; CHECK-SSE1-NEXT: pushq %r12
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movl 28(%rdx), %r15d
; CHECK-SSE1-NEXT: movl 24(%rdx), %r14d
; CHECK-SSE1-NEXT: movl 20(%rdx), %r10d
; CHECK-SSE1-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 16(%rdx), %eax
; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 12(%rdx), %ebp
; CHECK-SSE1-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl 8(%rdx), %ebx
; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl (%rdx), %r12d
; CHECK-SSE1-NEXT: movl 4(%rdx), %r13d
; CHECK-SSE1-NEXT: movl (%rsi), %r11d
; CHECK-SSE1-NEXT: xorl %r12d, %r11d
; CHECK-SSE1-NEXT: movl 4(%rsi), %r9d
; CHECK-SSE1-NEXT: xorl %r13d, %r9d
; CHECK-SSE1-NEXT: movl 8(%rsi), %r8d
; CHECK-SSE1-NEXT: xorl %ebx, %r8d
; CHECK-SSE1-NEXT: movl 12(%rsi), %ebx
; CHECK-SSE1-NEXT: xorl %ebp, %ebx
; CHECK-SSE1-NEXT: movl 16(%rsi), %ebp
; CHECK-SSE1-NEXT: xorl %eax, %ebp
; CHECK-SSE1-NEXT: movl 20(%rsi), %edx
; CHECK-SSE1-NEXT: xorl %r10d, %edx
; CHECK-SSE1-NEXT: movl 24(%rsi), %eax
; CHECK-SSE1-NEXT: xorl %r14d, %eax
; CHECK-SSE1-NEXT: movl 28(%rsi), %esi
; CHECK-SSE1-NEXT: xorl %r15d, %esi
; CHECK-SSE1-NEXT: andl 28(%rcx), %esi
; CHECK-SSE1-NEXT: andl 24(%rcx), %eax
; CHECK-SSE1-NEXT: andl 20(%rcx), %edx
; CHECK-SSE1-NEXT: andl 16(%rcx), %ebp
; CHECK-SSE1-NEXT: andl 12(%rcx), %ebx
; CHECK-SSE1-NEXT: andl 8(%rcx), %r8d
; CHECK-SSE1-NEXT: andl 4(%rcx), %r9d
; CHECK-SSE1-NEXT: andl (%rcx), %r11d
; CHECK-SSE1-NEXT: xorl %r12d, %r11d
; CHECK-SSE1-NEXT: xorl %r13d, %r9d
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-SSE1-NEXT: xorl %r14d, %eax
; CHECK-SSE1-NEXT: xorl %r15d, %esi
; CHECK-SSE1-NEXT: movl %esi, 28(%rdi)
; CHECK-SSE1-NEXT: movl %eax, 24(%rdi)
; CHECK-SSE1-NEXT: movl %edx, 20(%rdi)
; CHECK-SSE1-NEXT: movl %ebp, 16(%rdi)
; CHECK-SSE1-NEXT: movl %ebx, 12(%rdi)
; CHECK-SSE1-NEXT: movl %r8d, 8(%rdi)
; CHECK-SSE1-NEXT: movl %r9d, 4(%rdi)
; CHECK-SSE1-NEXT: movl %r11d, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: popq %r12
; CHECK-SSE1-NEXT: popq %r13
; CHECK-SSE1-NEXT: popq %r14
; CHECK-SSE1-NEXT: popq %r15
; CHECK-SSE1-NEXT: popq %rbp
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v8i32:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2
; CHECK-SSE2-NEXT: andps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2
; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2
; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1
; CHECK-SSE2-NEXT: orps %xmm2, %xmm1
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v8i32:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1
; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; CHECK-XOP-NEXT: retq
  %x = load <8 x i32>, <8 x i32> *%px, align 32
  %y = load <8 x i32>, <8 x i32> *%py, align 32
  %mask = load <8 x i32>, <8 x i32> *%pmask, align 32
  %n0 = xor <8 x i32> %x, %y
  %n1 = and <8 x i32> %n0, %mask
  %r = xor <8 x i32> %n1, %y
  ret <8 x i32> %r
}

; @in_v4i64: loads %x, %y and %mask as <4 x i64> from %px, %py and %pmask
; (each load is align 32) and returns ((%x ^ %y) & %mask) ^ %y. Bit for bit,
; that result equals %x where the mask bit is 1 and %y where it is 0.
; The assertions inside the function are autogenerated (see the NOTE on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <4 x i64> @in_v4i64(<4 x i64> *%px, <4 x i64> *%py, <4 x i64> *%pmask) nounwind {
; CHECK-BASELINE-LABEL: in_v4i64:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: movq 24(%rdx), %r8
; CHECK-BASELINE-NEXT: movq 16(%rdx), %r9
; CHECK-BASELINE-NEXT: movq (%rdx), %r11
; CHECK-BASELINE-NEXT: movq 8(%rdx), %r10
; CHECK-BASELINE-NEXT: movq (%rsi), %rdx
; CHECK-BASELINE-NEXT: xorq %r11, %rdx
; CHECK-BASELINE-NEXT: movq 8(%rsi), %rdi
; CHECK-BASELINE-NEXT: xorq %r10, %rdi
; CHECK-BASELINE-NEXT: movq 16(%rsi), %rbx
; CHECK-BASELINE-NEXT: xorq %r9, %rbx
; CHECK-BASELINE-NEXT: movq 24(%rsi), %rsi
; CHECK-BASELINE-NEXT: xorq %r8, %rsi
; CHECK-BASELINE-NEXT: andq 24(%rcx), %rsi
; CHECK-BASELINE-NEXT: andq 16(%rcx), %rbx
; CHECK-BASELINE-NEXT: andq 8(%rcx), %rdi
; CHECK-BASELINE-NEXT: andq (%rcx), %rdx
; CHECK-BASELINE-NEXT: xorq %r11, %rdx
; CHECK-BASELINE-NEXT: xorq %r10, %rdi
; CHECK-BASELINE-NEXT: xorq %r9, %rbx
; CHECK-BASELINE-NEXT: xorq %r8, %rsi
; CHECK-BASELINE-NEXT: movq %rsi, 24(%rax)
; CHECK-BASELINE-NEXT: movq %rbx, 16(%rax)
; CHECK-BASELINE-NEXT: movq %rdi, 8(%rax)
; CHECK-BASELINE-NEXT: movq %rdx, (%rax)
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v4i64:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movq 24(%rdx), %r8
; CHECK-SSE1-NEXT: movq 16(%rdx), %r9
; CHECK-SSE1-NEXT: movq (%rdx), %r11
; CHECK-SSE1-NEXT: movq 8(%rdx), %r10
; CHECK-SSE1-NEXT: movq (%rsi), %rdx
; CHECK-SSE1-NEXT: xorq %r11, %rdx
; CHECK-SSE1-NEXT: movq 8(%rsi), %rdi
; CHECK-SSE1-NEXT: xorq %r10, %rdi
; CHECK-SSE1-NEXT: movq 16(%rsi), %rbx
; CHECK-SSE1-NEXT: xorq %r9, %rbx
; CHECK-SSE1-NEXT: movq 24(%rsi), %rsi
; CHECK-SSE1-NEXT: xorq %r8, %rsi
; CHECK-SSE1-NEXT: andq 24(%rcx), %rsi
; CHECK-SSE1-NEXT: andq 16(%rcx), %rbx
; CHECK-SSE1-NEXT: andq 8(%rcx), %rdi
; CHECK-SSE1-NEXT: andq (%rcx), %rdx
; CHECK-SSE1-NEXT: xorq %r11, %rdx
; CHECK-SSE1-NEXT: xorq %r10, %rdi
; CHECK-SSE1-NEXT: xorq %r9, %rbx
; CHECK-SSE1-NEXT: xorq %r8, %rsi
; CHECK-SSE1-NEXT: movq %rsi, 24(%rax)
; CHECK-SSE1-NEXT: movq %rbx, 16(%rax)
; CHECK-SSE1-NEXT: movq %rdi, 8(%rax)
; CHECK-SSE1-NEXT: movq %rdx, (%rax)
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v4i64:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2
; CHECK-SSE2-NEXT: andps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2
; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2
; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1
; CHECK-SSE2-NEXT: orps %xmm2, %xmm1
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v4i64:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1
; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i64>, <4 x i64> *%px, align 32
  %y = load <4 x i64>, <4 x i64> *%py, align 32
  %mask = load <4 x i64>, <4 x i64> *%pmask, align 32
  %n0 = xor <4 x i64> %x, %y
  %n1 = and <4 x i64> %n0, %mask
  %r = xor <4 x i64> %n1, %y
  ret <4 x i64> %r
}