; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefix=CHECK-XOP

; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;
; Each pair of tests below checks the masked-merge pattern
;   (%x & %mask) | (%y & ~%mask)          ("out" form)
;   ((%x ^ %y) & %mask) ^ %y              ("in" form)
; where %x or %y is the all-ones constant or the splat constant 42, with both
; the mask and the inverted mask ("invmask" variants, non-canonical order).

define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rdi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    xorps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pandn (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpandn (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    orps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm0
; CHECK-SSE2-NEXT:    orps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-XOP-NEXT:    vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT:    andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm2
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm1, %xmm2
; CHECK-XOP-NEXT:    vpandn %xmm2, %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT:    vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT:    vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [5.88545355E-44,5.88545355E-44,5.88545355E-44,5.88545355E-44]
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}