; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512dq -disable-peephole | FileCheck %s --check-prefix=AVX512

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
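;
; These predicates are symmetric ("fcmp oeq %x, %y" == "fcmp oeq %y, %x"), so
; the operands may be swapped to fold the load into the second source of
; cmpps/vcmpps. Commuting lt/le would require gt/ge, which the 3-bit SSE
; compare immediate (eq/lt/le/unord/neq/nlt/nle/ord) cannot encode.
;
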
define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

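; ueq/one have no direct SSE predicate (eq_uq/neq_oq need the AVX 5-bit
; immediate), so SSE expands them into two compares combined with orps/andps;
; the loaded value feeds both compares, so it stays in a register. The AVX
; vcmpeq_uqps/vcmpneq_oqps predicates are symmetric and still fold the load.
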
define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    cmpunordps %xmm1, %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ueq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_one:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    cmpneqps %xmm0, %xmm2
; SSE-NEXT:    cmpordps %xmm1, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqps (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp one <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

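; lt/le are not symmetric: commuted they would become gt/ge, which the SSE
; immediate cannot encode, so the load is materialized and the compare keeps
; its original operand order (only the 512-bit AVX512 cases below rewrite
; lt/le to gt/ge).
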
define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpltps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_le:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm1
; SSE-NEXT:    cmpleps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm1
; AVX512-NEXT:    vcmpleps %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

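; The 256-bit cases repeat the same patterns; the SSE run line legalizes each
; ymm operation into a pair of xmm operations at offsets 0 and 16.
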
define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpeqps %xmm0, %xmm4
; SSE-NEXT:    cmpunordps %xmm2, %xmm0
; SSE-NEXT:    orps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpeqps %xmm1, %xmm2
; SSE-NEXT:    cmpunordps %xmm3, %xmm1
; SSE-NEXT:    orps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ueq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_one_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm4
; SSE-NEXT:    cmpneqps %xmm0, %xmm4
; SSE-NEXT:    cmpordps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm2
; SSE-NEXT:    cmpneqps %xmm1, %xmm2
; SSE-NEXT:    cmpordps %xmm3, %xmm1
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp one <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_le_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm2
; SSE-NEXT:    movaps 16(%rdi), %xmm3
; SSE-NEXT:    cmpleps %xmm0, %xmm2
; SSE-NEXT:    cmpleps %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm1
; AVX-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %ymm1
; AVX512-NEXT:    vcmpleps %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
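;
; cmppd shares the 3-bit compare immediate with cmpps, so the same
; commutation rules apply to the packed-double cases.
;
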
define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    cmpunordpd %xmm1, %xmm0
; SSE-NEXT:    orpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ueq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_one:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    cmpneqpd %xmm0, %xmm2
; SSE-NEXT:    cmpordpd %xmm1, %xmm0
; SSE-NEXT:    andpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp one <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmpltpd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_le:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm1
; SSE-NEXT:    cmplepd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %xmm1
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %xmm1
; AVX512-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
; AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ueq_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm2, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpeqpd %xmm1, %xmm2
; SSE-NEXT:    cmpunordpd %xmm3, %xmm1
; SSE-NEXT:    orpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ueq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_one_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_one_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
; SSE-NEXT:    cmpordpd %xmm2, %xmm0
; SSE-NEXT:    andpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm2
; SSE-NEXT:    cmpneqpd %xmm1, %xmm2
; SSE-NEXT:    cmpordpd %xmm3, %xmm1
; SSE-NEXT:    andpd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp one <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_le_ymm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm2
; SSE-NEXT:    movapd 16(%rdi), %xmm3
; SSE-NEXT:    cmplepd %xmm0, %xmm2
; SSE-NEXT:    cmplepd %xmm1, %xmm3
; SSE-NEXT:    movapd %xmm2, %xmm0
; SSE-NEXT:    movapd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le_ymm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm1
; AVX-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le_ymm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovapd (%rdi), %ymm1
; AVX512-NEXT:    vcmplepd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

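;
; 512-bit comparisons: the AVX512 compares produce a mask register, which is
; expanded back into a vector with vpmovm2d/vpmovm2q (hence +avx512dq on the
; AVX512 run line).
;
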
define <16 x i32> @commute_cmpps_eq_zmm(<16 x float>* %a0, <16 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqps (%rdi), %xmm0
; SSE-NEXT:    cmpeqps 16(%rdi), %xmm1
; SSE-NEXT:    cmpeqps 32(%rdi), %xmm2
; SSE-NEXT:    cmpeqps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_eq_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpeqps 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_eq_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqps (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <16 x float>, <16 x float>* %a0
  %2 = fcmp oeq <16 x float> %1, %a1
  %3 = sext <16 x i1> %2 to <16 x i32>
  ret <16 x i32> %3
}

define <16 x i32> @commute_cmpps_ne_zmm(<16 x float>* %a0, <16 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqps (%rdi), %xmm0
; SSE-NEXT:    cmpneqps 16(%rdi), %xmm1
; SSE-NEXT:    cmpneqps 32(%rdi), %xmm2
; SSE-NEXT:    cmpneqps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ne_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpneqps 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ne_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqps (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <16 x float>, <16 x float>* %a0
  %2 = fcmp une <16 x float> %1, %a1
  %3 = sext <16 x i1> %2 to <16 x i32>
  ret <16 x i32> %3
}

define <16 x i32> @commute_cmpps_ord_zmm(<16 x float>* %a0, <16 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordps (%rdi), %xmm0
; SSE-NEXT:    cmpordps 16(%rdi), %xmm1
; SSE-NEXT:    cmpordps 32(%rdi), %xmm2
; SSE-NEXT:    cmpordps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ord_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpordps 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ord_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordps (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <16 x float>, <16 x float>* %a0
  %2 = fcmp ord <16 x float> %1, %a1
  %3 = sext <16 x i1> %2 to <16 x i32>
  ret <16 x i32> %3
}

define <16 x i32> @commute_cmpps_uno_zmm(<16 x float>* %a0, <16 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordps (%rdi), %xmm0
; SSE-NEXT:    cmpunordps 16(%rdi), %xmm1
; SSE-NEXT:    cmpunordps 32(%rdi), %xmm2
; SSE-NEXT:    cmpunordps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_uno_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpunordps 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_uno_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordps (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <16 x float>, <16 x float>* %a0
  %2 = fcmp uno <16 x float> %1, %a1
  %3 = sext <16 x i1> %2 to <16 x i32>
  ret <16 x i32> %3
}

define <16 x i32> @commute_cmpps_ueq_zmm(<16 x float>* %a0, <16 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm7
; SSE-NEXT:    movaps 16(%rdi), %xmm5
; SSE-NEXT:    movaps 32(%rdi), %xmm6
; SSE-NEXT:    movaps 48(%rdi), %xmm8
; SSE-NEXT:    movaps %xmm7, %xmm4
; SSE-NEXT:    cmpeqps %xmm0, %xmm4
; SSE-NEXT:    cmpunordps %xmm7, %xmm0
; SSE-NEXT:    orps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm5, %xmm4
; SSE-NEXT:    cmpeqps %xmm1, %xmm4
; SSE-NEXT:    cmpunordps %xmm5, %xmm1
; SSE-NEXT:    orps %xmm4, %xmm1
; SSE-NEXT:    movaps %xmm6, %xmm4
; SSE-NEXT:    cmpeqps %xmm2, %xmm4
; SSE-NEXT:    cmpunordps %xmm6, %xmm2
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    movaps %xmm8, %xmm4
; SSE-NEXT:    cmpeqps %xmm3, %xmm4
; SSE-NEXT:    cmpunordps %xmm8, %xmm3
; SSE-NEXT:    orps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_ueq_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpeq_uqps 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_ueq_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqps (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <16 x float>, <16 x float>* %a0
  %2 = fcmp ueq <16 x float> %1, %a1
  %3 = sext <16 x i1> %2 to <16 x i32>
  ret <16 x i32> %3
}

define <16 x i32> @commute_cmpps_one_zmm(<16 x float>* %a0, <16 x float> %a1) {
; SSE-LABEL: commute_cmpps_one_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm7
; SSE-NEXT:    movaps 16(%rdi), %xmm5
; SSE-NEXT:    movaps 32(%rdi), %xmm6
; SSE-NEXT:    movaps 48(%rdi), %xmm8
; SSE-NEXT:    movaps %xmm7, %xmm4
; SSE-NEXT:    cmpneqps %xmm0, %xmm4
; SSE-NEXT:    cmpordps %xmm7, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm5, %xmm4
; SSE-NEXT:    cmpneqps %xmm1, %xmm4
; SSE-NEXT:    cmpordps %xmm5, %xmm1
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    movaps %xmm6, %xmm4
; SSE-NEXT:    cmpneqps %xmm2, %xmm4
; SSE-NEXT:    cmpordps %xmm6, %xmm2
; SSE-NEXT:    andps %xmm4, %xmm2
; SSE-NEXT:    movaps %xmm8, %xmm4
; SSE-NEXT:    cmpneqps %xmm3, %xmm4
; SSE-NEXT:    cmpordps %xmm8, %xmm3
; SSE-NEXT:    andps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_one_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpneq_oqps 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_one_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqps (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <16 x float>, <16 x float>* %a0
  %2 = fcmp one <16 x float> %1, %a1
  %3 = sext <16 x i1> %2 to <16 x i32>
  ret <16 x i32> %3
}

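; For 512-bit lt/le the AVX512 lowering does commute the compare, rewriting
; the predicate to gt/ge (vcmpgtps/vcmpgeps below) so the load still folds.
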
define <16 x i32> @commute_cmpps_lt_zmm(<16 x float>* %a0, <16 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm4
; SSE-NEXT:    movaps 16(%rdi), %xmm5
; SSE-NEXT:    movaps 32(%rdi), %xmm6
; SSE-NEXT:    movaps 48(%rdi), %xmm7
; SSE-NEXT:    cmpltps %xmm0, %xmm4
; SSE-NEXT:    cmpltps %xmm1, %xmm5
; SSE-NEXT:    cmpltps %xmm2, %xmm6
; SSE-NEXT:    cmpltps %xmm3, %xmm7
; SSE-NEXT:    movaps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm5, %xmm1
; SSE-NEXT:    movaps %xmm6, %xmm2
; SSE-NEXT:    movaps %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_lt_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm2
; AVX-NEXT:    vmovaps 32(%rdi), %ymm3
; AVX-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vcmpltps %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_lt_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpgtps (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <16 x float>, <16 x float>* %a0
  %2 = fcmp olt <16 x float> %1, %a1
  %3 = sext <16 x i1> %2 to <16 x i32>
  ret <16 x i32> %3
}

define <16 x i32> @commute_cmpps_le_zmm(<16 x float>* %a0, <16 x float> %a1) {
; SSE-LABEL: commute_cmpps_le_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm4
; SSE-NEXT:    movaps 16(%rdi), %xmm5
; SSE-NEXT:    movaps 32(%rdi), %xmm6
; SSE-NEXT:    movaps 48(%rdi), %xmm7
; SSE-NEXT:    cmpleps %xmm0, %xmm4
; SSE-NEXT:    cmpleps %xmm1, %xmm5
; SSE-NEXT:    cmpleps %xmm2, %xmm6
; SSE-NEXT:    cmpleps %xmm3, %xmm7
; SSE-NEXT:    movaps %xmm4, %xmm0
; SSE-NEXT:    movaps %xmm5, %xmm1
; SSE-NEXT:    movaps %xmm6, %xmm2
; SSE-NEXT:    movaps %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmpps_le_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %ymm2
; AVX-NEXT:    vmovaps 32(%rdi), %ymm3
; AVX-NEXT:    vcmpleps %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vcmpleps %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmpps_le_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpgeps (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2d %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <16 x float>, <16 x float>* %a0
  %2 = fcmp ole <16 x float> %1, %a1
  %3 = sext <16 x i1> %2 to <16 x i32>
  ret <16 x i32> %3
}

define <8 x i64> @commute_cmppd_eq_zmm(<8 x double>* %a0, <8 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpeqpd (%rdi), %xmm0
; SSE-NEXT:    cmpeqpd 16(%rdi), %xmm1
; SSE-NEXT:    cmpeqpd 32(%rdi), %xmm2
; SSE-NEXT:    cmpeqpd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_eq_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpeqpd 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_eq_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <8 x double>, <8 x double>* %a0
  %2 = fcmp oeq <8 x double> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @commute_cmppd_ne_zmm(<8 x double>* %a0, <8 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpneqpd (%rdi), %xmm0
; SSE-NEXT:    cmpneqpd 16(%rdi), %xmm1
; SSE-NEXT:    cmpneqpd 32(%rdi), %xmm2
; SSE-NEXT:    cmpneqpd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ne_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpneqpd 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ne_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneqpd (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <8 x double>, <8 x double>* %a0
  %2 = fcmp une <8 x double> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @commute_cmppd_ord_zmm(<8 x double>* %a0, <8 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpordpd (%rdi), %xmm0
; SSE-NEXT:    cmpordpd 16(%rdi), %xmm1
; SSE-NEXT:    cmpordpd 32(%rdi), %xmm2
; SSE-NEXT:    cmpordpd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ord_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpordpd 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ord_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpordpd (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <8 x double>, <8 x double>* %a0
  %2 = fcmp ord <8 x double> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @commute_cmppd_uno_zmm(<8 x double>* %a0, <8 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    cmpunordpd (%rdi), %xmm0
; SSE-NEXT:    cmpunordpd 16(%rdi), %xmm1
; SSE-NEXT:    cmpunordpd 32(%rdi), %xmm2
; SSE-NEXT:    cmpunordpd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_uno_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpunordpd 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_uno_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpunordpd (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <8 x double>, <8 x double>* %a0
  %2 = fcmp uno <8 x double> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @commute_cmppd_ueq_zmm(<8 x double>* %a0, <8 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm7
; SSE-NEXT:    movapd 16(%rdi), %xmm5
; SSE-NEXT:    movapd 32(%rdi), %xmm6
; SSE-NEXT:    movapd 48(%rdi), %xmm8
; SSE-NEXT:    movapd %xmm7, %xmm4
; SSE-NEXT:    cmpeqpd %xmm0, %xmm4
; SSE-NEXT:    cmpunordpd %xmm7, %xmm0
; SSE-NEXT:    orpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm5, %xmm4
; SSE-NEXT:    cmpeqpd %xmm1, %xmm4
; SSE-NEXT:    cmpunordpd %xmm5, %xmm1
; SSE-NEXT:    orpd %xmm4, %xmm1
; SSE-NEXT:    movapd %xmm6, %xmm4
; SSE-NEXT:    cmpeqpd %xmm2, %xmm4
; SSE-NEXT:    cmpunordpd %xmm6, %xmm2
; SSE-NEXT:    orpd %xmm4, %xmm2
; SSE-NEXT:    movapd %xmm8, %xmm4
; SSE-NEXT:    cmpeqpd %xmm3, %xmm4
; SSE-NEXT:    cmpunordpd %xmm8, %xmm3
; SSE-NEXT:    orpd %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_ueq_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpeq_uqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpeq_uqpd 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_ueq_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpeq_uqpd (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <8 x double>, <8 x double>* %a0
  %2 = fcmp ueq <8 x double> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @commute_cmppd_one_zmm(<8 x double>* %a0, <8 x double> %a1) {
; SSE-LABEL: commute_cmppd_one_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm7
; SSE-NEXT:    movapd 16(%rdi), %xmm5
; SSE-NEXT:    movapd 32(%rdi), %xmm6
; SSE-NEXT:    movapd 48(%rdi), %xmm8
; SSE-NEXT:    movapd %xmm7, %xmm4
; SSE-NEXT:    cmpneqpd %xmm0, %xmm4
; SSE-NEXT:    cmpordpd %xmm7, %xmm0
; SSE-NEXT:    andpd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm5, %xmm4
; SSE-NEXT:    cmpneqpd %xmm1, %xmm4
; SSE-NEXT:    cmpordpd %xmm5, %xmm1
; SSE-NEXT:    andpd %xmm4, %xmm1
; SSE-NEXT:    movapd %xmm6, %xmm4
; SSE-NEXT:    cmpneqpd %xmm2, %xmm4
; SSE-NEXT:    cmpordpd %xmm6, %xmm2
; SSE-NEXT:    andpd %xmm4, %xmm2
; SSE-NEXT:    movapd %xmm8, %xmm4
; SSE-NEXT:    cmpneqpd %xmm3, %xmm4
; SSE-NEXT:    cmpordpd %xmm8, %xmm3
; SSE-NEXT:    andpd %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_one_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vcmpneq_oqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    vcmpneq_oqpd 32(%rdi), %ymm1, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_one_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpneq_oqpd (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <8 x double>, <8 x double>* %a0
  %2 = fcmp one <8 x double> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @commute_cmppd_lt_zmm(<8 x double>* %a0, <8 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm4
; SSE-NEXT:    movapd 16(%rdi), %xmm5
; SSE-NEXT:    movapd 32(%rdi), %xmm6
; SSE-NEXT:    movapd 48(%rdi), %xmm7
; SSE-NEXT:    cmpltpd %xmm0, %xmm4
; SSE-NEXT:    cmpltpd %xmm1, %xmm5
; SSE-NEXT:    cmpltpd %xmm2, %xmm6
; SSE-NEXT:    cmpltpd %xmm3, %xmm7
; SSE-NEXT:    movapd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm5, %xmm1
; SSE-NEXT:    movapd %xmm6, %xmm2
; SSE-NEXT:    movapd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_lt_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm2
; AVX-NEXT:    vmovapd 32(%rdi), %ymm3
; AVX-NEXT:    vcmpltpd %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vcmpltpd %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_lt_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpgtpd (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <8 x double>, <8 x double>* %a0
  %2 = fcmp olt <8 x double> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @commute_cmppd_le_zmm(<8 x double>* %a0, <8 x double> %a1) {
; SSE-LABEL: commute_cmppd_le_zmm:
; SSE:       # %bb.0:
; SSE-NEXT:    movapd (%rdi), %xmm4
; SSE-NEXT:    movapd 16(%rdi), %xmm5
; SSE-NEXT:    movapd 32(%rdi), %xmm6
; SSE-NEXT:    movapd 48(%rdi), %xmm7
; SSE-NEXT:    cmplepd %xmm0, %xmm4
; SSE-NEXT:    cmplepd %xmm1, %xmm5
; SSE-NEXT:    cmplepd %xmm2, %xmm6
; SSE-NEXT:    cmplepd %xmm3, %xmm7
; SSE-NEXT:    movapd %xmm4, %xmm0
; SSE-NEXT:    movapd %xmm5, %xmm1
; SSE-NEXT:    movapd %xmm6, %xmm2
; SSE-NEXT:    movapd %xmm7, %xmm3
; SSE-NEXT:    retq
;
; AVX-LABEL: commute_cmppd_le_zmm:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovapd (%rdi), %ymm2
; AVX-NEXT:    vmovapd 32(%rdi), %ymm3
; AVX-NEXT:    vcmplepd %ymm0, %ymm2, %ymm0
; AVX-NEXT:    vcmplepd %ymm1, %ymm3, %ymm1
; AVX-NEXT:    retq
;
; AVX512-LABEL: commute_cmppd_le_zmm:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vcmpgepd (%rdi), %zmm0, %k0
; AVX512-NEXT:    vpmovm2q %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = load <8 x double>, <8 x double>* %a0
  %2 = fcmp ole <8 x double> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i64>
  ret <8 x i64> %3
}