; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX

;
; Float Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq:
; SSE: # BB#0:
; SSE-NEXT: cmpeqps (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_eq:
; AVX: # BB#0:
; AVX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp oeq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne:
; SSE: # BB#0:
; SSE-NEXT: cmpneqps (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_ne:
; AVX: # BB#0:
; AVX-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp une <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord:
; SSE: # BB#0:
; SSE-NEXT: cmpordps (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_ord:
; AVX: # BB#0:
; AVX-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ord <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno:
; SSE: # BB#0:
; SSE-NEXT: cmpunordps (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_uno:
; AVX: # BB#0:
; AVX-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp uno <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

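; ueq/one have no single cmpps predicate, so each is expanded into a pair of
; compares (eq+unord or neq+ord) combined with orps/andps; the loaded value is
; kept in a register rather than folded into the compare.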
define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm1
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: cmpeqps %xmm0, %xmm2
; SSE-NEXT: cmpunordps %xmm1, %xmm0
; SSE-NEXT: orps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_ueq:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %xmm1
; AVX-NEXT: vcmpeqps %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vorps %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ueq <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_one:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm1
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: cmpneqps %xmm0, %xmm2
; SSE-NEXT: cmpordps %xmm1, %xmm0
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_one:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %xmm1
; AVX-NEXT: vcmpneqps %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpordps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp one <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

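; lt/le cannot be safely commuted, so the load is not folded into the compare
; and the original operand order is preserved.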
define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm1
; SSE-NEXT: cmpltps %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_lt:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %xmm1
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp olt <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; SSE-LABEL: commute_cmpps_le:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm1
; SSE-NEXT: cmpleps %xmm0, %xmm1
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_le:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %xmm1
; AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
  %1 = load <4 x float>, <4 x float>* %a0
  %2 = fcmp ole <4 x float> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

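; 256-bit versions: under SSE the compare is split into two 128-bit halves,
; while AVX uses a single ymm compare and still folds the load for the
; commutable predicates.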
define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_eq_ymm:
; SSE: # BB#0:
; SSE-NEXT: cmpeqps (%rdi), %xmm0
; SSE-NEXT: cmpeqps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_eq_ymm:
; AVX: # BB#0:
; AVX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp oeq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ne_ymm:
; SSE: # BB#0:
; SSE-NEXT: cmpneqps (%rdi), %xmm0
; SSE-NEXT: cmpneqps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_ne_ymm:
; AVX: # BB#0:
; AVX-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp une <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ord_ymm:
; SSE: # BB#0:
; SSE-NEXT: cmpordps (%rdi), %xmm0
; SSE-NEXT: cmpordps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_ord_ymm:
; AVX: # BB#0:
; AVX-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ord <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_uno_ymm:
; SSE: # BB#0:
; SSE-NEXT: cmpunordps (%rdi), %xmm0
; SSE-NEXT: cmpunordps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_uno_ymm:
; AVX: # BB#0:
; AVX-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp uno <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_ueq_ymm:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm2
; SSE-NEXT: movaps 16(%rdi), %xmm3
; SSE-NEXT: movaps %xmm2, %xmm4
; SSE-NEXT: cmpeqps %xmm0, %xmm4
; SSE-NEXT: cmpunordps %xmm2, %xmm0
; SSE-NEXT: orps %xmm4, %xmm0
; SSE-NEXT: movaps %xmm3, %xmm2
; SSE-NEXT: cmpeqps %xmm1, %xmm2
; SSE-NEXT: cmpunordps %xmm3, %xmm1
; SSE-NEXT: orps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_ueq_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %ymm1
; AVX-NEXT: vcmpeqps %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpunordps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ueq <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_one_ymm:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm2
; SSE-NEXT: movaps 16(%rdi), %xmm3
; SSE-NEXT: movaps %xmm2, %xmm4
; SSE-NEXT: cmpneqps %xmm0, %xmm4
; SSE-NEXT: cmpordps %xmm2, %xmm0
; SSE-NEXT: andps %xmm4, %xmm0
; SSE-NEXT: movaps %xmm3, %xmm2
; SSE-NEXT: cmpneqps %xmm1, %xmm2
; SSE-NEXT: cmpordps %xmm3, %xmm1
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_one_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %ymm1
; AVX-NEXT: vcmpneqps %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpordps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp one <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_lt_ymm:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm2
; SSE-NEXT: movaps 16(%rdi), %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_lt_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %ymm1
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp olt <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; SSE-LABEL: commute_cmpps_le_ymm:
; SSE: # BB#0:
; SSE-NEXT: movaps (%rdi), %xmm2
; SSE-NEXT: movaps 16(%rdi), %xmm3
; SSE-NEXT: cmpleps %xmm0, %xmm2
; SSE-NEXT: cmpleps %xmm1, %xmm3
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: movaps %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmpps_le_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovaps (%rdi), %ymm1
; AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
  %1 = load <8 x float>, <8 x float>* %a0
  %2 = fcmp ole <8 x float> %1, %a1
  %3 = sext <8 x i1> %2 to <8 x i32>
  ret <8 x i32> %3
}

;
; Double Comparisons
; Only equal/not-equal/ordered/unordered can be safely commuted
;

define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq:
; SSE: # BB#0:
; SSE-NEXT: cmpeqpd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_eq:
; AVX: # BB#0:
; AVX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp oeq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne:
; SSE: # BB#0:
; SSE-NEXT: cmpneqpd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_ne:
; AVX: # BB#0:
; AVX-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp une <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord:
; SSE: # BB#0:
; SSE-NEXT: cmpordpd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_ord:
; AVX: # BB#0:
; AVX-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ord <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm1
; SSE-NEXT: movapd %xmm1, %xmm2
; SSE-NEXT: cmpeqpd %xmm0, %xmm2
; SSE-NEXT: cmpunordpd %xmm1, %xmm0
; SSE-NEXT: orpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_ueq:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %xmm1
; AVX-NEXT: vcmpeqpd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpunordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ueq <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_one:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm1
; SSE-NEXT: movapd %xmm1, %xmm2
; SSE-NEXT: cmpneqpd %xmm0, %xmm2
; SSE-NEXT: cmpordpd %xmm1, %xmm0
; SSE-NEXT: andpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_one:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %xmm1
; AVX-NEXT: vcmpneqpd %xmm0, %xmm1, %xmm2
; AVX-NEXT: vcmpordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp one <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno:
; SSE: # BB#0:
; SSE-NEXT: cmpunordpd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_uno:
; AVX: # BB#0:
; AVX-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp uno <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm1
; SSE-NEXT: cmpltpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_lt:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %xmm1
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp olt <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; SSE-LABEL: commute_cmppd_le:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm1
; SSE-NEXT: cmplepd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_le:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %xmm1
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
  %1 = load <2 x double>, <2 x double>* %a0
  %2 = fcmp ole <2 x double> %1, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <4 x i64> @commute_cmppd_eq_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_eq_ymm:
; SSE: # BB#0:
; SSE-NEXT: cmpeqpd (%rdi), %xmm0
; SSE-NEXT: cmpeqpd 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_eq_ymm:
; AVX: # BB#0:
; AVX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp oeq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ne_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ne_ymm:
; SSE: # BB#0:
; SSE-NEXT: cmpneqpd (%rdi), %xmm0
; SSE-NEXT: cmpneqpd 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_ne_ymm:
; AVX: # BB#0:
; AVX-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp une <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ord_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ord_ymm:
; SSE: # BB#0:
; SSE-NEXT: cmpordpd (%rdi), %xmm0
; SSE-NEXT: cmpordpd 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_ord_ymm:
; AVX: # BB#0:
; AVX-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ord <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_uno_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_uno_ymm:
; SSE: # BB#0:
; SSE-NEXT: cmpunordpd (%rdi), %xmm0
; SSE-NEXT: cmpunordpd 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_uno_ymm:
; AVX: # BB#0:
; AVX-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp uno <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_ueq_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_ueq_ymm:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm2
; SSE-NEXT: movapd 16(%rdi), %xmm3
; SSE-NEXT: movapd %xmm2, %xmm4
; SSE-NEXT: cmpeqpd %xmm0, %xmm4
; SSE-NEXT: cmpunordpd %xmm2, %xmm0
; SSE-NEXT: orpd %xmm4, %xmm0
; SSE-NEXT: movapd %xmm3, %xmm2
; SSE-NEXT: cmpeqpd %xmm1, %xmm2
; SSE-NEXT: cmpunordpd %xmm3, %xmm1
; SSE-NEXT: orpd %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_ueq_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %ymm1
; AVX-NEXT: vcmpeqpd %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpunordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vorpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ueq <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_one_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_one_ymm:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm2
; SSE-NEXT: movapd 16(%rdi), %xmm3
; SSE-NEXT: movapd %xmm2, %xmm4
; SSE-NEXT: cmpneqpd %xmm0, %xmm4
; SSE-NEXT: cmpordpd %xmm2, %xmm0
; SSE-NEXT: andpd %xmm4, %xmm0
; SSE-NEXT: movapd %xmm3, %xmm2
; SSE-NEXT: cmpneqpd %xmm1, %xmm2
; SSE-NEXT: cmpordpd %xmm3, %xmm1
; SSE-NEXT: andpd %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_one_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %ymm1
; AVX-NEXT: vcmpneqpd %ymm0, %ymm1, %ymm2
; AVX-NEXT: vcmpordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vandpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp one <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_lt_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_lt_ymm:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm2
; SSE-NEXT: movapd 16(%rdi), %xmm3
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: cmpltpd %xmm1, %xmm3
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: movapd %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_lt_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %ymm1
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp olt <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @commute_cmppd_le_ymm(<4 x double>* %a0, <4 x double> %a1) {
; SSE-LABEL: commute_cmppd_le_ymm:
; SSE: # BB#0:
; SSE-NEXT: movapd (%rdi), %xmm2
; SSE-NEXT: movapd 16(%rdi), %xmm3
; SSE-NEXT: cmplepd %xmm0, %xmm2
; SSE-NEXT: cmplepd %xmm1, %xmm3
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: movapd %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: commute_cmppd_le_ymm:
; AVX: # BB#0:
; AVX-NEXT: vmovapd (%rdi), %ymm1
; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
  %1 = load <4 x double>, <4 x double>* %a0
  %2 = fcmp ole <4 x double> %1, %a1
  %3 = sext <4 x i1> %2 to <4 x i64>
  ret <4 x i64> %3
}