1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX 4 5; This test is an assembly of avx512 instructions to check their scheduling 6 7define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { 8; GENERIC-LABEL: addpd512: 9; GENERIC: # %bb.0: # %entry 10; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 11; GENERIC-NEXT: retq # sched: [1:1.00] 12; 13; SKX-LABEL: addpd512: 14; SKX: # %bb.0: # %entry 15; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] 16; SKX-NEXT: retq # sched: [7:1.00] 17entry: 18 %add.i = fadd <8 x double> %x, %y 19 ret <8 x double> %add.i 20} 21 22define <8 x double> @addpd512fold(<8 x double> %y) { 23; GENERIC-LABEL: addpd512fold: 24; GENERIC: # %bb.0: # %entry 25; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] 26; GENERIC-NEXT: retq # sched: [1:1.00] 27; 28; SKX-LABEL: addpd512fold: 29; SKX: # %bb.0: # %entry 30; SKX-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] 31; SKX-NEXT: retq # sched: [7:1.00] 32entry: 33 %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00> 34 ret <8 x double> %add.i 35} 36 37define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { 38; GENERIC-LABEL: addps512: 39; GENERIC: # %bb.0: # %entry 40; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 41; GENERIC-NEXT: retq # sched: [1:1.00] 42; 43; SKX-LABEL: addps512: 44; SKX: # %bb.0: # %entry 45; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] 46; SKX-NEXT: retq # sched: [7:1.00] 47entry: 48 %add.i = fadd <16 x float> %x, %y 49 ret <16 x float> %add.i 50} 51 52define <16 x float> @addps512fold(<16 x float> %y) { 53; GENERIC-LABEL: addps512fold: 54; GENERIC: # %bb.0: # %entry 55; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00] 56; GENERIC-NEXT: retq # sched: [1:1.00] 57; 58; SKX-LABEL: addps512fold: 59; SKX: # %bb.0: # %entry 60; SKX-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] 61; SKX-NEXT: retq # sched: [7:1.00] 62entry: 63 %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000> 64 ret <16 x float> %add.i 65} 66 67define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { 68; GENERIC-LABEL: subpd512: 69; GENERIC: # %bb.0: # %entry 70; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 71; GENERIC-NEXT: retq # sched: [1:1.00] 72; 73; SKX-LABEL: subpd512: 74; SKX: # %bb.0: # %entry 75; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] 76; SKX-NEXT: retq # sched: [7:1.00] 77entry: 78 %sub.i = fsub <8 x double> %x, %y 79 ret <8 x double> %sub.i 80} 81 82define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) { 83; GENERIC-LABEL: subpd512fold: 84; GENERIC: # %bb.0: # %entry 85; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 86; GENERIC-NEXT: retq # sched: [1:1.00] 87; 88; SKX-LABEL: subpd512fold: 89; SKX: # %bb.0: # %entry 90; SKX-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50] 91; SKX-NEXT: retq # sched: [7:1.00] 92entry: 93 %tmp2 = load <8 x double>, <8 x double>* %x, align 8 94 %sub.i = fsub <8 x double> %y, %tmp2 95 ret <8 x double> %sub.i 96} 97 98define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) { 99; GENERIC-LABEL: subps512: 100; GENERIC: # %bb.0: # %entry 101; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] 102; GENERIC-NEXT: retq # sched: [1:1.00] 103; 104; SKX-LABEL: subps512: 105; SKX: # %bb.0: # %entry 106; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] 107; SKX-NEXT: retq # sched: [7:1.00] 108entry: 109 %sub.i = fsub <16 x float> %x, %y 110 ret <16 x float> %sub.i 111} 112 113define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) { 114; GENERIC-LABEL: subps512fold: 115; GENERIC: # %bb.0: # %entry 116; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] 117; GENERIC-NEXT: retq # sched: [1:1.00] 118; 119; SKX-LABEL: subps512fold: 120; SKX: # %bb.0: # %entry 121; SKX-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] 122; SKX-NEXT: retq # sched: [7:1.00] 123entry: 124 %tmp2 = load <16 x float>, <16 x float>* %x, align 4 125 %sub.i = fsub <16 x float> %y, %tmp2 126 ret <16 x float> %sub.i 127} 128 129define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { 130; GENERIC-LABEL: imulq512: 131; GENERIC: # %bb.0: 132; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00] 133; GENERIC-NEXT: retq # sched: [1:1.00] 134; 135; SKX-LABEL: imulq512: 136; SKX: # %bb.0: 137; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50] 138; SKX-NEXT: retq # sched: [7:1.00] 139 %z = mul <8 x i64>%x, %y 140 ret <8 x i64>%z 141} 142 143define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { 144; GENERIC-LABEL: imulq256: 145; GENERIC: # %bb.0: 146; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00] 147; GENERIC-NEXT: retq # sched: [1:1.00] 148; 149; SKX-LABEL: imulq256: 150; SKX: # %bb.0: 151; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50] 152; SKX-NEXT: retq # sched: [7:1.00] 153 %z = mul <4 x i64>%x, %y 154 ret <4 x i64>%z 155} 156 157define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { 158; GENERIC-LABEL: imulq128: 159; GENERIC: # %bb.0: 160; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00] 161; GENERIC-NEXT: retq # sched: [1:1.00] 162; 163; SKX-LABEL: imulq128: 164; SKX: # %bb.0: 165; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50] 166; SKX-NEXT: retq # sched: [7:1.00] 167 %z = mul <2 x i64>%x, %y 168 ret <2 x i64>%z 169} 170 171define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { 172; GENERIC-LABEL: mulpd512: 173; GENERIC: # %bb.0: # %entry 174; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00] 175; GENERIC-NEXT: retq # sched: [1:1.00] 176; 177; SKX-LABEL: mulpd512: 178; SKX: # %bb.0: # %entry 179; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] 180; SKX-NEXT: retq # sched: [7:1.00] 181entry: 182 %mul.i = fmul <8 x double> %x, %y 183 ret <8 x double> %mul.i 184} 185 186define <8 x double> @mulpd512fold(<8 x double> %y) { 187; GENERIC-LABEL: mulpd512fold: 188; GENERIC: # %bb.0: # %entry 189; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00] 190; GENERIC-NEXT: retq # sched: [1:1.00] 191; 192; SKX-LABEL: mulpd512fold: 193; SKX: # %bb.0: # %entry 194; SKX-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] 195; SKX-NEXT: retq # sched: [7:1.00] 196entry: 197 %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> 198 ret <8 x double> %mul.i 199} 200 201define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { 202; GENERIC-LABEL: mulps512: 203; GENERIC: # %bb.0: # %entry 204; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] 205; GENERIC-NEXT: retq # sched: [1:1.00] 206; 207; SKX-LABEL: mulps512: 208; SKX: # %bb.0: # %entry 209; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] 210; SKX-NEXT: retq # sched: [7:1.00] 211entry: 212 %mul.i = fmul <16 x float> %x, %y 213 ret <16 x float> %mul.i 214} 215 216define <16 x float> @mulps512fold(<16 x float> %y) { 217; GENERIC-LABEL: mulps512fold: 218; GENERIC: # %bb.0: # %entry 219; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00] 220; GENERIC-NEXT: retq # sched: [1:1.00] 221; 222; SKX-LABEL: mulps512fold: 223; SKX: # %bb.0: # %entry 224; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] 225; SKX-NEXT: retq # sched: [7:1.00] 226entry: 227 %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000> 228 ret <16 x float> %mul.i 229} 230 231define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) { 232; GENERIC-LABEL: divpd512: 233; GENERIC: # %bb.0: # %entry 234; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00] 235; GENERIC-NEXT: retq # sched: [1:1.00] 236; 237; SKX-LABEL: divpd512: 238; SKX: # %bb.0: # %entry 239; SKX-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:16.00] 240; SKX-NEXT: retq # sched: [7:1.00] 241entry: 242 %div.i = fdiv <8 x double> %x, %y 243 ret <8 x double> %div.i 244} 245 246define <8 x double> @divpd512fold(<8 x double> %y) { 247; GENERIC-LABEL: divpd512fold: 248; GENERIC: # %bb.0: # %entry 249; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00] 250; GENERIC-NEXT: retq # sched: [1:1.00] 251; 252; SKX-LABEL: divpd512fold: 253; SKX: # %bb.0: # %entry 254; SKX-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:16.00] 255; SKX-NEXT: retq # sched: [7:1.00] 256entry: 257 %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00> 258 ret <8 x double> %div.i 259} 260 261define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) { 262; GENERIC-LABEL: divps512: 263; GENERIC: # %bb.0: # %entry 264; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00] 265; GENERIC-NEXT: retq # sched: [1:1.00] 266; 267; SKX-LABEL: divps512: 268; SKX: # %bb.0: # %entry 269; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00] 270; SKX-NEXT: retq # sched: [7:1.00] 271entry: 272 %div.i = fdiv <16 x float> %x, %y 273 ret <16 x float> %div.i 274} 275 276define <16 x float> @divps512fold(<16 x float> %y) { 277; GENERIC-LABEL: divps512fold: 278; GENERIC: # %bb.0: # %entry 279; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00] 280; GENERIC-NEXT: retq # sched: [1:1.00] 281; 282; SKX-LABEL: divps512fold: 283; SKX: # %bb.0: # %entry 284; SKX-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [25:10.00] 285; SKX-NEXT: retq # sched: [7:1.00] 286entry: 287 %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000> 288 ret <16 x float> %div.i 289} 290 291define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { 292; GENERIC-LABEL: vpaddq_test: 293; GENERIC: # %bb.0: 294; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 295; GENERIC-NEXT: retq # sched: [1:1.00] 296; 297; SKX-LABEL: vpaddq_test: 298; SKX: # %bb.0: 299; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 300; SKX-NEXT: retq # sched: [7:1.00] 301 %x = add <8 x i64> %i, %j 302 ret <8 x i64> %x 303} 304 305define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { 306; GENERIC-LABEL: vpaddq_fold_test: 307; GENERIC: # %bb.0: 308; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50] 309; GENERIC-NEXT: retq # sched: [1:1.00] 310; 311; SKX-LABEL: vpaddq_fold_test: 312; SKX: # %bb.0: 313; SKX-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50] 314; SKX-NEXT: retq # sched: [7:1.00] 315 %tmp = load <8 x i64>, <8 x i64>* %j, align 4 316 %x = add <8 x i64> %i, %tmp 317 ret <8 x i64> %x 318} 319 320define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { 321; GENERIC-LABEL: vpaddq_broadcast_test: 322; GENERIC: # %bb.0: 323; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 324; GENERIC-NEXT: retq # sched: [1:1.00] 325; 326; SKX-LABEL: vpaddq_broadcast_test: 327; SKX: # %bb.0: 328; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 329; SKX-NEXT: retq # sched: [7:1.00] 330 %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> 331 ret <8 x i64> %x 332} 333 334define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { 335; GENERIC-LABEL: vpaddq_broadcast2_test: 336; GENERIC: # %bb.0: 337; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 338; GENERIC-NEXT: retq # sched: [1:1.00] 339; 340; SKX-LABEL: vpaddq_broadcast2_test: 341; SKX: # %bb.0: 342; SKX-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 343; SKX-NEXT: retq # sched: [7:1.00] 344 %tmp = load i64, i64* %j 345 %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0 346 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1 347 %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2 348 %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3 349 %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4 350 %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5 351 %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6 352 %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7 353 %x = add <8 x i64> %i, %j.7 354 ret <8 x i64> %x 355} 356 357define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { 358; GENERIC-LABEL: vpaddd_test: 359; GENERIC: # %bb.0: 360; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 361; GENERIC-NEXT: retq # sched: [1:1.00] 362; 363; SKX-LABEL: vpaddd_test: 364; SKX: # %bb.0: 365; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 366; SKX-NEXT: retq # sched: [7:1.00] 367 %x = add <16 x i32> %i, %j 368 ret <16 x i32> %x 369} 370 371define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { 372; GENERIC-LABEL: vpaddd_fold_test: 373; GENERIC: # %bb.0: 374; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50] 375; GENERIC-NEXT: retq # sched: [1:1.00] 376; 377; SKX-LABEL: vpaddd_fold_test: 378; SKX: # %bb.0: 379; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50] 380; SKX-NEXT: retq # sched: [7:1.00] 381 %tmp = load <16 x i32>, <16 x i32>* %j, align 4 382 %x = add <16 x i32> %i, %tmp 383 ret <16 x i32> %x 384} 385 386define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { 387; GENERIC-LABEL: vpaddd_broadcast_test: 388; GENERIC: # %bb.0: 389; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 390; GENERIC-NEXT: retq # sched: [1:1.00] 391; 392; SKX-LABEL: vpaddd_broadcast_test: 393; SKX: # %bb.0: 394; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 395; SKX-NEXT: retq # sched: [7:1.00] 396 %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 397 ret <16 x i32> %x 398} 399 400define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { 401; GENERIC-LABEL: vpaddd_mask_test: 402; GENERIC: # %bb.0: 403; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 404; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50] 405; GENERIC-NEXT: retq # sched: [1:1.00] 406; 407; SKX-LABEL: vpaddd_mask_test: 408; SKX: # %bb.0: 409; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 410; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33] 411; SKX-NEXT: retq # sched: [7:1.00] 412 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 413 %x = add <16 x i32> %i, %j 414 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i 415 ret <16 x i32> %r 416} 417 418define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { 419; GENERIC-LABEL: vpaddd_maskz_test: 420; GENERIC: # %bb.0: 421; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 422; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 423; GENERIC-NEXT: retq # sched: [1:1.00] 424; 425; SKX-LABEL: vpaddd_maskz_test: 426; SKX: # %bb.0: 427; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 428; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 429; SKX-NEXT: retq # sched: [7:1.00] 430 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 431 %x = add <16 x i32> %i, %j 432 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 433 ret <16 x i32> %r 434} 435 436define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { 437; GENERIC-LABEL: vpaddd_mask_fold_test: 438; GENERIC: # %bb.0: 439; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 440; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50] 441; GENERIC-NEXT: retq # sched: [1:1.00] 442; 443; SKX-LABEL: vpaddd_mask_fold_test: 444; SKX: # %bb.0: 445; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 446; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50] 447; SKX-NEXT: retq # sched: [7:1.00] 448 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 449 %j = load <16 x i32>, <16 x i32>* %j.ptr 450 %x = add <16 x i32> %i, %j 451 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i 452 ret <16 x i32> %r 453} 454 455define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { 456; GENERIC-LABEL: vpaddd_mask_broadcast_test: 457; GENERIC: # %bb.0: 458; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 459; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50] 460; GENERIC-NEXT: retq # sched: [1:1.00] 461; 462; SKX-LABEL: vpaddd_mask_broadcast_test: 463; SKX: # %bb.0: 464; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 465; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50] 466; SKX-NEXT: retq # sched: [7:1.00] 467 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 468 %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 469 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i 470 ret <16 x i32> %r 471} 472 473define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { 474; GENERIC-LABEL: vpaddd_maskz_fold_test: 475; GENERIC: # %bb.0: 476; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 477; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] 478; GENERIC-NEXT: retq # sched: [1:1.00] 479; 480; SKX-LABEL: vpaddd_maskz_fold_test: 481; SKX: # %bb.0: 482; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 483; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] 484; SKX-NEXT: retq # sched: [7:1.00] 485 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 486 %j = load <16 x i32>, <16 x i32>* %j.ptr 487 %x = add <16 x i32> %i, %j 488 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 489 ret <16 x i32> %r 490} 491 492define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { 493; GENERIC-LABEL: vpaddd_maskz_broadcast_test: 494; GENERIC: # %bb.0: 495; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 496; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] 497; GENERIC-NEXT: retq # sched: [1:1.00] 498; 499; SKX-LABEL: vpaddd_maskz_broadcast_test: 500; SKX: # %bb.0: 501; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 502; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] 503; SKX-NEXT: retq # sched: [7:1.00] 504 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 505 %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 506 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 507 ret <16 x i32> %r 508} 509 510define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { 511; GENERIC-LABEL: vpsubq_test: 512; GENERIC: # %bb.0: 513; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 514; GENERIC-NEXT: retq # sched: [1:1.00] 515; 516; SKX-LABEL: vpsubq_test: 517; SKX: # %bb.0: 518; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 519; SKX-NEXT: retq # sched: [7:1.00] 520 %x = sub <8 x i64> %i, %j 521 ret <8 x i64> %x 522} 523 524define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { 525; GENERIC-LABEL: vpsubd_test: 526; GENERIC: # %bb.0: 527; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 528; GENERIC-NEXT: retq # sched: [1:1.00] 529; 530; SKX-LABEL: vpsubd_test: 531; SKX: # %bb.0: 532; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 533; SKX-NEXT: retq # sched: [7:1.00] 534 %x = sub <16 x i32> %i, %j 535 ret <16 x i32> %x 536} 537 538define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { 539; GENERIC-LABEL: vpmulld_test: 540; GENERIC: # %bb.0: 541; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 542; GENERIC-NEXT: retq # sched: [1:1.00] 543; 544; SKX-LABEL: vpmulld_test: 545; SKX: # %bb.0: 546; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00] 547; SKX-NEXT: retq # sched: [7:1.00] 548 %x = mul <16 x i32> %i, %j 549 ret <16 x i32> %x 550} 551 552declare float @sqrtf(float) readnone 553define float @sqrtA(float %a) nounwind uwtable readnone ssp { 554; GENERIC-LABEL: sqrtA: 555; GENERIC: # %bb.0: # %entry 556; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] 557; GENERIC-NEXT: retq # sched: [1:1.00] 558; 559; SKX-LABEL: sqrtA: 560; SKX: # %bb.0: # %entry 561; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] 562; SKX-NEXT: retq # sched: [7:1.00] 563entry: 564 %conv1 = tail call float @sqrtf(float %a) nounwind readnone 565 ret float %conv1 566} 567 568declare double @sqrt(double) readnone 569define double @sqrtB(double %a) nounwind uwtable readnone ssp { 570; GENERIC-LABEL: sqrtB: 571; GENERIC: # %bb.0: # %entry 572; GENERIC-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00] 573; GENERIC-NEXT: retq # sched: [1:1.00] 574; 575; SKX-LABEL: sqrtB: 576; SKX: # %bb.0: # %entry 577; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] 578; SKX-NEXT: retq # sched: [7:1.00] 579entry: 580 %call = tail call double @sqrt(double %a) nounwind readnone 581 ret double %call 582} 583 584declare float @llvm.sqrt.f32(float) 585define float @sqrtC(float %a) nounwind { 586; GENERIC-LABEL: sqrtC: 587; GENERIC: # %bb.0: 588; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] 589; GENERIC-NEXT: retq # sched: [1:1.00] 590; 591; SKX-LABEL: sqrtC: 592; SKX: # %bb.0: 593; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] 594; SKX-NEXT: retq # sched: [7:1.00] 595 %b = call float @llvm.sqrt.f32(float %a) 596 ret float %b 597} 598 599declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) 600define <16 x float> @sqrtD(<16 x float> %a) nounwind { 601; GENERIC-LABEL: sqrtD: 602; GENERIC: # %bb.0: 603; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [29:28.00] 604; GENERIC-NEXT: retq # sched: [1:1.00] 605; 606; SKX-LABEL: sqrtD: 607; SKX: # %bb.0: 608; SKX-NEXT: vsqrtps %zmm0, %zmm0 # sched: [20:12.00] 609; SKX-NEXT: retq # sched: [7:1.00] 610 %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a) 611 ret <16 x float> %b 612} 613 614declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) 615define <8 x double> @sqrtE(<8 x double> %a) nounwind { 616; GENERIC-LABEL: sqrtE: 617; GENERIC: # %bb.0: 618; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [45:44.00] 619; GENERIC-NEXT: retq # sched: [1:1.00] 620; 621; SKX-LABEL: sqrtE: 622; SKX: # %bb.0: 623; SKX-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [32:24.00] 624; SKX-NEXT: retq # sched: [7:1.00] 625 %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a) 626 ret <8 x double> %b 627} 628 629define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { 630; GENERIC-LABEL: fadd_broadcast: 631; GENERIC: # %bb.0: 632; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] 633; GENERIC-NEXT: retq # sched: [1:1.00] 634; 635; SKX-LABEL: fadd_broadcast: 636; SKX: # %bb.0: 637; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] 638; SKX-NEXT: retq # sched: [7:1.00] 639 %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> 640 ret <16 x float> %b 641} 642 643define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { 644; GENERIC-LABEL: addq_broadcast: 645; GENERIC: # %bb.0: 646; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 647; GENERIC-NEXT: retq # sched: [1:1.00] 648; 649; SKX-LABEL: addq_broadcast: 650; SKX: # %bb.0: 651; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 652; SKX-NEXT: retq # sched: [7:1.00] 653 %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> 654 ret <8 x i64> %b 655} 656 657define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { 658; GENERIC-LABEL: orq_broadcast: 659; GENERIC: # %bb.0: 660; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] 661; GENERIC-NEXT: retq # sched: [1:1.00] 662; 663; SKX-LABEL: orq_broadcast: 664; SKX: # %bb.0: 665; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 666; SKX-NEXT: retq # sched: [7:1.00] 667 %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2> 668 ret <8 x i64> %b 669} 670 671define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { 672; GENERIC-LABEL: andd512fold: 673; GENERIC: # %bb.0: # %entry 674; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00] 675; GENERIC-NEXT: retq # sched: [1:1.00] 676; 677; SKX-LABEL: andd512fold: 678; SKX: # %bb.0: # %entry 679; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50] 680; SKX-NEXT: retq # sched: [7:1.00] 681entry: 682 %a = load <16 x i32>, <16 x i32>* %x, align 4 683 %b = and <16 x i32> %y, %a 684 ret <16 x i32> %b 685} 686 687define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { 688; GENERIC-LABEL: andqbrst: 689; GENERIC: # %bb.0: # %entry 690; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00] 691; GENERIC-NEXT: retq # sched: [1:1.00] 692; 693; SKX-LABEL: andqbrst: 694; SKX: # %bb.0: # %entry 695; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 696; SKX-NEXT: retq # sched: [7:1.00] 697entry: 698 %a = load i64, i64* %ap, align 8 699 %b = insertelement <8 x i64> undef, i64 %a, i32 0 700 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer 701 %d = and <8 x i64> %p1, %c 702 ret <8 x i64>%d 703} 704 705define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, 706; GENERIC-LABEL: test_mask_vaddps: 707; GENERIC: # %bb.0: 708; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 709; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] 710; GENERIC-NEXT: retq # sched: [1:1.00] 711; 712; SKX-LABEL: test_mask_vaddps: 713; SKX: # %bb.0: 714; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 715; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] 716; SKX-NEXT: retq # sched: [7:1.00] 717 <16 x float> %j, <16 x i32> %mask1) 718 nounwind readnone { 719 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 720 %x = fadd <16 x float> %i, %j 721 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst 722 ret <16 x float> %r 723} 724 725define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { 726; GENERIC-LABEL: test_mask_vmulps: 727; GENERIC: # %bb.0: 728; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 729; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00] 730; GENERIC-NEXT: retq # sched: [1:1.00] 731; 732; SKX-LABEL: test_mask_vmulps: 733; SKX: # %bb.0: 734; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 735; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] 736; SKX-NEXT: retq # sched: [7:1.00] 737 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 738 %x = fmul <16 x float> %i, %j 739 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst 740 ret <16 x float> %r 741} 742 743define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { 744; GENERIC-LABEL: test_mask_vminps: 745; GENERIC: # %bb.0: 746; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 747; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] 748; GENERIC-NEXT: retq # sched: [1:1.00] 749; 750; SKX-LABEL: test_mask_vminps: 751; SKX: # %bb.0: 752; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 753; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] 754; SKX-NEXT: retq # sched: [7:1.00] 755 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 756 %cmp_res = fcmp olt <16 x float> %i, %j 757 %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j 758 %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst 759 ret <16 x float> %r 760} 761 762define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone { 763; GENERIC-LABEL: test_mask_vminpd: 764; GENERIC: # %bb.0: 765; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 766; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] 767; GENERIC-NEXT: retq # sched: [1:1.00] 768; 769; SKX-LABEL: test_mask_vminpd: 770; SKX: # %bb.0: 771; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 772; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] 773; SKX-NEXT: retq # sched: [7:1.00] 774 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 775 %cmp_res = fcmp olt <8 x double> %i, %j 776 %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j 777 %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst 778 ret <8 x double> %r 779} 780 781define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { 782; GENERIC-LABEL: test_mask_vmaxps: 783; GENERIC: # %bb.0: 784; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 785; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] 786; GENERIC-NEXT: retq # sched: [1:1.00] 787; 788; SKX-LABEL: test_mask_vmaxps: 789; SKX: # %bb.0: 790; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 791; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] 792; SKX-NEXT: retq # sched: [7:1.00] 793 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 794 %cmp_res = fcmp ogt <16 x float> %i, %j 795 %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j 796 %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst 797 ret <16 x float> %r 798} 799 800define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone { 801; GENERIC-LABEL: test_mask_vmaxpd: 802; GENERIC: # %bb.0: 803; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33] 804; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] 805; GENERIC-NEXT: retq # sched: [1:1.00] 806; 807; SKX-LABEL: test_mask_vmaxpd: 808; SKX: # %bb.0: 809; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] 810; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] 811; SKX-NEXT: retq # sched: [7:1.00] 812 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 813 %cmp_res = fcmp ogt <8 x double> %i, %j 814 %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j 815 %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst 816 ret <8 x double> %r 817} 818 819define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { 820; GENERIC-LABEL: test_mask_vsubps: 821; GENERIC: # %bb.0: 822; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 823; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] 824; GENERIC-NEXT: retq # sched: [1:1.00] 825; 826; SKX-LABEL: test_mask_vsubps: 827; SKX: # %bb.0: 828; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 829; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] 830; SKX-NEXT: retq # sched: [7:1.00] 831 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 832 %x = fsub <16 x float> %i, %j 833 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst 834 ret <16 x float> %r 835} 836 837define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone { 838; GENERIC-LABEL: test_mask_vdivps: 839; GENERIC: # %bb.0: 840; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33] 841; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00] 842; GENERIC-NEXT: retq # sched: [1:1.00] 843; 844; SKX-LABEL: test_mask_vdivps: 845; SKX: # %bb.0: 846; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] 847; SKX-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [18:10.00] 848; SKX-NEXT: retq # sched: [7:1.00] 849 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 850 %x = fdiv <16 x float> %i, %j 851 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst 852 ret <16 x float> %r 853} 854 855define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { 856; GENERIC-LABEL: test_mask_vaddpd: 857; GENERIC: # %bb.0: 858; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33] 859; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00] 860; GENERIC-NEXT: retq # sched: [1:1.00] 861; 862; SKX-LABEL: test_mask_vaddpd: 863; SKX: # %bb.0: 864; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00] 865; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] 866; SKX-NEXT: retq # sched: [7:1.00] 867 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 868 %x = fadd <8 x double> %i, %j 869 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst 870 ret <8 x double> %r 871} 872 873define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone { 874; GENERIC-LABEL: test_maskz_vaddpd: 875; GENERIC: # %bb.0: 876; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 877; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00] 878; GENERIC-NEXT: retq # sched: [1:1.00] 879; 880; SKX-LABEL: test_maskz_vaddpd: 881; SKX: # %bb.0: 882; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 883; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50] 884; SKX-NEXT: retq # sched: [7:1.00] 885 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 886 %x = fadd <8 x double> %i, %j 887 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer 888 ret <8 x double> %r 889} 890 891define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { 892; GENERIC-LABEL: test_mask_fold_vaddpd: 893; GENERIC: # %bb.0: 894; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 895; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] 896; GENERIC-NEXT: retq # sched: [1:1.00] 897; 898; SKX-LABEL: test_mask_fold_vaddpd: 899; SKX: # %bb.0: 900; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 901; SKX-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50] 902; SKX-NEXT: retq # sched: [7:1.00] 903 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 904 %tmp = load <8 x double>, <8 x double>* %j, align 8 905 %x = fadd <8 x double> %i, %tmp 906 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst 907 ret <8 x double> %r 908} 909 910define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind { 911; GENERIC-LABEL: test_maskz_fold_vaddpd: 912; GENERIC: # %bb.0: 913; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 914; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] 915; GENERIC-NEXT: retq # sched: [1:1.00] 916; 917; SKX-LABEL: test_maskz_fold_vaddpd: 918; SKX: # %bb.0: 919; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 920; SKX-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] 921; SKX-NEXT: retq # sched: [7:1.00] 922 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 923 %tmp = load <8 x double>, <8 x double>* %j, align 8 924 %x = fadd <8 x double> %i, %tmp 925 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer 926 ret <8 x double> %r 927} 928 929define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind { 930; GENERIC-LABEL: test_broadcast_vaddpd: 931; GENERIC: # %bb.0: 932; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [10:1.00] 933; GENERIC-NEXT: retq # sched: [1:1.00] 934; 935; SKX-LABEL: test_broadcast_vaddpd: 936; SKX: # %bb.0: 937; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50] 938; SKX-NEXT: retq # sched: [7:1.00] 939 %tmp = load double, double* %j 940 %b = insertelement <8 x double> undef, double %tmp, i32 0 941 %c = shufflevector <8 x double> %b, <8 x double> undef, 942 <8 x i32> zeroinitializer 943 %x = fadd <8 x double> %c, %i 944 ret <8 x double> %x 945} 946 947define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind { 948; GENERIC-LABEL: test_mask_broadcast_vaddpd: 949; GENERIC: # %bb.0: 950; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33] 951; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [10:1.00] 952; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 953; GENERIC-NEXT: retq # sched: [1:1.00] 954; 955; SKX-LABEL: test_mask_broadcast_vaddpd: 956; SKX: # %bb.0: 957; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] 958; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50] 959; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 960; SKX-NEXT: retq # sched: [7:1.00] 961 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 962 %tmp = load double, double* %j 963 %b = insertelement <8 x double> undef, double %tmp, i32 0 964 %c = shufflevector <8 x double> %b, <8 x double> undef, 965 <8 x i32> zeroinitializer 966 %x = fadd <8 x double> %c, %i 967 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i 968 ret <8 x double> %r 969} 970 971define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, 972; GENERIC-LABEL: test_maskz_broadcast_vaddpd: 973; GENERIC: # %bb.0: 974; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 975; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] 976; GENERIC-NEXT: retq # sched: [1:1.00] 977; 978; SKX-LABEL: test_maskz_broadcast_vaddpd: 979; SKX: # %bb.0: 980; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 981; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] 982; SKX-NEXT: retq # sched: [7:1.00] 983 <8 x i64> %mask1) nounwind { 984 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 985 %tmp = load double, double* %j 986 %b = insertelement <8 x double> undef, double %tmp, i32 0 987 %c = shufflevector <8 x double> %b, <8 x double> undef, 988 <8 x i32> zeroinitializer 989 %x = fadd <8 x double> %c, %i 990 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer 991 ret <8 x double> %r 992} 993 994define <16 x float> @test_fxor(<16 x float> %a) { 995; GENERIC-LABEL: test_fxor: 996; GENERIC: # %bb.0: 997; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] 998; GENERIC-NEXT: retq # sched: [1:1.00] 999; 1000; SKX-LABEL: test_fxor: 1001; SKX: # %bb.0: 1002; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 1003; SKX-NEXT: retq # sched: [7:1.00] 1004 1005 %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a 1006 ret <16 x float>%res 1007} 1008 1009define <8 x float> @test_fxor_8f32(<8 x float> %a) { 1010; GENERIC-LABEL: test_fxor_8f32: 1011; GENERIC: # %bb.0: 1012; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00] 1013; GENERIC-NEXT: retq # sched: [1:1.00] 1014; 1015; SKX-LABEL: test_fxor_8f32: 1016; SKX: # %bb.0: 1017; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] 1018; SKX-NEXT: retq # sched: [7:1.00] 1019 %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a 1020 ret <8 x float>%res 1021} 1022 1023define <8 x double> @fabs_v8f64(<8 x double> %p) 1024; GENERIC-LABEL: fabs_v8f64: 1025; GENERIC: # %bb.0: 1026; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00] 1027; GENERIC-NEXT: retq # sched: [1:1.00] 1028; 1029; SKX-LABEL: fabs_v8f64: 1030; SKX: # %bb.0: 1031; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 1032; SKX-NEXT: retq # sched: [7:1.00] 1033{ 1034 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) 1035 ret <8 x double> %t 1036} 1037declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) 1038 1039define <16 x float> @fabs_v16f32(<16 x float> %p) 1040; GENERIC-LABEL: fabs_v16f32: 1041; GENERIC: # %bb.0: 1042; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] 1043; GENERIC-NEXT: retq # sched: [1:1.00] 1044; 1045; SKX-LABEL: fabs_v16f32: 1046; SKX: # %bb.0: 1047; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 1048; SKX-NEXT: retq # sched: [7:1.00] 1049{ 1050 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1051 ret <16 x float> %t 1052} 1053declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1054 1055define double @test1(double %a, double %b) nounwind { 1056; GENERIC-LABEL: test1: 1057; GENERIC: # %bb.0: 1058; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 1059; GENERIC-NEXT: jne .LBB64_1 # sched: [1:1.00] 1060; GENERIC-NEXT: jnp .LBB64_2 # sched: [1:1.00] 1061; GENERIC-NEXT: .LBB64_1: # %l1 1062; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1063; GENERIC-NEXT: retq # sched: [1:1.00] 1064; GENERIC-NEXT: .LBB64_2: # %l2 1065; GENERIC-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1066; GENERIC-NEXT: retq # sched: [1:1.00] 1067; 1068; SKX-LABEL: test1: 1069; SKX: # %bb.0: 1070; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 1071; SKX-NEXT: jne .LBB64_1 # sched: [1:0.50] 1072; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50] 1073; SKX-NEXT: .LBB64_1: # %l1 1074; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1075; SKX-NEXT: retq # sched: [7:1.00] 1076; SKX-NEXT: .LBB64_2: # %l2 1077; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1078; SKX-NEXT: retq # sched: [7:1.00] 1079 %tobool = fcmp une double %a, %b 1080 br i1 %tobool, label %l1, label %l2 1081 1082l1: 1083 %c = fsub double %a, %b 1084 ret double %c 1085l2: 1086 %c1 = fadd double %a, %b 1087 ret double %c1 1088} 1089 1090define float @test2(float %a, float %b) nounwind { 1091; GENERIC-LABEL: test2: 1092; GENERIC: # %bb.0: 1093; GENERIC-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] 1094; GENERIC-NEXT: jbe .LBB65_2 # sched: [1:1.00] 1095; GENERIC-NEXT: # %bb.1: # %l1 1096; GENERIC-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1097; GENERIC-NEXT: retq # sched: [1:1.00] 1098; GENERIC-NEXT: .LBB65_2: # %l2 1099; GENERIC-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1100; GENERIC-NEXT: retq # sched: [1:1.00] 1101; 1102; SKX-LABEL: test2: 1103; SKX: # %bb.0: 1104; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] 1105; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50] 1106; SKX-NEXT: # %bb.1: # %l1 1107; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1108; SKX-NEXT: retq # sched: [7:1.00] 1109; SKX-NEXT: .LBB65_2: # %l2 1110; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1111; SKX-NEXT: retq # sched: [7:1.00] 1112 %tobool = fcmp olt float %a, %b 1113 br i1 %tobool, label %l1, label %l2 1114 1115l1: 1116 %c = fsub float %a, %b 1117 ret float %c 1118l2: 1119 %c1 = fadd float %a, %b 1120 ret float %c1 1121} 1122 1123define i32 @test3(float %a, float %b) { 1124; GENERIC-LABEL: test3: 1125; GENERIC: # %bb.0: 1126; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00] 1127; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] 1128; GENERIC-NEXT: retq # sched: [1:1.00] 1129; 1130; SKX-LABEL: test3: 1131; SKX: # %bb.0: 1132; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00] 1133; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] 1134; SKX-NEXT: retq # sched: [7:1.00] 1135 1136 %cmp10.i = fcmp oeq float %a, %b 1137 %conv11.i = zext i1 %cmp10.i to i32 1138 ret i32 %conv11.i 1139} 1140 1141define float @test5(float %p) #0 { 1142; GENERIC-LABEL: test5: 1143; GENERIC: # %bb.0: # %entry 1144; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 1145; GENERIC-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] 1146; GENERIC-NEXT: jne .LBB67_1 # sched: [1:1.00] 1147; GENERIC-NEXT: jp .LBB67_1 # sched: [1:1.00] 1148; GENERIC-NEXT: # %bb.2: # %return 1149; GENERIC-NEXT: retq # sched: [1:1.00] 1150; GENERIC-NEXT: .LBB67_1: # %if.end 1151; GENERIC-NEXT: seta %al # sched: [2:1.00] 1152; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33] 1153; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 1154; GENERIC-NEXT: retq # sched: [1:1.00] 1155; 1156; SKX-LABEL: test5: 1157; SKX: # %bb.0: # %entry 1158; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 1159; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] 1160; SKX-NEXT: jne .LBB67_1 # sched: [1:0.50] 1161; SKX-NEXT: jp .LBB67_1 # sched: [1:0.50] 1162; SKX-NEXT: # %bb.2: # %return 1163; SKX-NEXT: retq # sched: [7:1.00] 1164; SKX-NEXT: .LBB67_1: # %if.end 1165; SKX-NEXT: seta %al # sched: [2:1.00] 1166; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25] 1167; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 1168; SKX-NEXT: retq # sched: [7:1.00] 1169entry: 1170 %cmp = fcmp oeq float %p, 0.000000e+00 1171 br i1 %cmp, label %return, label %if.end 1172 1173if.end: ; preds = %entry 1174 %cmp1 = fcmp ogt float %p, 0.000000e+00 1175 %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00 1176 br label %return 1177 1178return: ; preds = %if.end, %entry 1179 %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ] 1180 ret float %retval.0 1181} 1182 1183define i32 @test6(i32 %a, i32 %b) { 1184; GENERIC-LABEL: test6: 1185; GENERIC: # %bb.0: 1186; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] 1187; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 1188; GENERIC-NEXT: sete %al # sched: [1:0.50] 1189; GENERIC-NEXT: retq # sched: [1:1.00] 1190; 1191; SKX-LABEL: test6: 1192; SKX: # %bb.0: 1193; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] 1194; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 1195; SKX-NEXT: sete %al # sched: [1:0.50] 1196; SKX-NEXT: retq # sched: [7:1.00] 1197 %cmp = icmp eq i32 %a, %b 1198 %res = zext i1 %cmp to i32 1199 ret i32 %res 1200} 1201 1202define i32 @test7(double %x, double %y) #2 { 1203; GENERIC-LABEL: test7: 1204; GENERIC: # %bb.0: # %entry 1205; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] 1206; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 1207; GENERIC-NEXT: setne %al # sched: [1:0.50] 1208; GENERIC-NEXT: retq # sched: [1:1.00] 1209; 1210; SKX-LABEL: test7: 1211; SKX: # %bb.0: # %entry 1212; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] 1213; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 1214; SKX-NEXT: setne %al # sched: [1:0.50] 1215; SKX-NEXT: retq # sched: [7:1.00] 1216entry: 1217 %0 = fcmp one double %x, %y 1218 %or = zext i1 %0 to i32 1219 ret i32 %or 1220} 1221 1222define i32 @test8(i32 %a1, i32 %a2, i32 %a3) { 1223; GENERIC-LABEL: test8: 1224; GENERIC: # %bb.0: 1225; GENERIC-NEXT: xorl $-2147483648, %esi # imm = 0x80000000 1226; GENERIC-NEXT: # sched: [1:0.33] 1227; GENERIC-NEXT: testl %edx, %edx # sched: [1:0.33] 1228; GENERIC-NEXT: movl $1, %eax # sched: [1:0.33] 1229; GENERIC-NEXT: cmovel %eax, %edx # sched: [2:0.67] 1230; GENERIC-NEXT: notl %edi # sched: [1:0.33] 1231; GENERIC-NEXT: orl %edi, %esi # sched: [1:0.33] 1232; GENERIC-NEXT: cmovnel %edx, %eax # sched: [2:0.67] 1233; GENERIC-NEXT: retq # sched: [1:1.00] 1234; 1235; SKX-LABEL: test8: 1236; SKX: # %bb.0: 1237; SKX-NEXT: notl %edi # sched: [1:0.25] 1238; SKX-NEXT: xorl $-2147483648, %esi # imm = 0x80000000 1239; SKX-NEXT: # sched: [1:0.25] 1240; SKX-NEXT: testl %edx, %edx # sched: [1:0.25] 1241; SKX-NEXT: movl $1, %eax # sched: [1:0.25] 1242; SKX-NEXT: cmovel %eax, %edx # sched: [1:0.50] 1243; SKX-NEXT: orl %edi, %esi # sched: [1:0.25] 1244; SKX-NEXT: cmovnel %edx, %eax # sched: [1:0.50] 1245; SKX-NEXT: retq # sched: [7:1.00] 1246 %tmp1 = icmp eq i32 %a1, -1 1247 %tmp2 = icmp eq i32 %a2, -2147483648 1248 %tmp3 = and i1 %tmp1, %tmp2 1249 %tmp4 = icmp eq i32 %a3, 0 1250 %tmp5 = or i1 %tmp3, %tmp4 1251 %res = select i1 %tmp5, i32 1, i32 %a3 1252 ret i32 %res 1253} 1254 1255define i32 @test9(i64 %a) { 1256; GENERIC-LABEL: test9: 1257; GENERIC: # %bb.0: 1258; GENERIC-NEXT: testb $1, %dil # sched: [1:0.33] 1259; GENERIC-NEXT: jne .LBB71_2 # sched: [1:1.00] 1260; GENERIC-NEXT: # %bb.1: # %A 1261; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33] 1262; GENERIC-NEXT: retq # sched: [1:1.00] 1263; GENERIC-NEXT: .LBB71_2: # %B 1264; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] 1265; GENERIC-NEXT: retq # sched: [1:1.00] 1266; 1267; SKX-LABEL: test9: 1268; SKX: # %bb.0: 1269; SKX-NEXT: testb $1, %dil # sched: [1:0.25] 1270; SKX-NEXT: jne .LBB71_2 # sched: [1:0.50] 1271; SKX-NEXT: # %bb.1: # %A 1272; SKX-NEXT: movl $6, %eax # sched: [1:0.25] 1273; SKX-NEXT: retq # sched: [7:1.00] 1274; SKX-NEXT: .LBB71_2: # %B 1275; SKX-NEXT: movl $7, %eax # sched: [1:0.25] 1276; SKX-NEXT: retq # sched: [7:1.00] 1277 %b = and i64 %a, 1 1278 %cmp10.i = icmp eq i64 %b, 0 1279 br i1 %cmp10.i, label %A, label %B 1280A: 1281 ret i32 6 1282B: 1283 ret i32 7 1284} 1285 1286define i32 @test10(i64 %b, i64 %c, i1 %d) { 1287; GENERIC-LABEL: test10: 1288; GENERIC: # %bb.0: 1289; GENERIC-NEXT: movl %edx, %eax # sched: [1:0.33] 1290; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] 1291; GENERIC-NEXT: cmpq %rsi, %rdi # sched: [1:0.33] 1292; GENERIC-NEXT: sete %cl # sched: [1:0.50] 1293; GENERIC-NEXT: orb %dl, %cl # sched: [1:0.33] 1294; GENERIC-NEXT: andb $1, %cl # sched: [1:0.33] 1295; GENERIC-NEXT: cmpb %cl, %al # sched: [1:0.33] 1296; GENERIC-NEXT: je .LBB72_1 # sched: [1:1.00] 1297; GENERIC-NEXT: # %bb.2: # %if.end.i 1298; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33] 1299; GENERIC-NEXT: retq # sched: [1:1.00] 1300; GENERIC-NEXT: .LBB72_1: # %if.then.i 1301; GENERIC-NEXT: movl $5, %eax # sched: [1:0.33] 1302; GENERIC-NEXT: retq # sched: [1:1.00] 1303; 1304; SKX-LABEL: test10: 1305; SKX: # %bb.0: 1306; SKX-NEXT: movl %edx, %eax # sched: [1:0.25] 1307; SKX-NEXT: andb $1, %al # sched: [1:0.25] 1308; SKX-NEXT: cmpq %rsi, %rdi # sched: [1:0.25] 1309; SKX-NEXT: sete %cl # sched: [1:0.50] 1310; SKX-NEXT: orb %dl, %cl # sched: [1:0.25] 1311; SKX-NEXT: andb $1, %cl # sched: [1:0.25] 1312; SKX-NEXT: cmpb %cl, %al # sched: [1:0.25] 1313; SKX-NEXT: je .LBB72_1 # sched: [1:0.50] 1314; SKX-NEXT: # %bb.2: # %if.end.i 1315; SKX-NEXT: movl $6, %eax # sched: [1:0.25] 1316; SKX-NEXT: retq # sched: [7:1.00] 1317; SKX-NEXT: .LBB72_1: # %if.then.i 1318; SKX-NEXT: movl $5, %eax # sched: [1:0.25] 1319; SKX-NEXT: retq # sched: [7:1.00] 1320 1321 %cmp8.i = icmp eq i64 %b, %c 1322 %or1 = or i1 %d, %cmp8.i 1323 %xor1 = xor i1 %d, %or1 1324 br i1 %xor1, label %if.end.i, label %if.then.i 1325 1326if.then.i: 1327 ret i32 5 1328 1329if.end.i: 1330 ret i32 6 1331} 1332 1333define <16 x float> @sitof32(<16 x i32> %a) nounwind { 1334; GENERIC-LABEL: sitof32: 1335; GENERIC: # %bb.0: 1336; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 1337; GENERIC-NEXT: retq # sched: [1:1.00] 1338; 1339; SKX-LABEL: sitof32: 1340; SKX: # %bb.0: 1341; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 1342; SKX-NEXT: retq # sched: [7:1.00] 1343 %b = sitofp <16 x i32> %a to <16 x float> 1344 ret <16 x float> %b 1345} 1346 1347define <8 x double> @sltof864(<8 x i64> %a) { 1348; GENERIC-LABEL: sltof864: 1349; GENERIC: # %bb.0: 1350; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] 1351; GENERIC-NEXT: retq # sched: [1:1.00] 1352; 1353; SKX-LABEL: sltof864: 1354; SKX: # %bb.0: 1355; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] 1356; SKX-NEXT: retq # sched: [7:1.00] 1357 %b = sitofp <8 x i64> %a to <8 x double> 1358 ret <8 x double> %b 1359} 1360 1361define <4 x double> @slto4f64(<4 x i64> %a) { 1362; GENERIC-LABEL: slto4f64: 1363; GENERIC: # %bb.0: 1364; GENERIC-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:1.00] 1365; GENERIC-NEXT: retq # sched: [1:1.00] 1366; 1367; SKX-LABEL: slto4f64: 1368; SKX: # %bb.0: 1369; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50] 1370; SKX-NEXT: retq # sched: [7:1.00] 1371 %b = sitofp <4 x i64> %a to <4 x double> 1372 ret <4 x double> %b 1373} 1374 1375define <2 x double> @slto2f64(<2 x i64> %a) { 1376; GENERIC-LABEL: slto2f64: 1377; GENERIC: # %bb.0: 1378; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00] 1379; GENERIC-NEXT: retq # sched: [1:1.00] 1380; 1381; SKX-LABEL: slto2f64: 1382; SKX: # %bb.0: 1383; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50] 1384; SKX-NEXT: retq # sched: [7:1.00] 1385 %b = sitofp <2 x i64> %a to <2 x double> 1386 ret <2 x double> %b 1387} 1388 1389define <2 x float> @sltof2f32(<2 x i64> %a) { 1390; GENERIC-LABEL: sltof2f32: 1391; GENERIC: # %bb.0: 1392; GENERIC-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [3:1.00] 1393; GENERIC-NEXT: retq # sched: [1:1.00] 1394; 1395; SKX-LABEL: sltof2f32: 1396; SKX: # %bb.0: 1397; SKX-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00] 1398; SKX-NEXT: retq # sched: [7:1.00] 1399 %b = sitofp <2 x i64> %a to <2 x float> 1400 ret <2 x float>%b 1401} 1402 1403define <4 x float> @slto4f32_mem(<4 x i64>* %a) { 1404; GENERIC-LABEL: slto4f32_mem: 1405; GENERIC: # %bb.0: 1406; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [10:1.00] 1407; GENERIC-NEXT: retq # sched: [1:1.00] 1408; 1409; SKX-LABEL: slto4f32_mem: 1410; SKX: # %bb.0: 1411; SKX-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [11:0.50] 1412; SKX-NEXT: retq # sched: [7:1.00] 1413 %a1 = load <4 x i64>, <4 x i64>* %a, align 8 1414 %b = sitofp <4 x i64> %a1 to <4 x float> 1415 ret <4 x float>%b 1416} 1417 1418define <4 x i64> @f64to4sl(<4 x double> %a) { 1419; GENERIC-LABEL: f64to4sl: 1420; GENERIC: # %bb.0: 1421; GENERIC-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:1.00] 1422; GENERIC-NEXT: retq # sched: [1:1.00] 1423; 1424; SKX-LABEL: f64to4sl: 1425; SKX: # %bb.0: 1426; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50] 1427; SKX-NEXT: retq # sched: [7:1.00] 1428 %b = fptosi <4 x double> %a to <4 x i64> 1429 ret <4 x i64> %b 1430} 1431 1432define <4 x i64> @f32to4sl(<4 x float> %a) { 1433; GENERIC-LABEL: f32to4sl: 1434; GENERIC: # %bb.0: 1435; GENERIC-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [3:1.00] 1436; GENERIC-NEXT: retq # sched: [1:1.00] 1437; 1438; SKX-LABEL: f32to4sl: 1439; SKX: # %bb.0: 1440; SKX-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00] 1441; SKX-NEXT: retq # sched: [7:1.00] 1442 %b = fptosi <4 x float> %a to <4 x i64> 1443 ret <4 x i64> %b 1444} 1445 1446define <4 x float> @slto4f32(<4 x i64> %a) { 1447; GENERIC-LABEL: slto4f32: 1448; GENERIC: # %bb.0: 1449; GENERIC-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00] 1450; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1451; GENERIC-NEXT: retq # sched: [1:1.00] 1452; 1453; SKX-LABEL: slto4f32: 1454; SKX: # %bb.0: 1455; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00] 1456; SKX-NEXT: vzeroupper # sched: [4:1.00] 1457; SKX-NEXT: retq # sched: [7:1.00] 1458 %b = sitofp <4 x i64> %a to <4 x float> 1459 ret <4 x float> %b 1460} 1461 1462define <4 x float> @ulto4f32(<4 x i64> %a) { 1463; GENERIC-LABEL: ulto4f32: 1464; GENERIC: # %bb.0: 1465; GENERIC-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00] 1466; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1467; GENERIC-NEXT: retq # sched: [1:1.00] 1468; 1469; SKX-LABEL: ulto4f32: 1470; SKX: # %bb.0: 1471; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00] 1472; SKX-NEXT: vzeroupper # sched: [4:1.00] 1473; SKX-NEXT: retq # sched: [7:1.00] 1474 %b = uitofp <4 x i64> %a to <4 x float> 1475 ret <4 x float> %b 1476} 1477 1478define <8 x double> @ulto8f64(<8 x i64> %a) { 1479; GENERIC-LABEL: ulto8f64: 1480; GENERIC: # %bb.0: 1481; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] 1482; GENERIC-NEXT: retq # sched: [1:1.00] 1483; 1484; SKX-LABEL: ulto8f64: 1485; SKX: # %bb.0: 1486; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] 1487; SKX-NEXT: retq # sched: [7:1.00] 1488 %b = uitofp <8 x i64> %a to <8 x double> 1489 ret <8 x double> %b 1490} 1491 1492define <16 x double> @ulto16f64(<16 x i64> %a) { 1493; GENERIC-LABEL: ulto16f64: 1494; GENERIC: # %bb.0: 1495; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00] 1496; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00] 1497; GENERIC-NEXT: retq # sched: [1:1.00] 1498; 1499; SKX-LABEL: ulto16f64: 1500; SKX: # %bb.0: 1501; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] 1502; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50] 1503; SKX-NEXT: retq # sched: [7:1.00] 1504 %b = uitofp <16 x i64> %a to <16 x double> 1505 ret <16 x double> %b 1506} 1507 1508define <16 x i32> @f64to16si(<16 x float> %a) nounwind { 1509; GENERIC-LABEL: f64to16si: 1510; GENERIC: # %bb.0: 1511; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] 1512; GENERIC-NEXT: retq # sched: [1:1.00] 1513; 1514; SKX-LABEL: f64to16si: 1515; SKX: # %bb.0: 1516; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] 1517; SKX-NEXT: retq # sched: [7:1.00] 1518 %b = fptosi <16 x float> %a to <16 x i32> 1519 ret <16 x i32> %b 1520} 1521 1522define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { 1523; GENERIC-LABEL: f32to16ui: 1524; GENERIC: # %bb.0: 1525; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00] 1526; GENERIC-NEXT: retq # sched: [1:1.00] 1527; 1528; SKX-LABEL: f32to16ui: 1529; SKX: # %bb.0: 1530; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50] 1531; SKX-NEXT: retq # sched: [7:1.00] 1532 %b = fptoui <16 x float> %a to <16 x i32> 1533 ret <16 x i32> %b 1534} 1535 1536define <16 x i8> @f32to16uc(<16 x float> %f) { 1537; GENERIC-LABEL: f32to16uc: 1538; GENERIC: # %bb.0: 1539; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] 1540; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00] 1541; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1542; GENERIC-NEXT: retq # sched: [1:1.00] 1543; 1544; SKX-LABEL: f32to16uc: 1545; SKX: # %bb.0: 1546; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] 1547; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] 1548; SKX-NEXT: vzeroupper # sched: [4:1.00] 1549; SKX-NEXT: retq # sched: [7:1.00] 1550 %res = fptoui <16 x float> %f to <16 x i8> 1551 ret <16 x i8> %res 1552} 1553 1554define <16 x i16> @f32to16us(<16 x float> %f) { 1555; GENERIC-LABEL: f32to16us: 1556; GENERIC: # %bb.0: 1557; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] 1558; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 # sched: [1:1.00] 1559; GENERIC-NEXT: retq # sched: [1:1.00] 1560; 1561; SKX-LABEL: f32to16us: 1562; SKX: # %bb.0: 1563; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] 1564; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00] 1565; SKX-NEXT: retq # sched: [7:1.00] 1566 %res = fptoui <16 x float> %f to <16 x i16> 1567 ret <16 x i16> %res 1568} 1569 1570define <8 x i32> @f32to8ui(<8 x float> %a) nounwind { 1571; GENERIC-LABEL: f32to8ui: 1572; GENERIC: # %bb.0: 1573; GENERIC-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [3:1.00] 1574; GENERIC-NEXT: retq # sched: [1:1.00] 1575; 1576; SKX-LABEL: f32to8ui: 1577; SKX: # %bb.0: 1578; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50] 1579; SKX-NEXT: retq # sched: [7:1.00] 1580 %b = fptoui <8 x float> %a to <8 x i32> 1581 ret <8 x i32> %b 1582} 1583 1584define <4 x i32> @f32to4ui(<4 x float> %a) nounwind { 1585; GENERIC-LABEL: f32to4ui: 1586; GENERIC: # %bb.0: 1587; GENERIC-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [3:1.00] 1588; GENERIC-NEXT: retq # sched: [1:1.00] 1589; 1590; SKX-LABEL: f32to4ui: 1591; SKX: # %bb.0: 1592; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50] 1593; SKX-NEXT: retq # sched: [7:1.00] 1594 %b = fptoui <4 x float> %a to <4 x i32> 1595 ret <4 x i32> %b 1596} 1597 1598define <8 x i32> @f64to8ui(<8 x double> %a) nounwind { 1599; GENERIC-LABEL: f64to8ui: 1600; GENERIC: # %bb.0: 1601; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [4:1.00] 1602; GENERIC-NEXT: retq # sched: [1:1.00] 1603; 1604; SKX-LABEL: f64to8ui: 1605; SKX: # %bb.0: 1606; SKX-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00] 1607; SKX-NEXT: retq # sched: [7:1.00] 1608 %b = fptoui <8 x double> %a to <8 x i32> 1609 ret <8 x i32> %b 1610} 1611 1612define <8 x i16> @f64to8us(<8 x double> %f) { 1613; GENERIC-LABEL: f64to8us: 1614; GENERIC: # %bb.0: 1615; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] 1616; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] 1617; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1618; GENERIC-NEXT: retq # sched: [1:1.00] 1619; 1620; SKX-LABEL: f64to8us: 1621; SKX: # %bb.0: 1622; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] 1623; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] 1624; SKX-NEXT: vzeroupper # sched: [4:1.00] 1625; SKX-NEXT: retq # sched: [7:1.00] 1626 %res = fptoui <8 x double> %f to <8 x i16> 1627 ret <8 x i16> %res 1628} 1629 1630define <8 x i8> @f64to8uc(<8 x double> %f) { 1631; GENERIC-LABEL: f64to8uc: 1632; GENERIC: # %bb.0: 1633; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] 1634; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] 1635; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1636; GENERIC-NEXT: retq # sched: [1:1.00] 1637; 1638; SKX-LABEL: f64to8uc: 1639; SKX: # %bb.0: 1640; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] 1641; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] 1642; SKX-NEXT: vzeroupper # sched: [4:1.00] 1643; SKX-NEXT: retq # sched: [7:1.00] 1644 %res = fptoui <8 x double> %f to <8 x i8> 1645 ret <8 x i8> %res 1646} 1647 1648define <4 x i32> @f64to4ui(<4 x double> %a) nounwind { 1649; GENERIC-LABEL: f64to4ui: 1650; GENERIC: # %bb.0: 1651; GENERIC-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00] 1652; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1653; GENERIC-NEXT: retq # sched: [1:1.00] 1654; 1655; SKX-LABEL: f64to4ui: 1656; SKX: # %bb.0: 1657; SKX-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00] 1658; SKX-NEXT: vzeroupper # sched: [4:1.00] 1659; SKX-NEXT: retq # sched: [7:1.00] 1660 %b = fptoui <4 x double> %a to <4 x i32> 1661 ret <4 x i32> %b 1662} 1663 1664define <8 x double> @sito8f64(<8 x i32> %a) { 1665; GENERIC-LABEL: sito8f64: 1666; GENERIC: # %bb.0: 1667; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 1668; GENERIC-NEXT: retq # sched: [1:1.00] 1669; 1670; SKX-LABEL: sito8f64: 1671; SKX: # %bb.0: 1672; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 1673; SKX-NEXT: retq # sched: [7:1.00] 1674 %b = sitofp <8 x i32> %a to <8 x double> 1675 ret <8 x double> %b 1676} 1677define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { 1678; GENERIC-LABEL: i32to8f64_mask: 1679; GENERIC: # %bb.0: 1680; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 1681; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] 1682; GENERIC-NEXT: retq # sched: [1:1.00] 1683; 1684; SKX-LABEL: i32to8f64_mask: 1685; SKX: # %bb.0: 1686; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 1687; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] 1688; SKX-NEXT: retq # sched: [7:1.00] 1689; VLNOBW-LABEL: i32to8f64_mask: 1690; VLNOBW: # %bb.0: 1691; VLNOBW-NEXT: kmovw %edi, %k1 1692; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} 1693; VLNOBW-NEXT: ret{{[l|q]}} 1694 %1 = bitcast i8 %c to <8 x i1> 1695 %2 = sitofp <8 x i32> %b to <8 x double> 1696 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a 1697 ret <8 x double> %3 1698} 1699define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { 1700; GENERIC-LABEL: sito8f64_maskz: 1701; GENERIC: # %bb.0: 1702; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 1703; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] 1704; GENERIC-NEXT: retq # sched: [1:1.00] 1705; 1706; SKX-LABEL: sito8f64_maskz: 1707; SKX: # %bb.0: 1708; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 1709; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] 1710; SKX-NEXT: retq # sched: [7:1.00] 1711; VLNOBW-LABEL: sito8f64_maskz: 1712; VLNOBW: # %bb.0: 1713; VLNOBW-NEXT: kmovw %edi, %k1 1714; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} 1715; VLNOBW-NEXT: ret{{[l|q]}} 1716 %1 = bitcast i8 %b to <8 x i1> 1717 %2 = sitofp <8 x i32> %a to <8 x double> 1718 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer 1719 ret <8 x double> %3 1720} 1721 1722define <8 x i32> @f64to8si(<8 x double> %a) { 1723; GENERIC-LABEL: f64to8si: 1724; GENERIC: # %bb.0: 1725; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00] 1726; GENERIC-NEXT: retq # sched: [1:1.00] 1727; 1728; SKX-LABEL: f64to8si: 1729; SKX: # %bb.0: 1730; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] 1731; SKX-NEXT: retq # sched: [7:1.00] 1732 %b = fptosi <8 x double> %a to <8 x i32> 1733 ret <8 x i32> %b 1734} 1735 1736define <4 x i32> @f64to4si(<4 x double> %a) { 1737; GENERIC-LABEL: f64to4si: 1738; GENERIC: # %bb.0: 1739; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] 1740; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1741; GENERIC-NEXT: retq # sched: [1:1.00] 1742; 1743; SKX-LABEL: f64to4si: 1744; SKX: # %bb.0: 1745; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] 1746; SKX-NEXT: vzeroupper # sched: [4:1.00] 1747; SKX-NEXT: retq # sched: [7:1.00] 1748 %b = fptosi <4 x double> %a to <4 x i32> 1749 ret <4 x i32> %b 1750} 1751 1752define <16 x float> @f64to16f32(<16 x double> %b) nounwind { 1753; GENERIC-LABEL: f64to16f32: 1754; GENERIC: # %bb.0: 1755; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] 1756; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00] 1757; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] 1758; GENERIC-NEXT: retq # sched: [1:1.00] 1759; 1760; SKX-LABEL: f64to16f32: 1761; SKX: # %bb.0: 1762; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] 1763; SKX-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00] 1764; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] 1765; SKX-NEXT: retq # sched: [7:1.00] 1766 %a = fptrunc <16 x double> %b to <16 x float> 1767 ret <16 x float> %a 1768} 1769 1770define <4 x float> @f64to4f32(<4 x double> %b) { 1771; GENERIC-LABEL: f64to4f32: 1772; GENERIC: # %bb.0: 1773; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] 1774; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1775; GENERIC-NEXT: retq # sched: [1:1.00] 1776; 1777; SKX-LABEL: f64to4f32: 1778; SKX: # %bb.0: 1779; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] 1780; SKX-NEXT: vzeroupper # sched: [4:1.00] 1781; SKX-NEXT: retq # sched: [7:1.00] 1782 %a = fptrunc <4 x double> %b to <4 x float> 1783 ret <4 x float> %a 1784} 1785 1786define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { 1787; GENERIC-LABEL: f64to4f32_mask: 1788; GENERIC: # %bb.0: 1789; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] 1790; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] 1791; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00] 1792; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1793; GENERIC-NEXT: retq # sched: [1:1.00] 1794; 1795; SKX-LABEL: f64to4f32_mask: 1796; SKX: # %bb.0: 1797; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] 1798; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] 1799; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00] 1800; SKX-NEXT: vzeroupper # sched: [4:1.00] 1801; SKX-NEXT: retq # sched: [7:1.00] 1802 %a = fptrunc <4 x double> %b to <4 x float> 1803 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer 1804 ret <4 x float> %c 1805} 1806 1807define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { 1808; GENERIC-LABEL: f64tof32_inreg: 1809; GENERIC: # %bb.0: 1810; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [4:1.00] 1811; GENERIC-NEXT: retq # sched: [1:1.00] 1812; 1813; SKX-LABEL: f64tof32_inreg: 1814; SKX: # %bb.0: 1815; SKX-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] 1816; SKX-NEXT: retq # sched: [7:1.00] 1817 %ext = extractelement <2 x double> %a0, i32 0 1818 %cvt = fptrunc double %ext to float 1819 %res = insertelement <4 x float> %a1, float %cvt, i32 0 1820 ret <4 x float> %res 1821} 1822 1823define <8 x double> @f32to8f64(<8 x float> %b) nounwind { 1824; GENERIC-LABEL: f32to8f64: 1825; GENERIC: # %bb.0: 1826; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] 1827; GENERIC-NEXT: retq # sched: [1:1.00] 1828; 1829; SKX-LABEL: f32to8f64: 1830; SKX: # %bb.0: 1831; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] 1832; SKX-NEXT: retq # sched: [7:1.00] 1833 %a = fpext <8 x float> %b to <8 x double> 1834 ret <8 x double> %a 1835} 1836 1837define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) { 1838; GENERIC-LABEL: f32to4f64_mask: 1839; GENERIC: # %bb.0: 1840; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] 1841; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00] 1842; GENERIC-NEXT: retq # sched: [1:1.00] 1843; 1844; SKX-LABEL: f32to4f64_mask: 1845; SKX: # %bb.0: 1846; SKX-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] 1847; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00] 1848; SKX-NEXT: retq # sched: [7:1.00] 1849 %a = fpext <4 x float> %b to <4 x double> 1850 %mask = fcmp ogt <4 x double> %a1, %b1 1851 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer 1852 ret <4 x double> %c 1853} 1854 1855define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { 1856; GENERIC-LABEL: f32tof64_inreg: 1857; GENERIC: # %bb.0: 1858; GENERIC-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 1859; GENERIC-NEXT: retq # sched: [1:1.00] 1860; 1861; SKX-LABEL: f32tof64_inreg: 1862; SKX: # %bb.0: 1863; SKX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 1864; SKX-NEXT: retq # sched: [7:1.00] 1865 %ext = extractelement <4 x float> %a1, i32 0 1866 %cvt = fpext float %ext to double 1867 %res = insertelement <2 x double> %a0, double %cvt, i32 0 1868 ret <2 x double> %res 1869} 1870 1871define double @sltof64_load(i64* nocapture %e) { 1872; GENERIC-LABEL: sltof64_load: 1873; GENERIC: # %bb.0: # %entry 1874; GENERIC-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1875; GENERIC-NEXT: retq # sched: [1:1.00] 1876; 1877; SKX-LABEL: sltof64_load: 1878; SKX: # %bb.0: # %entry 1879; SKX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1880; SKX-NEXT: retq # sched: [7:1.00] 1881entry: 1882 %tmp1 = load i64, i64* %e, align 8 1883 %conv = sitofp i64 %tmp1 to double 1884 ret double %conv 1885} 1886 1887define double @sitof64_load(i32* %e) { 1888; GENERIC-LABEL: sitof64_load: 1889; GENERIC: # %bb.0: # %entry 1890; GENERIC-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1891; GENERIC-NEXT: retq # sched: [1:1.00] 1892; 1893; SKX-LABEL: sitof64_load: 1894; SKX: # %bb.0: # %entry 1895; SKX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1896; SKX-NEXT: retq # sched: [7:1.00] 1897entry: 1898 %tmp1 = load i32, i32* %e, align 4 1899 %conv = sitofp i32 %tmp1 to double 1900 ret double %conv 1901} 1902 1903define float @sitof32_load(i32* %e) { 1904; GENERIC-LABEL: sitof32_load: 1905; GENERIC: # %bb.0: # %entry 1906; GENERIC-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 1907; GENERIC-NEXT: retq # sched: [1:1.00] 1908; 1909; SKX-LABEL: sitof32_load: 1910; SKX: # %bb.0: # %entry 1911; SKX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1912; SKX-NEXT: retq # sched: [7:1.00] 1913entry: 1914 %tmp1 = load i32, i32* %e, align 4 1915 %conv = sitofp i32 %tmp1 to float 1916 ret float %conv 1917} 1918 1919define float @sltof32_load(i64* %e) { 1920; GENERIC-LABEL: sltof32_load: 1921; GENERIC: # %bb.0: # %entry 1922; GENERIC-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 1923; GENERIC-NEXT: retq # sched: [1:1.00] 1924; 1925; SKX-LABEL: sltof32_load: 1926; SKX: # %bb.0: # %entry 1927; SKX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 1928; SKX-NEXT: retq # sched: [7:1.00] 1929entry: 1930 %tmp1 = load i64, i64* %e, align 8 1931 %conv = sitofp i64 %tmp1 to float 1932 ret float %conv 1933} 1934 1935define void @f32tof64_loadstore() { 1936; GENERIC-LABEL: f32tof64_loadstore: 1937; GENERIC: # %bb.0: # %entry 1938; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 1939; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] 1940; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1941; GENERIC-NEXT: retq # sched: [1:1.00] 1942; 1943; SKX-LABEL: f32tof64_loadstore: 1944; SKX: # %bb.0: # %entry 1945; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 1946; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 1947; SKX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1948; SKX-NEXT: retq # sched: [7:1.00] 1949entry: 1950 %f = alloca float, align 4 1951 %d = alloca double, align 8 1952 %tmp = load float, float* %f, align 4 1953 %conv = fpext float %tmp to double 1954 store double %conv, double* %d, align 8 1955 ret void 1956} 1957 1958define void @f64tof32_loadstore() nounwind uwtable { 1959; GENERIC-LABEL: f64tof32_loadstore: 1960; GENERIC: # %bb.0: # %entry 1961; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 1962; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 1963; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1964; GENERIC-NEXT: retq # sched: [1:1.00] 1965; 1966; SKX-LABEL: f64tof32_loadstore: 1967; SKX: # %bb.0: # %entry 1968; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 1969; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 1970; SKX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1971; SKX-NEXT: retq # sched: [7:1.00] 1972entry: 1973 %f = alloca float, align 4 1974 %d = alloca double, align 8 1975 %tmp = load double, double* %d, align 8 1976 %conv = fptrunc double %tmp to float 1977 store float %conv, float* %f, align 4 1978 ret void 1979} 1980 1981define double @long_to_double(i64 %x) { 1982; GENERIC-LABEL: long_to_double: 1983; GENERIC: # %bb.0: 1984; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 1985; GENERIC-NEXT: retq # sched: [1:1.00] 1986; 1987; SKX-LABEL: long_to_double: 1988; SKX: # %bb.0: 1989; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 1990; SKX-NEXT: retq # sched: [7:1.00] 1991 %res = bitcast i64 %x to double 1992 ret double %res 1993} 1994 1995define i64 @double_to_long(double %x) { 1996; GENERIC-LABEL: double_to_long: 1997; GENERIC: # %bb.0: 1998; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 1999; GENERIC-NEXT: retq # sched: [1:1.00] 2000; 2001; SKX-LABEL: double_to_long: 2002; SKX: # %bb.0: 2003; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 2004; SKX-NEXT: retq # sched: [7:1.00] 2005 %res = bitcast double %x to i64 2006 ret i64 %res 2007} 2008 2009define float @int_to_float(i32 %x) { 2010; GENERIC-LABEL: int_to_float: 2011; GENERIC: # %bb.0: 2012; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 2013; GENERIC-NEXT: retq # sched: [1:1.00] 2014; 2015; SKX-LABEL: int_to_float: 2016; SKX: # %bb.0: 2017; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 2018; SKX-NEXT: retq # sched: [7:1.00] 2019 %res = bitcast i32 %x to float 2020 ret float %res 2021} 2022 2023define i32 @float_to_int(float %x) { 2024; GENERIC-LABEL: float_to_int: 2025; GENERIC: # %bb.0: 2026; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 2027; GENERIC-NEXT: retq # sched: [1:1.00] 2028; 2029; SKX-LABEL: float_to_int: 2030; SKX: # %bb.0: 2031; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 2032; SKX-NEXT: retq # sched: [7:1.00] 2033 %res = bitcast float %x to i32 2034 ret i32 %res 2035} 2036 2037define <16 x double> @uito16f64(<16 x i32> %a) nounwind { 2038; GENERIC-LABEL: uito16f64: 2039; GENERIC: # %bb.0: 2040; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00] 2041; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] 2042; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00] 2043; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 2044; GENERIC-NEXT: retq # sched: [1:1.00] 2045; 2046; SKX-LABEL: uito16f64: 2047; SKX: # %bb.0: 2048; SKX-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00] 2049; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] 2050; SKX-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00] 2051; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 2052; SKX-NEXT: retq # sched: [7:1.00] 2053 %b = uitofp <16 x i32> %a to <16 x double> 2054 ret <16 x double> %b 2055} 2056 2057define <8 x float> @slto8f32(<8 x i64> %a) { 2058; GENERIC-LABEL: slto8f32: 2059; GENERIC: # %bb.0: 2060; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] 2061; GENERIC-NEXT: retq # sched: [1:1.00] 2062; 2063; SKX-LABEL: slto8f32: 2064; SKX: # %bb.0: 2065; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] 2066; SKX-NEXT: retq # sched: [7:1.00] 2067 %b = sitofp <8 x i64> %a to <8 x float> 2068 ret <8 x float> %b 2069} 2070 2071define <16 x float> @slto16f32(<16 x i64> %a) { 2072; GENERIC-LABEL: slto16f32: 2073; GENERIC: # %bb.0: 2074; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00] 2075; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [3:1.00] 2076; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] 2077; GENERIC-NEXT: retq # sched: [1:1.00] 2078; 2079; SKX-LABEL: slto16f32: 2080; SKX: # %bb.0: 2081; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] 2082; SKX-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00] 2083; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] 2084; SKX-NEXT: retq # sched: [7:1.00] 2085 %b = sitofp <16 x i64> %a to <16 x float> 2086 ret <16 x float> %b 2087} 2088 2089define <8 x double> @slto8f64(<8 x i64> %a) { 2090; GENERIC-LABEL: slto8f64: 2091; GENERIC: # %bb.0: 2092; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] 2093; GENERIC-NEXT: retq # sched: [1:1.00] 2094; 2095; SKX-LABEL: slto8f64: 2096; SKX: # %bb.0: 2097; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] 2098; SKX-NEXT: retq # sched: [7:1.00] 2099 %b = sitofp <8 x i64> %a to <8 x double> 2100 ret <8 x double> %b 2101} 2102 2103define <16 x double> @slto16f64(<16 x i64> %a) { 2104; GENERIC-LABEL: slto16f64: 2105; GENERIC: # %bb.0: 2106; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00] 2107; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00] 2108; GENERIC-NEXT: retq # sched: [1:1.00] 2109; 2110; SKX-LABEL: slto16f64: 2111; SKX: # %bb.0: 2112; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] 2113; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50] 2114; SKX-NEXT: retq # sched: [7:1.00] 2115 %b = sitofp <16 x i64> %a to <16 x double> 2116 ret <16 x double> %b 2117} 2118 2119define <8 x float> @ulto8f32(<8 x i64> %a) { 2120; GENERIC-LABEL: ulto8f32: 2121; GENERIC: # %bb.0: 2122; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] 2123; GENERIC-NEXT: retq # sched: [1:1.00] 2124; 2125; SKX-LABEL: ulto8f32: 2126; SKX: # %bb.0: 2127; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] 2128; SKX-NEXT: retq # sched: [7:1.00] 2129 %b = uitofp <8 x i64> %a to <8 x float> 2130 ret <8 x float> %b 2131} 2132 2133define <16 x float> @ulto16f32(<16 x i64> %a) { 2134; GENERIC-LABEL: ulto16f32: 2135; GENERIC: # %bb.0: 2136; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00] 2137; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [3:1.00] 2138; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00] 2139; GENERIC-NEXT: retq # sched: [1:1.00] 2140; 2141; SKX-LABEL: ulto16f32: 2142; SKX: # %bb.0: 2143; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] 2144; SKX-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00] 2145; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] 2146; SKX-NEXT: retq # sched: [7:1.00] 2147 %b = uitofp <16 x i64> %a to <16 x float> 2148 ret <16 x float> %b 2149} 2150 2151define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { 2152; GENERIC-LABEL: uito8f64_mask: 2153; GENERIC: # %bb.0: 2154; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 2155; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00] 2156; GENERIC-NEXT: retq # sched: [1:1.00] 2157; 2158; SKX-LABEL: uito8f64_mask: 2159; SKX: # %bb.0: 2160; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 2161; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] 2162; SKX-NEXT: retq # sched: [7:1.00] 2163; VLNOBW-LABEL: uito8f64_mask: 2164; VLNOBW: # %bb.0: 2165; VLNOBW-NEXT: kmovw %edi, %k1 2166; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 2167; VLNOBW-NEXT: ret{{[l|q]}} 2168 %1 = bitcast i8 %c to <8 x i1> 2169 %2 = uitofp <8 x i32> %b to <8 x double> 2170 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a 2171 ret <8 x double> %3 2172} 2173define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { 2174; GENERIC-LABEL: uito8f64_maskz: 2175; GENERIC: # %bb.0: 2176; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 2177; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00] 2178; GENERIC-NEXT: retq # sched: [1:1.00] 2179; 2180; SKX-LABEL: uito8f64_maskz: 2181; SKX: # %bb.0: 2182; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 2183; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] 2184; SKX-NEXT: retq # sched: [7:1.00] 2185 %1 = bitcast i8 %b to <8 x i1> 2186 %2 = uitofp <8 x i32> %a to <8 x double> 2187 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer 2188 ret <8 x double> %3 2189} 2190 2191define <4 x double> @uito4f64(<4 x i32> %a) nounwind { 2192; GENERIC-LABEL: uito4f64: 2193; GENERIC: # %bb.0: 2194; GENERIC-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [4:1.00] 2195; GENERIC-NEXT: retq # sched: [1:1.00] 2196; 2197; SKX-LABEL: uito4f64: 2198; SKX: # %bb.0: 2199; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00] 2200; SKX-NEXT: retq # sched: [7:1.00] 2201 %b = uitofp <4 x i32> %a to <4 x double> 2202 ret <4 x double> %b 2203} 2204 2205define <16 x float> @uito16f32(<16 x i32> %a) nounwind { 2206; GENERIC-LABEL: uito16f32: 2207; GENERIC: # %bb.0: 2208; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [3:1.00] 2209; GENERIC-NEXT: retq # sched: [1:1.00] 2210; 2211; SKX-LABEL: uito16f32: 2212; SKX: # %bb.0: 2213; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50] 2214; SKX-NEXT: retq # sched: [7:1.00] 2215 %b = uitofp <16 x i32> %a to <16 x float> 2216 ret <16 x float> %b 2217} 2218 2219define <8 x double> @uito8f64(<8 x i32> %a) { 2220; GENERIC-LABEL: uito8f64: 2221; GENERIC: # %bb.0: 2222; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00] 2223; GENERIC-NEXT: retq # sched: [1:1.00] 2224; 2225; SKX-LABEL: uito8f64: 2226; SKX: # %bb.0: 2227; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00] 2228; SKX-NEXT: retq # sched: [7:1.00] 2229 %b = uitofp <8 x i32> %a to <8 x double> 2230 ret <8 x double> %b 2231} 2232 2233define <8 x float> @uito8f32(<8 x i32> %a) nounwind { 2234; GENERIC-LABEL: uito8f32: 2235; GENERIC: # %bb.0: 2236; GENERIC-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [3:1.00] 2237; GENERIC-NEXT: retq # sched: [1:1.00] 2238; 2239; SKX-LABEL: uito8f32: 2240; SKX: # %bb.0: 2241; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50] 2242; SKX-NEXT: retq # sched: [7:1.00] 2243 %b = uitofp <8 x i32> %a to <8 x float> 2244 ret <8 x float> %b 2245} 2246 2247define <4 x float> @uito4f32(<4 x i32> %a) nounwind { 2248; GENERIC-LABEL: uito4f32: 2249; GENERIC: # %bb.0: 2250; GENERIC-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [3:1.00] 2251; GENERIC-NEXT: retq # sched: [1:1.00] 2252; 2253; SKX-LABEL: uito4f32: 2254; SKX: # %bb.0: 2255; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50] 2256; SKX-NEXT: retq # sched: [7:1.00] 2257 %b = uitofp <4 x i32> %a to <4 x float> 2258 ret <4 x float> %b 2259} 2260 2261define i32 @fptosi(float %a) nounwind { 2262; GENERIC-LABEL: fptosi: 2263; GENERIC: # %bb.0: 2264; GENERIC-NEXT: vcvttss2si %xmm0, %eax # sched: [5:1.00] 2265; GENERIC-NEXT: retq # sched: [1:1.00] 2266; 2267; SKX-LABEL: fptosi: 2268; SKX: # %bb.0: 2269; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [6:1.00] 2270; SKX-NEXT: retq # sched: [7:1.00] 2271 %b = fptosi float %a to i32 2272 ret i32 %b 2273} 2274 2275define i32 @fptoui(float %a) nounwind { 2276; GENERIC-LABEL: fptoui: 2277; GENERIC: # %bb.0: 2278; GENERIC-NEXT: vcvttss2usi %xmm0, %eax # sched: [5:1.00] 2279; GENERIC-NEXT: retq # sched: [1:1.00] 2280; 2281; SKX-LABEL: fptoui: 2282; SKX: # %bb.0: 2283; SKX-NEXT: vcvttss2usi %xmm0, %eax # sched: [6:1.00] 2284; SKX-NEXT: retq # sched: [7:1.00] 2285 %b = fptoui float %a to i32 2286 ret i32 %b 2287} 2288 2289define float @uitof32(i32 %a) nounwind { 2290; GENERIC-LABEL: uitof32: 2291; GENERIC: # %bb.0: 2292; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] 2293; GENERIC-NEXT: retq # sched: [1:1.00] 2294; 2295; SKX-LABEL: uitof32: 2296; SKX: # %bb.0: 2297; SKX-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] 2298; SKX-NEXT: retq # sched: [7:1.00] 2299 %b = uitofp i32 %a to float 2300 ret float %b 2301} 2302 2303define double @uitof64(i32 %a) nounwind { 2304; GENERIC-LABEL: uitof64: 2305; GENERIC: # %bb.0: 2306; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] 2307; GENERIC-NEXT: retq # sched: [1:1.00] 2308; 2309; SKX-LABEL: uitof64: 2310; SKX: # %bb.0: 2311; SKX-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] 2312; SKX-NEXT: retq # sched: [7:1.00] 2313 %b = uitofp i32 %a to double 2314 ret double %b 2315} 2316 2317define <16 x float> @sbto16f32(<16 x i32> %a) { 2318; GENERIC-LABEL: sbto16f32: 2319; GENERIC: # %bb.0: 2320; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] 2321; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 2322; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2323; GENERIC-NEXT: retq # sched: [1:1.00] 2324; 2325; SKX-LABEL: sbto16f32: 2326; SKX: # %bb.0: 2327; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] 2328; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 2329; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2330; SKX-NEXT: retq # sched: [7:1.00] 2331 %mask = icmp slt <16 x i32> %a, zeroinitializer 2332 %1 = sitofp <16 x i1> %mask to <16 x float> 2333 ret <16 x float> %1 2334} 2335 2336define <16 x float> @scto16f32(<16 x i8> %a) { 2337; GENERIC-LABEL: scto16f32: 2338; GENERIC: # %bb.0: 2339; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] 2340; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2341; GENERIC-NEXT: retq # sched: [1:1.00] 2342; 2343; SKX-LABEL: scto16f32: 2344; SKX: # %bb.0: 2345; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] 2346; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2347; SKX-NEXT: retq # sched: [7:1.00] 2348 %1 = sitofp <16 x i8> %a to <16 x float> 2349 ret <16 x float> %1 2350} 2351 2352define <16 x float> @ssto16f32(<16 x i16> %a) { 2353; GENERIC-LABEL: ssto16f32: 2354; GENERIC: # %bb.0: 2355; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] 2356; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2357; GENERIC-NEXT: retq # sched: [1:1.00] 2358; 2359; SKX-LABEL: ssto16f32: 2360; SKX: # %bb.0: 2361; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] 2362; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2363; SKX-NEXT: retq # sched: [7:1.00] 2364 %1 = sitofp <16 x i16> %a to <16 x float> 2365 ret <16 x float> %1 2366} 2367 2368define <8 x double> @ssto16f64(<8 x i16> %a) { 2369; GENERIC-LABEL: ssto16f64: 2370; GENERIC: # %bb.0: 2371; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] 2372; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2373; GENERIC-NEXT: retq # sched: [1:1.00] 2374; 2375; SKX-LABEL: ssto16f64: 2376; SKX: # %bb.0: 2377; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 2378; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2379; SKX-NEXT: retq # sched: [7:1.00] 2380 %1 = sitofp <8 x i16> %a to <8 x double> 2381 ret <8 x double> %1 2382} 2383 2384define <8 x double> @scto8f64(<8 x i8> %a) { 2385; GENERIC-LABEL: scto8f64: 2386; GENERIC: # %bb.0: 2387; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 2388; GENERIC-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:1.00] 2389; GENERIC-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00] 2390; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2391; GENERIC-NEXT: retq # sched: [1:1.00] 2392; 2393; SKX-LABEL: scto8f64: 2394; SKX: # %bb.0: 2395; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 2396; SKX-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:0.50] 2397; SKX-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50] 2398; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2399; SKX-NEXT: retq # sched: [7:1.00] 2400 %1 = sitofp <8 x i8> %a to <8 x double> 2401 ret <8 x double> %1 2402} 2403 2404define <16 x double> @scto16f64(<16 x i8> %a) { 2405; GENERIC-LABEL: scto16f64: 2406; GENERIC: # %bb.0: 2407; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00] 2408; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] 2409; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] 2410; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] 2411; GENERIC-NEXT: retq # sched: [1:1.00] 2412; 2413; SKX-LABEL: scto16f64: 2414; SKX: # %bb.0: 2415; SKX-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00] 2416; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] 2417; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] 2418; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] 2419; SKX-NEXT: retq # sched: [7:1.00] 2420 %b = sitofp <16 x i8> %a to <16 x double> 2421 ret <16 x double> %b 2422} 2423 2424define <16 x double> @sbto16f64(<16 x double> %a) { 2425; GENERIC-LABEL: sbto16f64: 2426; GENERIC: # %bb.0: 2427; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 2428; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] 2429; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] 2430; GENERIC-NEXT: kunpckbw %k0, %k1, %k0 # sched: [1:1.00] 2431; GENERIC-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.33] 2432; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] 2433; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] 2434; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] 2435; GENERIC-NEXT: retq # sched: [1:1.00] 2436; 2437; SKX-LABEL: sbto16f64: 2438; SKX: # %bb.0: 2439; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 2440; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00] 2441; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00] 2442; SKX-NEXT: kunpckbw %k0, %k1, %k0 # sched: [3:1.00] 2443; SKX-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.25] 2444; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] 2445; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] 2446; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] 2447; SKX-NEXT: retq # sched: [7:1.00] 2448 %cmpres = fcmp ogt <16 x double> %a, zeroinitializer 2449 %1 = sitofp <16 x i1> %cmpres to <16 x double> 2450 ret <16 x double> %1 2451} 2452 2453define <8 x double> @sbto8f64(<8 x double> %a) { 2454; GENERIC-LABEL: sbto8f64: 2455; GENERIC: # %bb.0: 2456; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2457; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] 2458; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33] 2459; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2460; GENERIC-NEXT: retq # sched: [1:1.00] 2461; 2462; SKX-LABEL: sbto8f64: 2463; SKX: # %bb.0: 2464; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2465; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] 2466; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25] 2467; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2468; SKX-NEXT: retq # sched: [7:1.00] 2469 %cmpres = fcmp ogt <8 x double> %a, zeroinitializer 2470 %1 = sitofp <8 x i1> %cmpres to <8 x double> 2471 ret <8 x double> %1 2472} 2473 2474define <8 x float> @sbto8f32(<8 x float> %a) { 2475; GENERIC-LABEL: sbto8f32: 2476; GENERIC: # %bb.0: 2477; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2478; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 2479; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] 2480; GENERIC-NEXT: retq # sched: [1:1.00] 2481; 2482; SKX-LABEL: sbto8f32: 2483; SKX: # %bb.0: 2484; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2485; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 2486; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] 2487; SKX-NEXT: retq # sched: [7:1.00] 2488 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer 2489 %1 = sitofp <8 x i1> %cmpres to <8 x float> 2490 ret <8 x float> %1 2491} 2492 2493define <4 x float> @sbto4f32(<4 x float> %a) { 2494; GENERIC-LABEL: sbto4f32: 2495; GENERIC: # %bb.0: 2496; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2497; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2498; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] 2499; GENERIC-NEXT: retq # sched: [1:1.00] 2500; 2501; SKX-LABEL: sbto4f32: 2502; SKX: # %bb.0: 2503; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2504; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 2505; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] 2506; SKX-NEXT: retq # sched: [7:1.00] 2507 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer 2508 %1 = sitofp <4 x i1> %cmpres to <4 x float> 2509 ret <4 x float> %1 2510} 2511 2512define <4 x double> @sbto4f64(<4 x double> %a) { 2513; GENERIC-LABEL: sbto4f64: 2514; GENERIC: # %bb.0: 2515; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2516; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] 2517; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] 2518; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] 2519; GENERIC-NEXT: retq # sched: [1:1.00] 2520; 2521; SKX-LABEL: sbto4f64: 2522; SKX: # %bb.0: 2523; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2524; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] 2525; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] 2526; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] 2527; SKX-NEXT: retq # sched: [7:1.00] 2528 %cmpres = fcmp ogt <4 x double> %a, zeroinitializer 2529 %1 = sitofp <4 x i1> %cmpres to <4 x double> 2530 ret <4 x double> %1 2531} 2532 2533define <2 x float> @sbto2f32(<2 x float> %a) { 2534; GENERIC-LABEL: sbto2f32: 2535; GENERIC: # %bb.0: 2536; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2537; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2538; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] 2539; GENERIC-NEXT: retq # sched: [1:1.00] 2540; 2541; SKX-LABEL: sbto2f32: 2542; SKX: # %bb.0: 2543; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2544; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 2545; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] 2546; SKX-NEXT: retq # sched: [7:1.00] 2547 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer 2548 %1 = sitofp <2 x i1> %cmpres to <2 x float> 2549 ret <2 x float> %1 2550} 2551 2552define <2 x double> @sbto2f64(<2 x double> %a) { 2553; GENERIC-LABEL: sbto2f64: 2554; GENERIC: # %bb.0: 2555; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2556; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2557; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] 2558; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] 2559; GENERIC-NEXT: retq # sched: [1:1.00] 2560; 2561; SKX-LABEL: sbto2f64: 2562; SKX: # %bb.0: 2563; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2564; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 2565; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] 2566; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] 2567; SKX-NEXT: retq # sched: [7:1.00] 2568 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer 2569 %1 = sitofp <2 x i1> %cmpres to <2 x double> 2570 ret <2 x double> %1 2571} 2572 2573define <16 x float> @ucto16f32(<16 x i8> %a) { 2574; GENERIC-LABEL: ucto16f32: 2575; GENERIC: # %bb.0: 2576; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] 2577; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2578; GENERIC-NEXT: retq # sched: [1:1.00] 2579; 2580; SKX-LABEL: ucto16f32: 2581; SKX: # %bb.0: 2582; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] 2583; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2584; SKX-NEXT: retq # sched: [7:1.00] 2585 %b = uitofp <16 x i8> %a to <16 x float> 2586 ret <16 x float>%b 2587} 2588 2589define <8 x double> @ucto8f64(<8 x i8> %a) { 2590; GENERIC-LABEL: ucto8f64: 2591; GENERIC: # %bb.0: 2592; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] 2593; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 2594; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2595; GENERIC-NEXT: retq # sched: [1:1.00] 2596; 2597; SKX-LABEL: ucto8f64: 2598; SKX: # %bb.0: 2599; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] 2600; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 2601; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2602; SKX-NEXT: retq # sched: [7:1.00] 2603 %b = uitofp <8 x i8> %a to <8 x double> 2604 ret <8 x double> %b 2605} 2606 2607define <16 x float> @swto16f32(<16 x i16> %a) { 2608; GENERIC-LABEL: swto16f32: 2609; GENERIC: # %bb.0: 2610; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] 2611; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2612; GENERIC-NEXT: retq # sched: [1:1.00] 2613; 2614; SKX-LABEL: swto16f32: 2615; SKX: # %bb.0: 2616; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] 2617; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2618; SKX-NEXT: retq # sched: [7:1.00] 2619 %b = sitofp <16 x i16> %a to <16 x float> 2620 ret <16 x float> %b 2621} 2622 2623define <8 x double> @swto8f64(<8 x i16> %a) { 2624; GENERIC-LABEL: swto8f64: 2625; GENERIC: # %bb.0: 2626; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] 2627; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2628; GENERIC-NEXT: retq # sched: [1:1.00] 2629; 2630; SKX-LABEL: swto8f64: 2631; SKX: # %bb.0: 2632; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 2633; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2634; SKX-NEXT: retq # sched: [7:1.00] 2635 %b = sitofp <8 x i16> %a to <8 x double> 2636 ret <8 x double> %b 2637} 2638 2639define <16 x double> @swto16f64(<16 x i16> %a) { 2640; GENERIC-LABEL: swto16f64: 2641; GENERIC: # %bb.0: 2642; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00] 2643; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] 2644; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] 2645; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] 2646; GENERIC-NEXT: retq # sched: [1:1.00] 2647; 2648; SKX-LABEL: swto16f64: 2649; SKX: # %bb.0: 2650; SKX-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00] 2651; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] 2652; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] 2653; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] 2654; SKX-NEXT: retq # sched: [7:1.00] 2655 %b = sitofp <16 x i16> %a to <16 x double> 2656 ret <16 x double> %b 2657} 2658 2659define <16 x double> @ucto16f64(<16 x i8> %a) { 2660; GENERIC-LABEL: ucto16f64: 2661; GENERIC: # %bb.0: 2662; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] 2663; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] 2664; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] 2665; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] 2666; GENERIC-NEXT: retq # sched: [1:1.00] 2667; 2668; SKX-LABEL: ucto16f64: 2669; SKX: # %bb.0: 2670; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] 2671; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] 2672; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] 2673; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] 2674; SKX-NEXT: retq # sched: [7:1.00] 2675 %b = uitofp <16 x i8> %a to <16 x double> 2676 ret <16 x double> %b 2677} 2678 2679define <16 x float> @uwto16f32(<16 x i16> %a) { 2680; GENERIC-LABEL: uwto16f32: 2681; GENERIC: # %bb.0: 2682; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] 2683; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2684; GENERIC-NEXT: retq # sched: [1:1.00] 2685; 2686; SKX-LABEL: uwto16f32: 2687; SKX: # %bb.0: 2688; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] 2689; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2690; SKX-NEXT: retq # sched: [7:1.00] 2691 %b = uitofp <16 x i16> %a to <16 x float> 2692 ret <16 x float> %b 2693} 2694 2695define <8 x double> @uwto8f64(<8 x i16> %a) { 2696; GENERIC-LABEL: uwto8f64: 2697; GENERIC: # %bb.0: 2698; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 2699; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2700; GENERIC-NEXT: retq # sched: [1:1.00] 2701; 2702; SKX-LABEL: uwto8f64: 2703; SKX: # %bb.0: 2704; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 2705; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2706; SKX-NEXT: retq # sched: [7:1.00] 2707 %b = uitofp <8 x i16> %a to <8 x double> 2708 ret <8 x double> %b 2709} 2710 2711define <16 x double> @uwto16f64(<16 x i16> %a) { 2712; GENERIC-LABEL: uwto16f64: 2713; GENERIC: # %bb.0: 2714; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] 2715; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] 2716; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] 2717; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] 2718; GENERIC-NEXT: retq # sched: [1:1.00] 2719; 2720; SKX-LABEL: uwto16f64: 2721; SKX: # %bb.0: 2722; SKX-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] 2723; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] 2724; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] 2725; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] 2726; SKX-NEXT: retq # sched: [7:1.00] 2727 %b = uitofp <16 x i16> %a to <16 x double> 2728 ret <16 x double> %b 2729} 2730 2731define <16 x float> @sito16f32(<16 x i32> %a) { 2732; GENERIC-LABEL: sito16f32: 2733; GENERIC: # %bb.0: 2734; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2735; GENERIC-NEXT: retq # sched: [1:1.00] 2736; 2737; SKX-LABEL: sito16f32: 2738; SKX: # %bb.0: 2739; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2740; SKX-NEXT: retq # sched: [7:1.00] 2741 %b = sitofp <16 x i32> %a to <16 x float> 2742 ret <16 x float> %b 2743} 2744 2745define <16 x double> @sito16f64(<16 x i32> %a) { 2746; GENERIC-LABEL: sito16f64: 2747; GENERIC: # %bb.0: 2748; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00] 2749; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00] 2750; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00] 2751; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00] 2752; GENERIC-NEXT: retq # sched: [1:1.00] 2753; 2754; SKX-LABEL: sito16f64: 2755; SKX: # %bb.0: 2756; SKX-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00] 2757; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] 2758; SKX-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00] 2759; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] 2760; SKX-NEXT: retq # sched: [7:1.00] 2761 %b = sitofp <16 x i32> %a to <16 x double> 2762 ret <16 x double> %b 2763} 2764 2765define <16 x float> @usto16f32(<16 x i16> %a) { 2766; GENERIC-LABEL: usto16f32: 2767; GENERIC: # %bb.0: 2768; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] 2769; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2770; GENERIC-NEXT: retq # sched: [1:1.00] 2771; 2772; SKX-LABEL: usto16f32: 2773; SKX: # %bb.0: 2774; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] 2775; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2776; SKX-NEXT: retq # sched: [7:1.00] 2777 %b = uitofp <16 x i16> %a to <16 x float> 2778 ret <16 x float> %b 2779} 2780 2781define <16 x float> @ubto16f32(<16 x i32> %a) { 2782; GENERIC-LABEL: ubto16f32: 2783; GENERIC: # %bb.0: 2784; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] 2785; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 2786; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] 2787; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00] 2788; GENERIC-NEXT: retq # sched: [1:1.00] 2789; 2790; SKX-LABEL: ubto16f32: 2791; SKX: # %bb.0: 2792; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] 2793; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 2794; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] 2795; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] 2796; SKX-NEXT: retq # sched: [7:1.00] 2797 %mask = icmp slt <16 x i32> %a, zeroinitializer 2798 %1 = uitofp <16 x i1> %mask to <16 x float> 2799 ret <16 x float> %1 2800} 2801 2802define <16 x double> @ubto16f64(<16 x i32> %a) { 2803; GENERIC-LABEL: ubto16f64: 2804; GENERIC: # %bb.0: 2805; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] 2806; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 2807; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] 2808; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] 2809; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] 2810; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] 2811; GENERIC-NEXT: retq # sched: [1:1.00] 2812; 2813; SKX-LABEL: ubto16f64: 2814; SKX: # %bb.0: 2815; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] 2816; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 2817; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] 2818; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] 2819; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] 2820; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] 2821; SKX-NEXT: retq # sched: [7:1.00] 2822 %mask = icmp slt <16 x i32> %a, zeroinitializer 2823 %1 = uitofp <16 x i1> %mask to <16 x double> 2824 ret <16 x double> %1 2825} 2826 2827define <8 x float> @ubto8f32(<8 x i32> %a) { 2828; GENERIC-LABEL: ubto8f32: 2829; GENERIC: # %bb.0: 2830; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2831; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2832; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] 2833; GENERIC-NEXT: retq # sched: [1:1.00] 2834; 2835; SKX-LABEL: ubto8f32: 2836; SKX: # %bb.0: 2837; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2838; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2839; SKX-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] 2840; SKX-NEXT: retq # sched: [7:1.00] 2841 %mask = icmp slt <8 x i32> %a, zeroinitializer 2842 %1 = uitofp <8 x i1> %mask to <8 x float> 2843 ret <8 x float> %1 2844} 2845 2846define <8 x double> @ubto8f64(<8 x i32> %a) { 2847; GENERIC-LABEL: ubto8f64: 2848; GENERIC: # %bb.0: 2849; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2850; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2851; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00] 2852; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00] 2853; GENERIC-NEXT: retq # sched: [1:1.00] 2854; 2855; SKX-LABEL: ubto8f64: 2856; SKX: # %bb.0: 2857; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2858; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 2859; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50] 2860; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] 2861; SKX-NEXT: retq # sched: [7:1.00] 2862 %mask = icmp slt <8 x i32> %a, zeroinitializer 2863 %1 = uitofp <8 x i1> %mask to <8 x double> 2864 ret <8 x double> %1 2865} 2866 2867define <4 x float> @ubto4f32(<4 x i32> %a) { 2868; GENERIC-LABEL: ubto4f32: 2869; GENERIC: # %bb.0: 2870; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2871; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 2872; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50] 2873; GENERIC-NEXT: retq # sched: [1:1.00] 2874; 2875; SKX-LABEL: ubto4f32: 2876; SKX: # %bb.0: 2877; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2878; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 2879; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50] 2880; SKX-NEXT: retq # sched: [7:1.00] 2881 %mask = icmp slt <4 x i32> %a, zeroinitializer 2882 %1 = uitofp <4 x i1> %mask to <4 x float> 2883 ret <4 x float> %1 2884} 2885 2886define <4 x double> @ubto4f64(<4 x i32> %a) { 2887; GENERIC-LABEL: ubto4f64: 2888; GENERIC: # %bb.0: 2889; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2890; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 2891; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00] 2892; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] 2893; GENERIC-NEXT: retq # sched: [1:1.00] 2894; 2895; SKX-LABEL: ubto4f64: 2896; SKX: # %bb.0: 2897; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2898; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 2899; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50] 2900; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] 2901; SKX-NEXT: retq # sched: [7:1.00] 2902 %mask = icmp slt <4 x i32> %a, zeroinitializer 2903 %1 = uitofp <4 x i1> %mask to <4 x double> 2904 ret <4 x double> %1 2905} 2906 2907define <2 x float> @ubto2f32(<2 x i32> %a) { 2908; GENERIC-LABEL: ubto2f32: 2909; GENERIC: # %bb.0: 2910; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2911; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50] 2912; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2913; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] 2914; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50] 2915; GENERIC-NEXT: retq # sched: [1:1.00] 2916; 2917; SKX-LABEL: ubto2f32: 2918; SKX: # %bb.0: 2919; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2920; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33] 2921; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2922; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] 2923; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] 2924; SKX-NEXT: retq # sched: [7:1.00] 2925 %mask = icmp ne <2 x i32> %a, zeroinitializer 2926 %1 = uitofp <2 x i1> %mask to <2 x float> 2927 ret <2 x float> %1 2928} 2929 2930define <2 x double> @ubto2f64(<2 x i32> %a) { 2931; GENERIC-LABEL: ubto2f64: 2932; GENERIC: # %bb.0: 2933; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2934; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50] 2935; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2936; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] 2937; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50] 2938; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] 2939; GENERIC-NEXT: retq # sched: [1:1.00] 2940; 2941; SKX-LABEL: ubto2f64: 2942; SKX: # %bb.0: 2943; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 2944; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33] 2945; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2946; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] 2947; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] 2948; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] 2949; SKX-NEXT: retq # sched: [7:1.00] 2950 %mask = icmp ne <2 x i32> %a, zeroinitializer 2951 %1 = uitofp <2 x i1> %mask to <2 x double> 2952 ret <2 x double> %1 2953} 2954 2955define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 2956; GENERIC-LABEL: zext_8x8mem_to_8x16: 2957; GENERIC: # %bb.0: 2958; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 2959; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 2960; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] 2961; GENERIC-NEXT: retq # sched: [1:1.00] 2962; 2963; SKX-LABEL: zext_8x8mem_to_8x16: 2964; SKX: # %bb.0: 2965; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 2966; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 2967; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] 2968; SKX-NEXT: retq # sched: [7:1.00] 2969 %a = load <8 x i8>,<8 x i8> *%i,align 1 2970 %x = zext <8 x i8> %a to <8 x i16> 2971 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer 2972 ret <8 x i16> %ret 2973} 2974 2975define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 2976; GENERIC-LABEL: sext_8x8mem_to_8x16: 2977; GENERIC: # %bb.0: 2978; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 2979; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 2980; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 2981; GENERIC-NEXT: retq # sched: [1:1.00] 2982; 2983; SKX-LABEL: sext_8x8mem_to_8x16: 2984; SKX: # %bb.0: 2985; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 2986; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 2987; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 2988; SKX-NEXT: retq # sched: [7:1.00] 2989 %a = load <8 x i8>,<8 x i8> *%i,align 1 2990 %x = sext <8 x i8> %a to <8 x i16> 2991 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer 2992 ret <8 x i16> %ret 2993} 2994 2995 2996define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 2997; GENERIC-LABEL: zext_16x8mem_to_16x16: 2998; GENERIC: # %bb.0: 2999; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3000; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3001; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] 3002; GENERIC-NEXT: retq # sched: [1:1.00] 3003; 3004; SKX-LABEL: zext_16x8mem_to_16x16: 3005; SKX: # %bb.0: 3006; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3007; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3008; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 3009; SKX-NEXT: retq # sched: [7:1.00] 3010 %a = load <16 x i8>,<16 x i8> *%i,align 1 3011 %x = zext <16 x i8> %a to <16 x i16> 3012 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 3013 ret <16 x i16> %ret 3014} 3015 3016define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 3017; GENERIC-LABEL: sext_16x8mem_to_16x16: 3018; GENERIC: # %bb.0: 3019; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3020; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3021; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 3022; GENERIC-NEXT: retq # sched: [1:1.00] 3023; 3024; SKX-LABEL: sext_16x8mem_to_16x16: 3025; SKX: # %bb.0: 3026; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3027; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3028; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 3029; SKX-NEXT: retq # sched: [7:1.00] 3030 %a = load <16 x i8>,<16 x i8> *%i,align 1 3031 %x = sext <16 x i8> %a to <16 x i16> 3032 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 3033 ret <16 x i16> %ret 3034} 3035 3036define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { 3037; GENERIC-LABEL: zext_16x8_to_16x16: 3038; GENERIC: # %bb.0: 3039; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 3040; GENERIC-NEXT: retq # sched: [1:1.00] 3041; 3042; SKX-LABEL: zext_16x8_to_16x16: 3043; SKX: # %bb.0: 3044; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 3045; SKX-NEXT: retq # sched: [7:1.00] 3046 %x = zext <16 x i8> %a to <16 x i16> 3047 ret <16 x i16> %x 3048} 3049 3050define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { 3051; GENERIC-LABEL: zext_16x8_to_16x16_mask: 3052; GENERIC: # %bb.0: 3053; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 3054; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 3055; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 3056; GENERIC-NEXT: retq # sched: [1:1.00] 3057; 3058; SKX-LABEL: zext_16x8_to_16x16_mask: 3059; SKX: # %bb.0: 3060; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 3061; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 3062; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 3063; SKX-NEXT: retq # sched: [7:1.00] 3064 %x = zext <16 x i8> %a to <16 x i16> 3065 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 3066 ret <16 x i16> %ret 3067} 3068 3069define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone { 3070; GENERIC-LABEL: sext_16x8_to_16x16: 3071; GENERIC: # %bb.0: 3072; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00] 3073; GENERIC-NEXT: retq # sched: [1:1.00] 3074; 3075; SKX-LABEL: sext_16x8_to_16x16: 3076; SKX: # %bb.0: 3077; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] 3078; SKX-NEXT: retq # sched: [7:1.00] 3079 %x = sext <16 x i8> %a to <16 x i16> 3080 ret <16 x i16> %x 3081} 3082 3083define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { 3084; GENERIC-LABEL: sext_16x8_to_16x16_mask: 3085; GENERIC: # %bb.0: 3086; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 3087; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 3088; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00] 3089; GENERIC-NEXT: retq # sched: [1:1.00] 3090; 3091; SKX-LABEL: sext_16x8_to_16x16_mask: 3092; SKX: # %bb.0: 3093; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 3094; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 3095; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00] 3096; SKX-NEXT: retq # sched: [7:1.00] 3097 %x = sext <16 x i8> %a to <16 x i16> 3098 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer 3099 ret <16 x i16> %ret 3100} 3101 3102define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { 3103; GENERIC-LABEL: zext_32x8mem_to_32x16: 3104; GENERIC: # %bb.0: 3105; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] 3106; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] 3107; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00] 3108; GENERIC-NEXT: retq # sched: [1:1.00] 3109; 3110; SKX-LABEL: zext_32x8mem_to_32x16: 3111; SKX: # %bb.0: 3112; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] 3113; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00] 3114; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00] 3115; SKX-NEXT: retq # sched: [7:1.00] 3116 %a = load <32 x i8>,<32 x i8> *%i,align 1 3117 %x = zext <32 x i8> %a to <32 x i16> 3118 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 3119 ret <32 x i16> %ret 3120} 3121 3122define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { 3123; GENERIC-LABEL: sext_32x8mem_to_32x16: 3124; GENERIC: # %bb.0: 3125; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] 3126; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33] 3127; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3128; GENERIC-NEXT: retq # sched: [1:1.00] 3129; 3130; SKX-LABEL: sext_32x8mem_to_32x16: 3131; SKX: # %bb.0: 3132; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] 3133; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00] 3134; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3135; SKX-NEXT: retq # sched: [7:1.00] 3136 %a = load <32 x i8>,<32 x i8> *%i,align 1 3137 %x = sext <32 x i8> %a to <32 x i16> 3138 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 3139 ret <32 x i16> %ret 3140} 3141 3142define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { 3143; GENERIC-LABEL: zext_32x8_to_32x16: 3144; GENERIC: # %bb.0: 3145; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00] 3146; GENERIC-NEXT: retq # sched: [1:1.00] 3147; 3148; SKX-LABEL: zext_32x8_to_32x16: 3149; SKX: # %bb.0: 3150; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00] 3151; SKX-NEXT: retq # sched: [7:1.00] 3152 %x = zext <32 x i8> %a to <32 x i16> 3153 ret <32 x i16> %x 3154} 3155 3156define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { 3157; GENERIC-LABEL: zext_32x8_to_32x16_mask: 3158; GENERIC: # %bb.0: 3159; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] 3160; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] 3161; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00] 3162; GENERIC-NEXT: retq # sched: [1:1.00] 3163; 3164; SKX-LABEL: zext_32x8_to_32x16_mask: 3165; SKX: # %bb.0: 3166; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] 3167; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] 3168; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00] 3169; SKX-NEXT: retq # sched: [7:1.00] 3170 %x = zext <32 x i8> %a to <32 x i16> 3171 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 3172 ret <32 x i16> %ret 3173} 3174 3175define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { 3176; GENERIC-LABEL: sext_32x8_to_32x16: 3177; GENERIC: # %bb.0: 3178; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00] 3179; GENERIC-NEXT: retq # sched: [1:1.00] 3180; 3181; SKX-LABEL: sext_32x8_to_32x16: 3182; SKX: # %bb.0: 3183; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00] 3184; SKX-NEXT: retq # sched: [7:1.00] 3185 %x = sext <32 x i8> %a to <32 x i16> 3186 ret <32 x i16> %x 3187} 3188 3189define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { 3190; GENERIC-LABEL: sext_32x8_to_32x16_mask: 3191; GENERIC: # %bb.0: 3192; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] 3193; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] 3194; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00] 3195; GENERIC-NEXT: retq # sched: [1:1.00] 3196; 3197; SKX-LABEL: sext_32x8_to_32x16_mask: 3198; SKX: # %bb.0: 3199; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] 3200; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] 3201; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00] 3202; SKX-NEXT: retq # sched: [7:1.00] 3203 %x = sext <32 x i8> %a to <32 x i16> 3204 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 3205 ret <32 x i16> %ret 3206} 3207 3208define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 3209; GENERIC-LABEL: zext_4x8mem_to_4x32: 3210; GENERIC: # %bb.0: 3211; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3212; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3213; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] 3214; GENERIC-NEXT: retq # sched: [1:1.00] 3215; 3216; SKX-LABEL: zext_4x8mem_to_4x32: 3217; SKX: # %bb.0: 3218; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3219; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3220; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] 3221; SKX-NEXT: retq # sched: [7:1.00] 3222 %a = load <4 x i8>,<4 x i8> *%i,align 1 3223 %x = zext <4 x i8> %a to <4 x i32> 3224 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 3225 ret <4 x i32> %ret 3226} 3227 3228define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 3229; GENERIC-LABEL: sext_4x8mem_to_4x32: 3230; GENERIC: # %bb.0: 3231; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3232; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3233; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 3234; GENERIC-NEXT: retq # sched: [1:1.00] 3235; 3236; SKX-LABEL: sext_4x8mem_to_4x32: 3237; SKX: # %bb.0: 3238; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3239; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3240; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 3241; SKX-NEXT: retq # sched: [7:1.00] 3242 %a = load <4 x i8>,<4 x i8> *%i,align 1 3243 %x = sext <4 x i8> %a to <4 x i32> 3244 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 3245 ret <4 x i32> %ret 3246} 3247 3248define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 3249; GENERIC-LABEL: zext_8x8mem_to_8x32: 3250; GENERIC: # %bb.0: 3251; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3252; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3253; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] 3254; GENERIC-NEXT: retq # sched: [1:1.00] 3255; 3256; SKX-LABEL: zext_8x8mem_to_8x32: 3257; SKX: # %bb.0: 3258; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3259; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3260; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 3261; SKX-NEXT: retq # sched: [7:1.00] 3262 %a = load <8 x i8>,<8 x i8> *%i,align 1 3263 %x = zext <8 x i8> %a to <8 x i32> 3264 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3265 ret <8 x i32> %ret 3266} 3267 3268define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 3269; GENERIC-LABEL: sext_8x8mem_to_8x32: 3270; GENERIC: # %bb.0: 3271; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3272; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3273; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 3274; GENERIC-NEXT: retq # sched: [1:1.00] 3275; 3276; SKX-LABEL: sext_8x8mem_to_8x32: 3277; SKX: # %bb.0: 3278; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3279; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3280; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 3281; SKX-NEXT: retq # sched: [7:1.00] 3282 %a = load <8 x i8>,<8 x i8> *%i,align 1 3283 %x = sext <8 x i8> %a to <8 x i32> 3284 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3285 ret <8 x i32> %ret 3286} 3287 3288define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 3289; GENERIC-LABEL: zext_16x8mem_to_16x32: 3290; GENERIC: # %bb.0: 3291; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3292; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3293; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00] 3294; GENERIC-NEXT: retq # sched: [1:1.00] 3295; 3296; SKX-LABEL: zext_16x8mem_to_16x32: 3297; SKX: # %bb.0: 3298; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3299; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3300; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00] 3301; SKX-NEXT: retq # sched: [7:1.00] 3302 %a = load <16 x i8>,<16 x i8> *%i,align 1 3303 %x = zext <16 x i8> %a to <16 x i32> 3304 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3305 ret <16 x i32> %ret 3306} 3307 3308define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { 3309; GENERIC-LABEL: sext_16x8mem_to_16x32: 3310; GENERIC: # %bb.0: 3311; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3312; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3313; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3314; GENERIC-NEXT: retq # sched: [1:1.00] 3315; 3316; SKX-LABEL: sext_16x8mem_to_16x32: 3317; SKX: # %bb.0: 3318; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3319; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3320; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3321; SKX-NEXT: retq # sched: [7:1.00] 3322 %a = load <16 x i8>,<16 x i8> *%i,align 1 3323 %x = sext <16 x i8> %a to <16 x i32> 3324 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3325 ret <16 x i32> %ret 3326} 3327 3328define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 3329; GENERIC-LABEL: zext_16x8_to_16x32_mask: 3330; GENERIC: # %bb.0: 3331; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 3332; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 3333; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] 3334; GENERIC-NEXT: retq # sched: [1:1.00] 3335; 3336; SKX-LABEL: zext_16x8_to_16x32_mask: 3337; SKX: # %bb.0: 3338; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 3339; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 3340; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] 3341; SKX-NEXT: retq # sched: [7:1.00] 3342 %x = zext <16 x i8> %a to <16 x i32> 3343 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3344 ret <16 x i32> %ret 3345} 3346 3347define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { 3348; GENERIC-LABEL: sext_16x8_to_16x32_mask: 3349; GENERIC: # %bb.0: 3350; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 3351; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 3352; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 3353; GENERIC-NEXT: retq # sched: [1:1.00] 3354; 3355; SKX-LABEL: sext_16x8_to_16x32_mask: 3356; SKX: # %bb.0: 3357; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 3358; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 3359; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] 3360; SKX-NEXT: retq # sched: [7:1.00] 3361 %x = sext <16 x i8> %a to <16 x i32> 3362 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3363 ret <16 x i32> %ret 3364} 3365 3366define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 3367; GENERIC-LABEL: zext_16x8_to_16x32: 3368; GENERIC: # %bb.0: 3369; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] 3370; GENERIC-NEXT: retq # sched: [1:1.00] 3371; 3372; SKX-LABEL: zext_16x8_to_16x32: 3373; SKX: # %bb.0: 3374; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] 3375; SKX-NEXT: retq # sched: [7:1.00] 3376 %x = zext <16 x i8> %i to <16 x i32> 3377 ret <16 x i32> %x 3378} 3379 3380define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { 3381; GENERIC-LABEL: sext_16x8_to_16x32: 3382; GENERIC: # %bb.0: 3383; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] 3384; GENERIC-NEXT: retq # sched: [1:1.00] 3385; 3386; SKX-LABEL: sext_16x8_to_16x32: 3387; SKX: # %bb.0: 3388; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] 3389; SKX-NEXT: retq # sched: [7:1.00] 3390 %x = sext <16 x i8> %i to <16 x i32> 3391 ret <16 x i32> %x 3392} 3393 3394define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { 3395; GENERIC-LABEL: zext_2x8mem_to_2x64: 3396; GENERIC: # %bb.0: 3397; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3398; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3399; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] 3400; GENERIC-NEXT: retq # sched: [1:1.00] 3401; 3402; SKX-LABEL: zext_2x8mem_to_2x64: 3403; SKX: # %bb.0: 3404; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3405; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3406; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] 3407; SKX-NEXT: retq # sched: [7:1.00] 3408 %a = load <2 x i8>,<2 x i8> *%i,align 1 3409 %x = zext <2 x i8> %a to <2 x i64> 3410 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 3411 ret <2 x i64> %ret 3412} 3413define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { 3414; GENERIC-LABEL: sext_2x8mem_to_2x64mask: 3415; GENERIC: # %bb.0: 3416; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3417; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3418; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 3419; GENERIC-NEXT: retq # sched: [1:1.00] 3420; 3421; SKX-LABEL: sext_2x8mem_to_2x64mask: 3422; SKX: # %bb.0: 3423; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3424; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3425; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 3426; SKX-NEXT: retq # sched: [7:1.00] 3427 %a = load <2 x i8>,<2 x i8> *%i,align 1 3428 %x = sext <2 x i8> %a to <2 x i64> 3429 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 3430 ret <2 x i64> %ret 3431} 3432define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { 3433; GENERIC-LABEL: sext_2x8mem_to_2x64: 3434; GENERIC: # %bb.0: 3435; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [7:0.50] 3436; GENERIC-NEXT: retq # sched: [1:1.00] 3437; 3438; SKX-LABEL: sext_2x8mem_to_2x64: 3439; SKX: # %bb.0: 3440; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00] 3441; SKX-NEXT: retq # sched: [7:1.00] 3442 %a = load <2 x i8>,<2 x i8> *%i,align 1 3443 %x = sext <2 x i8> %a to <2 x i64> 3444 ret <2 x i64> %x 3445} 3446 3447define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 3448; GENERIC-LABEL: zext_4x8mem_to_4x64: 3449; GENERIC: # %bb.0: 3450; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3451; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3452; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] 3453; GENERIC-NEXT: retq # sched: [1:1.00] 3454; 3455; SKX-LABEL: zext_4x8mem_to_4x64: 3456; SKX: # %bb.0: 3457; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3458; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3459; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 3460; SKX-NEXT: retq # sched: [7:1.00] 3461 %a = load <4 x i8>,<4 x i8> *%i,align 1 3462 %x = zext <4 x i8> %a to <4 x i64> 3463 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 3464 ret <4 x i64> %ret 3465} 3466 3467define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { 3468; GENERIC-LABEL: sext_4x8mem_to_4x64mask: 3469; GENERIC: # %bb.0: 3470; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3471; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3472; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 3473; GENERIC-NEXT: retq # sched: [1:1.00] 3474; 3475; SKX-LABEL: sext_4x8mem_to_4x64mask: 3476; SKX: # %bb.0: 3477; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3478; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3479; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 3480; SKX-NEXT: retq # sched: [7:1.00] 3481 %a = load <4 x i8>,<4 x i8> *%i,align 1 3482 %x = sext <4 x i8> %a to <4 x i64> 3483 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 3484 ret <4 x i64> %ret 3485} 3486 3487define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { 3488; GENERIC-LABEL: sext_4x8mem_to_4x64: 3489; GENERIC: # %bb.0: 3490; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] 3491; GENERIC-NEXT: retq # sched: [1:1.00] 3492; 3493; SKX-LABEL: sext_4x8mem_to_4x64: 3494; SKX: # %bb.0: 3495; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] 3496; SKX-NEXT: retq # sched: [7:1.00] 3497 %a = load <4 x i8>,<4 x i8> *%i,align 1 3498 %x = sext <4 x i8> %a to <4 x i64> 3499 ret <4 x i64> %x 3500} 3501 3502define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 3503; GENERIC-LABEL: zext_8x8mem_to_8x64: 3504; GENERIC: # %bb.0: 3505; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3506; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3507; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] 3508; GENERIC-NEXT: retq # sched: [1:1.00] 3509; 3510; SKX-LABEL: zext_8x8mem_to_8x64: 3511; SKX: # %bb.0: 3512; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3513; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3514; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 3515; SKX-NEXT: retq # sched: [7:1.00] 3516 %a = load <8 x i8>,<8 x i8> *%i,align 1 3517 %x = zext <8 x i8> %a to <8 x i64> 3518 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3519 ret <8 x i64> %ret 3520} 3521 3522define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { 3523; GENERIC-LABEL: sext_8x8mem_to_8x64mask: 3524; GENERIC: # %bb.0: 3525; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3526; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3527; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3528; GENERIC-NEXT: retq # sched: [1:1.00] 3529; 3530; SKX-LABEL: sext_8x8mem_to_8x64mask: 3531; SKX: # %bb.0: 3532; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3533; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3534; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3535; SKX-NEXT: retq # sched: [7:1.00] 3536 %a = load <8 x i8>,<8 x i8> *%i,align 1 3537 %x = sext <8 x i8> %a to <8 x i64> 3538 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3539 ret <8 x i64> %ret 3540} 3541 3542define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { 3543; GENERIC-LABEL: sext_8x8mem_to_8x64: 3544; GENERIC: # %bb.0: 3545; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] 3546; GENERIC-NEXT: retq # sched: [1:1.00] 3547; 3548; SKX-LABEL: sext_8x8mem_to_8x64: 3549; SKX: # %bb.0: 3550; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] 3551; SKX-NEXT: retq # sched: [7:1.00] 3552 %a = load <8 x i8>,<8 x i8> *%i,align 1 3553 %x = sext <8 x i8> %a to <8 x i64> 3554 ret <8 x i64> %x 3555} 3556 3557define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 3558; GENERIC-LABEL: zext_4x16mem_to_4x32: 3559; GENERIC: # %bb.0: 3560; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3561; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3562; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] 3563; GENERIC-NEXT: retq # sched: [1:1.00] 3564; 3565; SKX-LABEL: zext_4x16mem_to_4x32: 3566; SKX: # %bb.0: 3567; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3568; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3569; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] 3570; SKX-NEXT: retq # sched: [7:1.00] 3571 %a = load <4 x i16>,<4 x i16> *%i,align 1 3572 %x = zext <4 x i16> %a to <4 x i32> 3573 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 3574 ret <4 x i32> %ret 3575} 3576 3577define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 3578; GENERIC-LABEL: sext_4x16mem_to_4x32mask: 3579; GENERIC: # %bb.0: 3580; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3581; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3582; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 3583; GENERIC-NEXT: retq # sched: [1:1.00] 3584; 3585; SKX-LABEL: sext_4x16mem_to_4x32mask: 3586; SKX: # %bb.0: 3587; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3588; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3589; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 3590; SKX-NEXT: retq # sched: [7:1.00] 3591 %a = load <4 x i16>,<4 x i16> *%i,align 1 3592 %x = sext <4 x i16> %a to <4 x i32> 3593 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer 3594 ret <4 x i32> %ret 3595} 3596 3597define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { 3598; GENERIC-LABEL: sext_4x16mem_to_4x32: 3599; GENERIC: # %bb.0: 3600; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [7:0.50] 3601; GENERIC-NEXT: retq # sched: [1:1.00] 3602; 3603; SKX-LABEL: sext_4x16mem_to_4x32: 3604; SKX: # %bb.0: 3605; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00] 3606; SKX-NEXT: retq # sched: [7:1.00] 3607 %a = load <4 x i16>,<4 x i16> *%i,align 1 3608 %x = sext <4 x i16> %a to <4 x i32> 3609 ret <4 x i32> %x 3610} 3611 3612 3613define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 3614; GENERIC-LABEL: zext_8x16mem_to_8x32: 3615; GENERIC: # %bb.0: 3616; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3617; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3618; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] 3619; GENERIC-NEXT: retq # sched: [1:1.00] 3620; 3621; SKX-LABEL: zext_8x16mem_to_8x32: 3622; SKX: # %bb.0: 3623; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3624; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3625; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] 3626; SKX-NEXT: retq # sched: [7:1.00] 3627 %a = load <8 x i16>,<8 x i16> *%i,align 1 3628 %x = zext <8 x i16> %a to <8 x i32> 3629 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3630 ret <8 x i32> %ret 3631} 3632 3633define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 3634; GENERIC-LABEL: sext_8x16mem_to_8x32mask: 3635; GENERIC: # %bb.0: 3636; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3637; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3638; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 3639; GENERIC-NEXT: retq # sched: [1:1.00] 3640; 3641; SKX-LABEL: sext_8x16mem_to_8x32mask: 3642; SKX: # %bb.0: 3643; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3644; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3645; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 3646; SKX-NEXT: retq # sched: [7:1.00] 3647 %a = load <8 x i16>,<8 x i16> *%i,align 1 3648 %x = sext <8 x i16> %a to <8 x i32> 3649 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3650 ret <8 x i32> %ret 3651} 3652 3653define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { 3654; GENERIC-LABEL: sext_8x16mem_to_8x32: 3655; GENERIC: # %bb.0: 3656; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00] 3657; GENERIC-NEXT: retq # sched: [1:1.00] 3658; 3659; SKX-LABEL: sext_8x16mem_to_8x32: 3660; SKX: # %bb.0: 3661; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00] 3662; SKX-NEXT: retq # sched: [7:1.00] 3663 %a = load <8 x i16>,<8 x i16> *%i,align 1 3664 %x = sext <8 x i16> %a to <8 x i32> 3665 ret <8 x i32> %x 3666} 3667 3668define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 3669; GENERIC-LABEL: zext_8x16_to_8x32mask: 3670; GENERIC: # %bb.0: 3671; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] 3672; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] 3673; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 3674; GENERIC-NEXT: retq # sched: [1:1.00] 3675; 3676; SKX-LABEL: zext_8x16_to_8x32mask: 3677; SKX: # %bb.0: 3678; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] 3679; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] 3680; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 3681; SKX-NEXT: retq # sched: [7:1.00] 3682 %x = zext <8 x i16> %a to <8 x i32> 3683 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer 3684 ret <8 x i32> %ret 3685} 3686 3687define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone { 3688; GENERIC-LABEL: zext_8x16_to_8x32: 3689; GENERIC: # %bb.0: 3690; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 3691; GENERIC-NEXT: retq # sched: [1:1.00] 3692; 3693; SKX-LABEL: zext_8x16_to_8x32: 3694; SKX: # %bb.0: 3695; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 3696; SKX-NEXT: retq # sched: [7:1.00] 3697 %x = zext <8 x i16> %a to <8 x i32> 3698 ret <8 x i32> %x 3699} 3700 3701define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { 3702; GENERIC-LABEL: zext_16x16mem_to_16x32: 3703; GENERIC: # %bb.0: 3704; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3705; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3706; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] 3707; GENERIC-NEXT: retq # sched: [1:1.00] 3708; 3709; SKX-LABEL: zext_16x16mem_to_16x32: 3710; SKX: # %bb.0: 3711; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3712; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3713; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 3714; SKX-NEXT: retq # sched: [7:1.00] 3715 %a = load <16 x i16>,<16 x i16> *%i,align 1 3716 %x = zext <16 x i16> %a to <16 x i32> 3717 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3718 ret <16 x i32> %ret 3719} 3720 3721define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { 3722; GENERIC-LABEL: sext_16x16mem_to_16x32mask: 3723; GENERIC: # %bb.0: 3724; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 3725; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33] 3726; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3727; GENERIC-NEXT: retq # sched: [1:1.00] 3728; 3729; SKX-LABEL: sext_16x16mem_to_16x32mask: 3730; SKX: # %bb.0: 3731; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 3732; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] 3733; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3734; SKX-NEXT: retq # sched: [7:1.00] 3735 %a = load <16 x i16>,<16 x i16> *%i,align 1 3736 %x = sext <16 x i16> %a to <16 x i32> 3737 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3738 ret <16 x i32> %ret 3739} 3740 3741define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { 3742; GENERIC-LABEL: sext_16x16mem_to_16x32: 3743; GENERIC: # %bb.0: 3744; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00] 3745; GENERIC-NEXT: retq # sched: [1:1.00] 3746; 3747; SKX-LABEL: sext_16x16mem_to_16x32: 3748; SKX: # %bb.0: 3749; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00] 3750; SKX-NEXT: retq # sched: [7:1.00] 3751 %a = load <16 x i16>,<16 x i16> *%i,align 1 3752 %x = sext <16 x i16> %a to <16 x i32> 3753 ret <16 x i32> %x 3754} 3755define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { 3756; GENERIC-LABEL: zext_16x16_to_16x32mask: 3757; GENERIC: # %bb.0: 3758; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 3759; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 3760; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] 3761; GENERIC-NEXT: retq # sched: [1:1.00] 3762; 3763; SKX-LABEL: zext_16x16_to_16x32mask: 3764; SKX: # %bb.0: 3765; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 3766; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 3767; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] 3768; SKX-NEXT: retq # sched: [7:1.00] 3769 %x = zext <16 x i16> %a to <16 x i32> 3770 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer 3771 ret <16 x i32> %ret 3772} 3773 3774define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { 3775; GENERIC-LABEL: zext_16x16_to_16x32: 3776; GENERIC: # %bb.0: 3777; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] 3778; GENERIC-NEXT: retq # sched: [1:1.00] 3779; 3780; SKX-LABEL: zext_16x16_to_16x32: 3781; SKX: # %bb.0: 3782; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] 3783; SKX-NEXT: retq # sched: [7:1.00] 3784 %x = zext <16 x i16> %a to <16 x i32> 3785 ret <16 x i32> %x 3786} 3787 3788define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { 3789; GENERIC-LABEL: zext_2x16mem_to_2x64: 3790; GENERIC: # %bb.0: 3791; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3792; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3793; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00] 3794; GENERIC-NEXT: retq # sched: [1:1.00] 3795; 3796; SKX-LABEL: zext_2x16mem_to_2x64: 3797; SKX: # %bb.0: 3798; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3799; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3800; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00] 3801; SKX-NEXT: retq # sched: [7:1.00] 3802 %a = load <2 x i16>,<2 x i16> *%i,align 1 3803 %x = zext <2 x i16> %a to <2 x i64> 3804 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 3805 ret <2 x i64> %ret 3806} 3807 3808define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { 3809; GENERIC-LABEL: sext_2x16mem_to_2x64mask: 3810; GENERIC: # %bb.0: 3811; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3812; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3813; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 3814; GENERIC-NEXT: retq # sched: [1:1.00] 3815; 3816; SKX-LABEL: sext_2x16mem_to_2x64mask: 3817; SKX: # %bb.0: 3818; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3819; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3820; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 3821; SKX-NEXT: retq # sched: [7:1.00] 3822 %a = load <2 x i16>,<2 x i16> *%i,align 1 3823 %x = sext <2 x i16> %a to <2 x i64> 3824 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 3825 ret <2 x i64> %ret 3826} 3827 3828define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { 3829; GENERIC-LABEL: sext_2x16mem_to_2x64: 3830; GENERIC: # %bb.0: 3831; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [7:0.50] 3832; GENERIC-NEXT: retq # sched: [1:1.00] 3833; 3834; SKX-LABEL: sext_2x16mem_to_2x64: 3835; SKX: # %bb.0: 3836; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00] 3837; SKX-NEXT: retq # sched: [7:1.00] 3838 %a = load <2 x i16>,<2 x i16> *%i,align 1 3839 %x = sext <2 x i16> %a to <2 x i64> 3840 ret <2 x i64> %x 3841} 3842 3843define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 3844; GENERIC-LABEL: zext_4x16mem_to_4x64: 3845; GENERIC: # %bb.0: 3846; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3847; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3848; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] 3849; GENERIC-NEXT: retq # sched: [1:1.00] 3850; 3851; SKX-LABEL: zext_4x16mem_to_4x64: 3852; SKX: # %bb.0: 3853; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3854; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3855; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] 3856; SKX-NEXT: retq # sched: [7:1.00] 3857 %a = load <4 x i16>,<4 x i16> *%i,align 1 3858 %x = zext <4 x i16> %a to <4 x i64> 3859 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 3860 ret <4 x i64> %ret 3861} 3862 3863define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { 3864; GENERIC-LABEL: sext_4x16mem_to_4x64mask: 3865; GENERIC: # %bb.0: 3866; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 3867; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 3868; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 3869; GENERIC-NEXT: retq # sched: [1:1.00] 3870; 3871; SKX-LABEL: sext_4x16mem_to_4x64mask: 3872; SKX: # %bb.0: 3873; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 3874; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 3875; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 3876; SKX-NEXT: retq # sched: [7:1.00] 3877 %a = load <4 x i16>,<4 x i16> *%i,align 1 3878 %x = sext <4 x i16> %a to <4 x i64> 3879 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 3880 ret <4 x i64> %ret 3881} 3882 3883define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { 3884; GENERIC-LABEL: sext_4x16mem_to_4x64: 3885; GENERIC: # %bb.0: 3886; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] 3887; GENERIC-NEXT: retq # sched: [1:1.00] 3888; 3889; SKX-LABEL: sext_4x16mem_to_4x64: 3890; SKX: # %bb.0: 3891; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] 3892; SKX-NEXT: retq # sched: [7:1.00] 3893 %a = load <4 x i16>,<4 x i16> *%i,align 1 3894 %x = sext <4 x i16> %a to <4 x i64> 3895 ret <4 x i64> %x 3896} 3897 3898define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 3899; GENERIC-LABEL: zext_8x16mem_to_8x64: 3900; GENERIC: # %bb.0: 3901; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3902; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3903; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] 3904; GENERIC-NEXT: retq # sched: [1:1.00] 3905; 3906; SKX-LABEL: zext_8x16mem_to_8x64: 3907; SKX: # %bb.0: 3908; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3909; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3910; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 3911; SKX-NEXT: retq # sched: [7:1.00] 3912 %a = load <8 x i16>,<8 x i16> *%i,align 1 3913 %x = zext <8 x i16> %a to <8 x i64> 3914 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3915 ret <8 x i64> %ret 3916} 3917 3918define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { 3919; GENERIC-LABEL: sext_8x16mem_to_8x64mask: 3920; GENERIC: # %bb.0: 3921; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 3922; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 3923; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 3924; GENERIC-NEXT: retq # sched: [1:1.00] 3925; 3926; SKX-LABEL: sext_8x16mem_to_8x64mask: 3927; SKX: # %bb.0: 3928; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 3929; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 3930; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 3931; SKX-NEXT: retq # sched: [7:1.00] 3932 %a = load <8 x i16>,<8 x i16> *%i,align 1 3933 %x = sext <8 x i16> %a to <8 x i64> 3934 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3935 ret <8 x i64> %ret 3936} 3937 3938define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { 3939; GENERIC-LABEL: sext_8x16mem_to_8x64: 3940; GENERIC: # %bb.0: 3941; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00] 3942; GENERIC-NEXT: retq # sched: [1:1.00] 3943; 3944; SKX-LABEL: sext_8x16mem_to_8x64: 3945; SKX: # %bb.0: 3946; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00] 3947; SKX-NEXT: retq # sched: [7:1.00] 3948 %a = load <8 x i16>,<8 x i16> *%i,align 1 3949 %x = sext <8 x i16> %a to <8 x i64> 3950 ret <8 x i64> %x 3951} 3952 3953define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { 3954; GENERIC-LABEL: zext_8x16_to_8x64mask: 3955; GENERIC: # %bb.0: 3956; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] 3957; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] 3958; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] 3959; GENERIC-NEXT: retq # sched: [1:1.00] 3960; 3961; SKX-LABEL: zext_8x16_to_8x64mask: 3962; SKX: # %bb.0: 3963; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] 3964; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] 3965; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 3966; SKX-NEXT: retq # sched: [7:1.00] 3967 %x = zext <8 x i16> %a to <8 x i64> 3968 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 3969 ret <8 x i64> %ret 3970} 3971 3972define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { 3973; GENERIC-LABEL: zext_8x16_to_8x64: 3974; GENERIC: # %bb.0: 3975; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] 3976; GENERIC-NEXT: retq # sched: [1:1.00] 3977; 3978; SKX-LABEL: zext_8x16_to_8x64: 3979; SKX: # %bb.0: 3980; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 3981; SKX-NEXT: retq # sched: [7:1.00] 3982 %ret = zext <8 x i16> %a to <8 x i64> 3983 ret <8 x i64> %ret 3984} 3985 3986define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { 3987; GENERIC-LABEL: zext_2x32mem_to_2x64: 3988; GENERIC: # %bb.0: 3989; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 3990; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 3991; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00] 3992; GENERIC-NEXT: retq # sched: [1:1.00] 3993; 3994; SKX-LABEL: zext_2x32mem_to_2x64: 3995; SKX: # %bb.0: 3996; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 3997; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 3998; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00] 3999; SKX-NEXT: retq # sched: [7:1.00] 4000 %a = load <2 x i32>,<2 x i32> *%i,align 1 4001 %x = zext <2 x i32> %a to <2 x i64> 4002 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 4003 ret <2 x i64> %ret 4004} 4005 4006define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { 4007; GENERIC-LABEL: sext_2x32mem_to_2x64mask: 4008; GENERIC: # %bb.0: 4009; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 4010; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] 4011; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00] 4012; GENERIC-NEXT: retq # sched: [1:1.00] 4013; 4014; SKX-LABEL: sext_2x32mem_to_2x64mask: 4015; SKX: # %bb.0: 4016; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 4017; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] 4018; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] 4019; SKX-NEXT: retq # sched: [7:1.00] 4020 %a = load <2 x i32>,<2 x i32> *%i,align 1 4021 %x = sext <2 x i32> %a to <2 x i64> 4022 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer 4023 ret <2 x i64> %ret 4024} 4025 4026define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { 4027; GENERIC-LABEL: sext_2x32mem_to_2x64: 4028; GENERIC: # %bb.0: 4029; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [7:0.50] 4030; GENERIC-NEXT: retq # sched: [1:1.00] 4031; 4032; SKX-LABEL: sext_2x32mem_to_2x64: 4033; SKX: # %bb.0: 4034; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00] 4035; SKX-NEXT: retq # sched: [7:1.00] 4036 %a = load <2 x i32>,<2 x i32> *%i,align 1 4037 %x = sext <2 x i32> %a to <2 x i64> 4038 ret <2 x i64> %x 4039} 4040 4041define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { 4042; GENERIC-LABEL: zext_4x32mem_to_4x64: 4043; GENERIC: # %bb.0: 4044; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 4045; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 4046; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] 4047; GENERIC-NEXT: retq # sched: [1:1.00] 4048; 4049; SKX-LABEL: zext_4x32mem_to_4x64: 4050; SKX: # %bb.0: 4051; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 4052; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 4053; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] 4054; SKX-NEXT: retq # sched: [7:1.00] 4055 %a = load <4 x i32>,<4 x i32> *%i,align 1 4056 %x = zext <4 x i32> %a to <4 x i64> 4057 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 4058 ret <4 x i64> %ret 4059} 4060 4061define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { 4062; GENERIC-LABEL: sext_4x32mem_to_4x64mask: 4063; GENERIC: # %bb.0: 4064; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 4065; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] 4066; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00] 4067; GENERIC-NEXT: retq # sched: [1:1.00] 4068; 4069; SKX-LABEL: sext_4x32mem_to_4x64mask: 4070; SKX: # %bb.0: 4071; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 4072; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] 4073; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] 4074; SKX-NEXT: retq # sched: [7:1.00] 4075 %a = load <4 x i32>,<4 x i32> *%i,align 1 4076 %x = sext <4 x i32> %a to <4 x i64> 4077 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 4078 ret <4 x i64> %ret 4079} 4080 4081define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { 4082; GENERIC-LABEL: sext_4x32mem_to_4x64: 4083; GENERIC: # %bb.0: 4084; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00] 4085; GENERIC-NEXT: retq # sched: [1:1.00] 4086; 4087; SKX-LABEL: sext_4x32mem_to_4x64: 4088; SKX: # %bb.0: 4089; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00] 4090; SKX-NEXT: retq # sched: [7:1.00] 4091 %a = load <4 x i32>,<4 x i32> *%i,align 1 4092 %x = sext <4 x i32> %a to <4 x i64> 4093 ret <4 x i64> %x 4094} 4095 4096define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone { 4097; GENERIC-LABEL: sext_4x32_to_4x64: 4098; GENERIC: # %bb.0: 4099; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] 4100; GENERIC-NEXT: retq # sched: [1:1.00] 4101; 4102; SKX-LABEL: sext_4x32_to_4x64: 4103; SKX: # %bb.0: 4104; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4105; SKX-NEXT: retq # sched: [7:1.00] 4106 %x = sext <4 x i32> %a to <4 x i64> 4107 ret <4 x i64> %x 4108} 4109 4110define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { 4111; GENERIC-LABEL: zext_4x32_to_4x64mask: 4112; GENERIC: # %bb.0: 4113; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] 4114; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] 4115; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4116; GENERIC-NEXT: retq # sched: [1:1.00] 4117; 4118; SKX-LABEL: zext_4x32_to_4x64mask: 4119; SKX: # %bb.0: 4120; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] 4121; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] 4122; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4123; SKX-NEXT: retq # sched: [7:1.00] 4124 %x = zext <4 x i32> %a to <4 x i64> 4125 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer 4126 ret <4 x i64> %ret 4127} 4128 4129define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { 4130; GENERIC-LABEL: zext_8x32mem_to_8x64: 4131; GENERIC: # %bb.0: 4132; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 4133; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 4134; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] 4135; GENERIC-NEXT: retq # sched: [1:1.00] 4136; 4137; SKX-LABEL: zext_8x32mem_to_8x64: 4138; SKX: # %bb.0: 4139; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 4140; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 4141; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] 4142; SKX-NEXT: retq # sched: [7:1.00] 4143 %a = load <8 x i32>,<8 x i32> *%i,align 1 4144 %x = zext <8 x i32> %a to <8 x i64> 4145 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 4146 ret <8 x i64> %ret 4147} 4148 4149define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { 4150; GENERIC-LABEL: sext_8x32mem_to_8x64mask: 4151; GENERIC: # %bb.0: 4152; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 4153; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33] 4154; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 4155; GENERIC-NEXT: retq # sched: [1:1.00] 4156; 4157; SKX-LABEL: sext_8x32mem_to_8x64mask: 4158; SKX: # %bb.0: 4159; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 4160; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] 4161; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] 4162; SKX-NEXT: retq # sched: [7:1.00] 4163 %a = load <8 x i32>,<8 x i32> *%i,align 1 4164 %x = sext <8 x i32> %a to <8 x i64> 4165 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 4166 ret <8 x i64> %ret 4167} 4168 4169define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { 4170; GENERIC-LABEL: sext_8x32mem_to_8x64: 4171; GENERIC: # %bb.0: 4172; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00] 4173; GENERIC-NEXT: retq # sched: [1:1.00] 4174; 4175; SKX-LABEL: sext_8x32mem_to_8x64: 4176; SKX: # %bb.0: 4177; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00] 4178; SKX-NEXT: retq # sched: [7:1.00] 4179 %a = load <8 x i32>,<8 x i32> *%i,align 1 4180 %x = sext <8 x i32> %a to <8 x i64> 4181 ret <8 x i64> %x 4182} 4183 4184define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { 4185; GENERIC-LABEL: sext_8x32_to_8x64: 4186; GENERIC: # %bb.0: 4187; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00] 4188; GENERIC-NEXT: retq # sched: [1:1.00] 4189; 4190; SKX-LABEL: sext_8x32_to_8x64: 4191; SKX: # %bb.0: 4192; SKX-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00] 4193; SKX-NEXT: retq # sched: [7:1.00] 4194 %x = sext <8 x i32> %a to <8 x i64> 4195 ret <8 x i64> %x 4196} 4197 4198define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { 4199; GENERIC-LABEL: zext_8x32_to_8x64mask: 4200; GENERIC: # %bb.0: 4201; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] 4202; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33] 4203; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00] 4204; GENERIC-NEXT: retq # sched: [1:1.00] 4205; 4206; SKX-LABEL: zext_8x32_to_8x64mask: 4207; SKX: # %bb.0: 4208; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] 4209; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] 4210; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00] 4211; SKX-NEXT: retq # sched: [7:1.00] 4212 %x = zext <8 x i32> %a to <8 x i64> 4213 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer 4214 ret <8 x i64> %ret 4215} 4216define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { 4217; GENERIC-LABEL: fptrunc_test: 4218; GENERIC: # %bb.0: 4219; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00] 4220; GENERIC-NEXT: retq # sched: [1:1.00] 4221; 4222; SKX-LABEL: fptrunc_test: 4223; SKX: # %bb.0: 4224; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] 4225; SKX-NEXT: retq # sched: [7:1.00] 4226 %b = fptrunc <8 x double> %a to <8 x float> 4227 ret <8 x float> %b 4228} 4229 4230define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { 4231; GENERIC-LABEL: fpext_test: 4232; GENERIC: # %bb.0: 4233; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00] 4234; GENERIC-NEXT: retq # sched: [1:1.00] 4235; 4236; SKX-LABEL: fpext_test: 4237; SKX: # %bb.0: 4238; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] 4239; SKX-NEXT: retq # sched: [7:1.00] 4240 %b = fpext <8 x float> %a to <8 x double> 4241 ret <8 x double> %b 4242} 4243 4244define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { 4245; GENERIC-LABEL: zext_16i1_to_16xi32: 4246; GENERIC: # %bb.0: 4247; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 4248; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 4249; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] 4250; GENERIC-NEXT: retq # sched: [1:1.00] 4251; 4252; SKX-LABEL: zext_16i1_to_16xi32: 4253; SKX: # %bb.0: 4254; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 4255; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 4256; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] 4257; SKX-NEXT: retq # sched: [7:1.00] 4258 %a = bitcast i16 %b to <16 x i1> 4259 %c = zext <16 x i1> %a to <16 x i32> 4260 ret <16 x i32> %c 4261} 4262 4263define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { 4264; GENERIC-LABEL: zext_8i1_to_8xi64: 4265; GENERIC: # %bb.0: 4266; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 4267; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] 4268; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] 4269; GENERIC-NEXT: retq # sched: [1:1.00] 4270; 4271; SKX-LABEL: zext_8i1_to_8xi64: 4272; SKX: # %bb.0: 4273; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 4274; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] 4275; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] 4276; SKX-NEXT: retq # sched: [7:1.00] 4277 %a = bitcast i8 %b to <8 x i1> 4278 %c = zext <8 x i1> %a to <8 x i64> 4279 ret <8 x i64> %c 4280} 4281 4282define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { 4283; GENERIC-LABEL: trunc_16i8_to_16i1: 4284; GENERIC: # %bb.0: 4285; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 4286; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] 4287; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 4288; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 4289; GENERIC-NEXT: retq # sched: [1:1.00] 4290; 4291; SKX-LABEL: trunc_16i8_to_16i1: 4292; SKX: # %bb.0: 4293; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 4294; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] 4295; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 4296; SKX-NEXT: # kill: def $ax killed $ax killed $eax 4297; SKX-NEXT: retq # sched: [7:1.00] 4298 %mask_b = trunc <16 x i8>%a to <16 x i1> 4299 %mask = bitcast <16 x i1> %mask_b to i16 4300 ret i16 %mask 4301} 4302 4303define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { 4304; GENERIC-LABEL: trunc_16i32_to_16i1: 4305; GENERIC: # %bb.0: 4306; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] 4307; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] 4308; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 4309; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 4310; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4311; GENERIC-NEXT: retq # sched: [1:1.00] 4312; 4313; SKX-LABEL: trunc_16i32_to_16i1: 4314; SKX: # %bb.0: 4315; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] 4316; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] 4317; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 4318; SKX-NEXT: # kill: def $ax killed $ax killed $eax 4319; SKX-NEXT: vzeroupper # sched: [4:1.00] 4320; SKX-NEXT: retq # sched: [7:1.00] 4321 %mask_b = trunc <16 x i32>%a to <16 x i1> 4322 %mask = bitcast <16 x i1> %mask_b to i16 4323 ret i16 %mask 4324} 4325 4326define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { 4327; GENERIC-LABEL: trunc_4i32_to_4i1: 4328; GENERIC: # %bb.0: 4329; GENERIC-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4330; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 4331; GENERIC-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00] 4332; GENERIC-NEXT: retq # sched: [1:1.00] 4333; 4334; SKX-LABEL: trunc_4i32_to_4i1: 4335; SKX: # %bb.0: 4336; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 4337; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 4338; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50] 4339; SKX-NEXT: retq # sched: [7:1.00] 4340 %mask_a = trunc <4 x i32>%a to <4 x i1> 4341 %mask_b = trunc <4 x i32>%b to <4 x i1> 4342 %a_and_b = and <4 x i1>%mask_a, %mask_b 4343 %res = sext <4 x i1>%a_and_b to <4 x i32> 4344 ret <4 x i32>%res 4345} 4346 4347 4348define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { 4349; GENERIC-LABEL: trunc_8i16_to_8i1: 4350; GENERIC: # %bb.0: 4351; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 4352; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 4353; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 4354; GENERIC-NEXT: # kill: def $al killed $al killed $eax 4355; GENERIC-NEXT: retq # sched: [1:1.00] 4356; 4357; SKX-LABEL: trunc_8i16_to_8i1: 4358; SKX: # %bb.0: 4359; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 4360; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 4361; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 4362; SKX-NEXT: # kill: def $al killed $al killed $eax 4363; SKX-NEXT: retq # sched: [7:1.00] 4364 %mask_b = trunc <8 x i16>%a to <8 x i1> 4365 %mask = bitcast <8 x i1> %mask_b to i8 4366 ret i8 %mask 4367} 4368 4369define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { 4370; GENERIC-LABEL: sext_8i1_8i32: 4371; GENERIC: # %bb.0: 4372; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 4373; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50] 4374; GENERIC-NEXT: retq # sched: [1:1.00] 4375; 4376; SKX-LABEL: sext_8i1_8i32: 4377; SKX: # %bb.0: 4378; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 4379; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33] 4380; SKX-NEXT: retq # sched: [7:1.00] 4381 %x = icmp slt <8 x i32> %a1, %a2 4382 %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> 4383 %y = sext <8 x i1> %x1 to <8 x i32> 4384 ret <8 x i32> %y 4385} 4386 4387 4388define i16 @trunc_i32_to_i1(i32 %a) { 4389; GENERIC-LABEL: trunc_i32_to_i1: 4390; GENERIC: # %bb.0: 4391; GENERIC-NEXT: movw $-4, %ax # sched: [1:0.33] 4392; GENERIC-NEXT: kmovd %eax, %k0 # sched: [1:0.33] 4393; GENERIC-NEXT: kshiftrw $1, %k0, %k0 # sched: [1:1.00] 4394; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00] 4395; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] 4396; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33] 4397; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:0.33] 4398; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 4399; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 4400; GENERIC-NEXT: retq # sched: [1:1.00] 4401; 4402; SKX-LABEL: trunc_i32_to_i1: 4403; SKX: # %bb.0: 4404; SKX-NEXT: movw $-4, %ax # sched: [1:0.25] 4405; SKX-NEXT: kmovd %eax, %k0 # sched: [1:1.00] 4406; SKX-NEXT: kshiftrw $1, %k0, %k0 # sched: [3:1.00] 4407; SKX-NEXT: kshiftlw $1, %k0, %k0 # sched: [3:1.00] 4408; SKX-NEXT: andl $1, %edi # sched: [1:0.25] 4409; SKX-NEXT: kmovw %edi, %k1 # sched: [1:1.00] 4410; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] 4411; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 4412; SKX-NEXT: # kill: def $ax killed $ax killed $eax 4413; SKX-NEXT: retq # sched: [7:1.00] 4414 %a_i = trunc i32 %a to i1 4415 %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0 4416 %res = bitcast <16 x i1> %maskv to i16 4417 ret i16 %res 4418} 4419 4420define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { 4421; GENERIC-LABEL: sext_8i1_8i16: 4422; GENERIC: # %bb.0: 4423; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] 4424; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] 4425; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4426; GENERIC-NEXT: retq # sched: [1:1.00] 4427; 4428; SKX-LABEL: sext_8i1_8i16: 4429; SKX: # %bb.0: 4430; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] 4431; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] 4432; SKX-NEXT: vzeroupper # sched: [4:1.00] 4433; SKX-NEXT: retq # sched: [7:1.00] 4434 %x = icmp slt <8 x i32> %a1, %a2 4435 %y = sext <8 x i1> %x to <8 x i16> 4436 ret <8 x i16> %y 4437} 4438 4439define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { 4440; GENERIC-LABEL: sext_16i1_16i32: 4441; GENERIC: # %bb.0: 4442; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50] 4443; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 4444; GENERIC-NEXT: retq # sched: [1:1.00] 4445; 4446; SKX-LABEL: sext_16i1_16i32: 4447; SKX: # %bb.0: 4448; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00] 4449; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 4450; SKX-NEXT: retq # sched: [7:1.00] 4451 %x = icmp slt <16 x i32> %a1, %a2 4452 %y = sext <16 x i1> %x to <16 x i32> 4453 ret <16 x i32> %y 4454} 4455 4456define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { 4457; GENERIC-LABEL: sext_8i1_8i64: 4458; GENERIC: # %bb.0: 4459; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50] 4460; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] 4461; GENERIC-NEXT: retq # sched: [1:1.00] 4462; 4463; SKX-LABEL: sext_8i1_8i64: 4464; SKX: # %bb.0: 4465; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] 4466; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] 4467; SKX-NEXT: retq # sched: [7:1.00] 4468 %x = icmp slt <8 x i32> %a1, %a2 4469 %y = sext <8 x i1> %x to <8 x i64> 4470 ret <8 x i64> %y 4471} 4472 4473define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { 4474; GENERIC-LABEL: extload_v8i64: 4475; GENERIC: # %bb.0: 4476; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00] 4477; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] 4478; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4479; GENERIC-NEXT: retq # sched: [1:1.00] 4480; 4481; SKX-LABEL: extload_v8i64: 4482; SKX: # %bb.0: 4483; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] 4484; SKX-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] 4485; SKX-NEXT: vzeroupper # sched: [4:1.00] 4486; SKX-NEXT: retq # sched: [7:1.00] 4487 %sign_load = load <8 x i8>, <8 x i8>* %a 4488 %c = sext <8 x i8> %sign_load to <8 x i64> 4489 store <8 x i64> %c, <8 x i64>* %res 4490 ret void 4491} 4492 4493define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { 4494; GENERIC-LABEL: test21: 4495; GENERIC: # %bb.0: 4496; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] 4497; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33] 4498; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 4499; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00] 4500; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.50] 4501; GENERIC-NEXT: retq # sched: [1:1.00] 4502; 4503; SKX-LABEL: test21: 4504; SKX: # %bb.0: 4505; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] 4506; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00] 4507; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 4508; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00] 4509; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33] 4510; SKX-NEXT: retq # sched: [7:1.00] 4511 %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer 4512 ret <64 x i16> %ret 4513} 4514 4515define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone { 4516; GENERIC-LABEL: shuffle_zext_16x8_to_16x16: 4517; GENERIC: # %bb.0: 4518; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 4519; GENERIC-NEXT: retq # sched: [1:1.00] 4520; 4521; SKX-LABEL: shuffle_zext_16x8_to_16x16: 4522; SKX: # %bb.0: 4523; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4524; SKX-NEXT: retq # sched: [7:1.00] 4525 %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 4526 %2 = bitcast <32 x i8> %1 to <16 x i16> 4527 ret <16 x i16> %2 4528} 4529 4530define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { 4531; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask: 4532; GENERIC: # %bb.0: 4533; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] 4534; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33] 4535; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 4536; GENERIC-NEXT: retq # sched: [1:1.00] 4537; 4538; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: 4539; SKX: # %bb.0: 4540; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] 4541; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] 4542; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4543; SKX-NEXT: retq # sched: [7:1.00] 4544 %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16> 4545 %bc = bitcast <32 x i8> %x to <16 x i16> 4546 %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer 4547 ret <16 x i16> %ret 4548} 4549 4550define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) { 4551; GENERIC-LABEL: zext_32x8_to_16x16: 4552; GENERIC: # %bb.0: 4553; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 4554; GENERIC-NEXT: retq # sched: [1:1.00] 4555; 4556; SKX-LABEL: zext_32x8_to_16x16: 4557; SKX: # %bb.0: 4558; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4559; SKX-NEXT: retq # sched: [7:1.00] 4560 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32> 4561 %2 = bitcast <32 x i8> %1 to <16 x i16> 4562 ret <16 x i16> %2 4563} 4564 4565define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) { 4566; GENERIC-LABEL: zext_32x8_to_8x32: 4567; GENERIC: # %bb.0: 4568; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] 4569; GENERIC-NEXT: retq # sched: [1:1.00] 4570; 4571; SKX-LABEL: zext_32x8_to_8x32: 4572; SKX: # %bb.0: 4573; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4574; SKX-NEXT: retq # sched: [7:1.00] 4575 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32> 4576 %2 = bitcast <32 x i8> %1 to <8 x i32> 4577 ret <8 x i32> %2 4578} 4579 4580define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) { 4581; GENERIC-LABEL: zext_32x8_to_4x64: 4582; GENERIC: # %bb.0: 4583; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4584; GENERIC-NEXT: retq # sched: [1:1.00] 4585; 4586; SKX-LABEL: zext_32x8_to_4x64: 4587; SKX: # %bb.0: 4588; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4589; SKX-NEXT: retq # sched: [7:1.00] 4590 %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> 4591 %2 = bitcast <32 x i8> %1 to <4 x i64> 4592 ret <4 x i64> %2 4593} 4594 4595define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) { 4596; GENERIC-LABEL: zext_16x16_to_8x32: 4597; GENERIC: # %bb.0: 4598; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4599; GENERIC-NEXT: retq # sched: [1:1.00] 4600; 4601; SKX-LABEL: zext_16x16_to_8x32: 4602; SKX: # %bb.0: 4603; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4604; SKX-NEXT: retq # sched: [7:1.00] 4605 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16> 4606 %2 = bitcast <16 x i16> %1 to <8 x i32> 4607 ret <8 x i32> %2 4608} 4609 4610define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) { 4611; GENERIC-LABEL: zext_16x16_to_4x64: 4612; GENERIC: # %bb.0: 4613; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4614; GENERIC-NEXT: retq # sched: [1:1.00] 4615; 4616; SKX-LABEL: zext_16x16_to_4x64: 4617; SKX: # %bb.0: 4618; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4619; SKX-NEXT: retq # sched: [7:1.00] 4620 %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16> 4621 %2 = bitcast <16 x i16> %1 to <4 x i64> 4622 ret <4 x i64> %2 4623} 4624 4625define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) { 4626; GENERIC-LABEL: zext_8x32_to_4x64: 4627; GENERIC: # %bb.0: 4628; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4629; GENERIC-NEXT: retq # sched: [1:1.00] 4630; 4631; SKX-LABEL: zext_8x32_to_4x64: 4632; SKX: # %bb.0: 4633; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4634; SKX-NEXT: retq # sched: [7:1.00] 4635 %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8> 4636 %2 = bitcast <8 x i32> %1 to <4 x i64> 4637 ret <4 x i64> %2 4638} 4639 4640define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { 4641; GENERIC-LABEL: zext_64xi1_to_64xi8: 4642; GENERIC: # %bb.0: 4643; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50] 4644; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [7:0.50] 4645; GENERIC-NEXT: retq # sched: [1:1.00] 4646; 4647; SKX-LABEL: zext_64xi1_to_64xi8: 4648; SKX: # %bb.0: 4649; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00] 4650; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] 4651; SKX-NEXT: retq # sched: [7:1.00] 4652 %mask = icmp eq <64 x i8> %x, %y 4653 %1 = zext <64 x i1> %mask to <64 x i8> 4654 ret <64 x i8> %1 4655} 4656 4657define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { 4658; GENERIC-LABEL: zext_32xi1_to_32xi16: 4659; GENERIC: # %bb.0: 4660; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50] 4661; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] 4662; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] 4663; GENERIC-NEXT: retq # sched: [1:1.00] 4664; 4665; SKX-LABEL: zext_32xi1_to_32xi16: 4666; SKX: # %bb.0: 4667; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00] 4668; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] 4669; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] 4670; SKX-NEXT: retq # sched: [7:1.00] 4671 %mask = icmp eq <32 x i16> %x, %y 4672 %1 = zext <32 x i1> %mask to <32 x i16> 4673 ret <32 x i16> %1 4674} 4675 4676define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { 4677; GENERIC-LABEL: zext_16xi1_to_16xi16: 4678; GENERIC: # %bb.0: 4679; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4680; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00] 4681; GENERIC-NEXT: retq # sched: [1:1.00] 4682; 4683; SKX-LABEL: zext_16xi1_to_16xi16: 4684; SKX: # %bb.0: 4685; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4686; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50] 4687; SKX-NEXT: retq # sched: [7:1.00] 4688 %mask = icmp eq <16 x i16> %x, %y 4689 %1 = zext <16 x i1> %mask to <16 x i16> 4690 ret <16 x i16> %1 4691} 4692 4693 4694define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { 4695; GENERIC-LABEL: zext_32xi1_to_32xi8: 4696; GENERIC: # %bb.0: 4697; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50] 4698; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [7:0.50] 4699; GENERIC-NEXT: retq # sched: [1:1.00] 4700; 4701; SKX-LABEL: zext_32xi1_to_32xi8: 4702; SKX: # %bb.0: 4703; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00] 4704; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] 4705; SKX-NEXT: retq # sched: [7:1.00] 4706 %mask = icmp eq <32 x i16> %x, %y 4707 %1 = zext <32 x i1> %mask to <32 x i8> 4708 ret <32 x i8> %1 4709} 4710 4711define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { 4712; GENERIC-LABEL: zext_4xi1_to_4x32: 4713; GENERIC: # %bb.0: 4714; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] 4715; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] 4716; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4717; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4718; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00] 4719; GENERIC-NEXT: retq # sched: [1:1.00] 4720; 4721; SKX-LABEL: zext_4xi1_to_4x32: 4722; SKX: # %bb.0: 4723; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] 4724; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] 4725; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4726; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4727; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50] 4728; SKX-NEXT: retq # sched: [7:1.00] 4729 %mask = icmp eq <4 x i8> %x, %y 4730 %1 = zext <4 x i1> %mask to <4 x i32> 4731 ret <4 x i32> %1 4732} 4733 4734define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { 4735; GENERIC-LABEL: zext_2xi1_to_2xi64: 4736; GENERIC: # %bb.0: 4737; GENERIC-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] 4738; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] 4739; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4740; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4741; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00] 4742; GENERIC-NEXT: retq # sched: [1:1.00] 4743; 4744; SKX-LABEL: zext_2xi1_to_2xi64: 4745; SKX: # %bb.0: 4746; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] 4747; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] 4748; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4749; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 4750; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50] 4751; SKX-NEXT: retq # sched: [7:1.00] 4752 %mask = icmp eq <2 x i8> %x, %y 4753 %1 = zext <2 x i1> %mask to <2 x i64> 4754 ret <2 x i64> %1 4755} 4756 4757define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { 4758; GENERIC-LABEL: test_x86_fmadd_ps_z: 4759; GENERIC: # %bb.0: 4760; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4761; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4762; GENERIC-NEXT: retq # sched: [1:1.00] 4763; 4764; SKX-LABEL: test_x86_fmadd_ps_z: 4765; SKX: # %bb.0: 4766; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4767; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4768; SKX-NEXT: retq # sched: [7:1.00] 4769 %x = fmul <16 x float> %a0, %a1 4770 %res = fadd <16 x float> %x, %a2 4771 ret <16 x float> %res 4772} 4773 4774define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { 4775; GENERIC-LABEL: test_x86_fmsub_ps_z: 4776; GENERIC: # %bb.0: 4777; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4778; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4779; GENERIC-NEXT: retq # sched: [1:1.00] 4780; 4781; SKX-LABEL: test_x86_fmsub_ps_z: 4782; SKX: # %bb.0: 4783; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4784; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4785; SKX-NEXT: retq # sched: [7:1.00] 4786 %x = fmul <16 x float> %a0, %a1 4787 %res = fsub <16 x float> %x, %a2 4788 ret <16 x float> %res 4789} 4790 4791define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { 4792; GENERIC-LABEL: test_x86_fnmadd_ps_z: 4793; GENERIC: # %bb.0: 4794; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4795; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00] 4796; GENERIC-NEXT: retq # sched: [1:1.00] 4797; 4798; SKX-LABEL: test_x86_fnmadd_ps_z: 4799; SKX: # %bb.0: 4800; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4801; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50] 4802; SKX-NEXT: retq # sched: [7:1.00] 4803 %x = fmul <16 x float> %a0, %a1 4804 %res = fsub <16 x float> %a2, %x 4805 ret <16 x float> %res 4806} 4807 4808define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { 4809; GENERIC-LABEL: test_x86_fnmsub_ps_z: 4810; GENERIC: # %bb.0: 4811; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4812; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00] 4813; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4814; GENERIC-NEXT: retq # sched: [1:1.00] 4815; 4816; SKX-LABEL: test_x86_fnmsub_ps_z: 4817; SKX: # %bb.0: 4818; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4819; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 4820; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4821; SKX-NEXT: retq # sched: [7:1.00] 4822 %x = fmul <16 x float> %a0, %a1 4823 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 4824 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 4825 float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 4826 float -0.000000e+00>, %x 4827 %res = fsub <16 x float> %y, %a2 4828 ret <16 x float> %res 4829} 4830 4831define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { 4832; GENERIC-LABEL: test_x86_fmadd_pd_z: 4833; GENERIC: # %bb.0: 4834; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4835; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4836; GENERIC-NEXT: retq # sched: [1:1.00] 4837; 4838; SKX-LABEL: test_x86_fmadd_pd_z: 4839; SKX: # %bb.0: 4840; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4841; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4842; SKX-NEXT: retq # sched: [7:1.00] 4843 %x = fmul <8 x double> %a0, %a1 4844 %res = fadd <8 x double> %x, %a2 4845 ret <8 x double> %res 4846} 4847 4848define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { 4849; GENERIC-LABEL: test_x86_fmsub_pd_z: 4850; GENERIC: # %bb.0: 4851; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4852; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00] 4853; GENERIC-NEXT: retq # sched: [1:1.00] 4854; 4855; SKX-LABEL: test_x86_fmsub_pd_z: 4856; SKX: # %bb.0: 4857; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4858; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] 4859; SKX-NEXT: retq # sched: [7:1.00] 4860 %x = fmul <8 x double> %a0, %a1 4861 %res = fsub <8 x double> %x, %a2 4862 ret <8 x double> %res 4863} 4864 4865define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) { 4866; GENERIC-LABEL: test_x86_fmsub_213: 4867; GENERIC: # %bb.0: 4868; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 4869; GENERIC-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [3:1.00] 4870; GENERIC-NEXT: retq # sched: [1:1.00] 4871; 4872; SKX-LABEL: test_x86_fmsub_213: 4873; SKX: # %bb.0: 4874; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4875; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50] 4876; SKX-NEXT: retq # sched: [7:1.00] 4877 %x = fmul double %a0, %a1 4878 %res = fsub double %x, %a2 4879 ret double %res 4880} 4881 4882define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { 4883; GENERIC-LABEL: test_x86_fmsub_213_m: 4884; GENERIC: # %bb.0: 4885; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 4886; GENERIC-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 4887; GENERIC-NEXT: retq # sched: [1:1.00] 4888; 4889; SKX-LABEL: test_x86_fmsub_213_m: 4890; SKX: # %bb.0: 4891; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4892; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 4893; SKX-NEXT: retq # sched: [7:1.00] 4894 %a2 = load double , double *%a2_ptr 4895 %x = fmul double %a0, %a1 4896 %res = fsub double %x, %a2 4897 ret double %res 4898} 4899 4900define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { 4901; GENERIC-LABEL: test_x86_fmsub_231_m: 4902; GENERIC: # %bb.0: 4903; GENERIC-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 4904; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4905; GENERIC-NEXT: retq # sched: [1:1.00] 4906; 4907; SKX-LABEL: test_x86_fmsub_231_m: 4908; SKX: # %bb.0: 4909; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 4910; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4911; SKX-NEXT: retq # sched: [7:1.00] 4912 %a2 = load double , double *%a2_ptr 4913 %x = fmul double %a0, %a2 4914 %res = fsub double %x, %a1 4915 ret double %res 4916} 4917 4918define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { 4919; GENERIC-LABEL: test231_br: 4920; GENERIC: # %bb.0: 4921; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [12:1.00] 4922; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00] 4923; GENERIC-NEXT: retq # sched: [1:1.00] 4924; 4925; SKX-LABEL: test231_br: 4926; SKX: # %bb.0: 4927; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] 4928; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4929; SKX-NEXT: retq # sched: [7:1.00] 4930 %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> 4931 %b2 = fadd <16 x float> %b1, %a2 4932 ret <16 x float> %b2 4933} 4934 4935define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { 4936; GENERIC-LABEL: test213_br: 4937; GENERIC: # %bb.0: 4938; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00] 4939; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00] 4940; GENERIC-NEXT: retq # sched: [1:1.00] 4941; 4942; SKX-LABEL: test213_br: 4943; SKX: # %bb.0: 4944; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] 4945; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] 4946; SKX-NEXT: retq # sched: [7:1.00] 4947 %b1 = fmul <16 x float> %a1, %a2 4948 %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> 4949 ret <16 x float> %b2 4950} 4951 4952;mask (a*c+b , a) 4953define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { 4954; GENERIC-LABEL: test_x86_fmadd132_ps: 4955; GENERIC: # %bb.0: 4956; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] 4957; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] 4958; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [12:1.00] 4959; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00] 4960; GENERIC-NEXT: retq # sched: [1:1.00] 4961; 4962; SKX-LABEL: test_x86_fmadd132_ps: 4963; SKX: # %bb.0: 4964; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] 4965; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] 4966; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50] 4967; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50] 4968; SKX-NEXT: retq # sched: [7:1.00] 4969 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 4970 %x = fmul <16 x float> %a0, %a2 4971 %y = fadd <16 x float> %x, %a1 4972 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0 4973 ret <16 x float> %res 4974} 4975 4976;mask (a*c+b , b) 4977define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { 4978; GENERIC-LABEL: test_x86_fmadd231_ps: 4979; GENERIC: # %bb.0: 4980; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] 4981; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] 4982; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [12:1.00] 4983; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00] 4984; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 4985; GENERIC-NEXT: retq # sched: [1:1.00] 4986; 4987; SKX-LABEL: test_x86_fmadd231_ps: 4988; SKX: # %bb.0: 4989; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] 4990; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] 4991; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] 4992; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50] 4993; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 4994; SKX-NEXT: retq # sched: [7:1.00] 4995 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 4996 %x = fmul <16 x float> %a0, %a2 4997 %y = fadd <16 x float> %x, %a1 4998 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 4999 ret <16 x float> %res 5000} 5001 5002;mask (b*a+c , b) 5003define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { 5004; GENERIC-LABEL: test_x86_fmadd213_ps: 5005; GENERIC: # %bb.0: 5006; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] 5007; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33] 5008; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00] 5009; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [10:1.00] 5010; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 5011; GENERIC-NEXT: retq # sched: [1:1.00] 5012; 5013; SKX-LABEL: test_x86_fmadd213_ps: 5014; SKX: # %bb.0: 5015; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] 5016; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] 5017; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] 5018; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50] 5019; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 5020; SKX-NEXT: retq # sched: [7:1.00] 5021 %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 5022 %x = fmul <16 x float> %a1, %a0 5023 %y = fadd <16 x float> %x, %a2 5024 %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1 5025 ret <16 x float> %res 5026} 5027 5028define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { 5029; GENERIC-LABEL: vpandd: 5030; GENERIC: # %bb.0: # %entry 5031; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5032; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5033; GENERIC-NEXT: retq # sched: [1:1.00] 5034; 5035; SKX-LABEL: vpandd: 5036; SKX: # %bb.0: # %entry 5037; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5038; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5039; SKX-NEXT: retq # sched: [7:1.00] 5040entry: 5041 ; Force the execution domain with an add. 5042 %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, 5043 i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5044 %x = and <16 x i32> %a2, %b 5045 ret <16 x i32> %x 5046} 5047 5048define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { 5049; GENERIC-LABEL: vpandnd: 5050; GENERIC: # %bb.0: # %entry 5051; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5052; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] 5053; GENERIC-NEXT: retq # sched: [1:1.00] 5054; 5055; SKX-LABEL: vpandnd: 5056; SKX: # %bb.0: # %entry 5057; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5058; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] 5059; SKX-NEXT: retq # sched: [7:1.00] 5060entry: 5061 ; Force the execution domain with an add. 5062 %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, 5063 i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 5064 %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, 5065 i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 5066 %x = and <16 x i32> %a2, %b2 5067 ret <16 x i32> %x 5068} 5069 5070define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { 5071; GENERIC-LABEL: vpord: 5072; GENERIC: # %bb.0: # %entry 5073; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5074; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5075; GENERIC-NEXT: retq # sched: [1:1.00] 5076; 5077; SKX-LABEL: vpord: 5078; SKX: # %bb.0: # %entry 5079; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5080; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5081; SKX-NEXT: retq # sched: [7:1.00] 5082entry: 5083 ; Force the execution domain with an add. 5084 %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, 5085 i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> 5086 %x = or <16 x i32> %a2, %b 5087 ret <16 x i32> %x 5088} 5089 5090define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { 5091; GENERIC-LABEL: vpxord: 5092; GENERIC: # %bb.0: # %entry 5093; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5094; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5095; GENERIC-NEXT: retq # sched: [1:1.00] 5096; 5097; SKX-LABEL: vpxord: 5098; SKX: # %bb.0: # %entry 5099; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] 5100; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5101; SKX-NEXT: retq # sched: [7:1.00] 5102entry: 5103 ; Force the execution domain with an add. 5104 %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, 5105 i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 5106 %x = xor <16 x i32> %a2, %b 5107 ret <16 x i32> %x 5108} 5109 5110define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { 5111; GENERIC-LABEL: vpandq: 5112; GENERIC: # %bb.0: # %entry 5113; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5114; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5115; GENERIC-NEXT: retq # sched: [1:1.00] 5116; 5117; SKX-LABEL: vpandq: 5118; SKX: # %bb.0: # %entry 5119; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5120; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5121; SKX-NEXT: retq # sched: [7:1.00] 5122entry: 5123 ; Force the execution domain with an add. 5124 %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6> 5125 %x = and <8 x i64> %a2, %b 5126 ret <8 x i64> %x 5127} 5128 5129define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { 5130; GENERIC-LABEL: vpandnq: 5131; GENERIC: # %bb.0: # %entry 5132; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5133; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] 5134; GENERIC-NEXT: retq # sched: [1:1.00] 5135; 5136; SKX-LABEL: vpandnq: 5137; SKX: # %bb.0: # %entry 5138; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5139; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] 5140; SKX-NEXT: retq # sched: [7:1.00] 5141entry: 5142 ; Force the execution domain with an add. 5143 %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7> 5144 %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> 5145 %x = and <8 x i64> %a2, %b2 5146 ret <8 x i64> %x 5147} 5148 5149define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { 5150; GENERIC-LABEL: vporq: 5151; GENERIC: # %bb.0: # %entry 5152; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5153; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5154; GENERIC-NEXT: retq # sched: [1:1.00] 5155; 5156; SKX-LABEL: vporq: 5157; SKX: # %bb.0: # %entry 5158; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5159; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5160; SKX-NEXT: retq # sched: [7:1.00] 5161entry: 5162 ; Force the execution domain with an add. 5163 %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8> 5164 %x = or <8 x i64> %a2, %b 5165 ret <8 x i64> %x 5166} 5167 5168define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { 5169; GENERIC-LABEL: vpxorq: 5170; GENERIC: # %bb.0: # %entry 5171; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5172; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] 5173; GENERIC-NEXT: retq # sched: [1:1.00] 5174; 5175; SKX-LABEL: vpxorq: 5176; SKX: # %bb.0: # %entry 5177; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] 5178; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5179; SKX-NEXT: retq # sched: [7:1.00] 5180entry: 5181 ; Force the execution domain with an add. 5182 %a2 = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9> 5183 %x = xor <8 x i64> %a2, %b 5184 ret <8 x i64> %x 5185} 5186 5187define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { 5188; GENERIC-LABEL: and_v64i8: 5189; GENERIC: # %bb.0: 5190; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5191; GENERIC-NEXT: retq # sched: [1:1.00] 5192; 5193; SKX-LABEL: and_v64i8: 5194; SKX: # %bb.0: 5195; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5196; SKX-NEXT: retq # sched: [7:1.00] 5197 %res = and <64 x i8> %a, %b 5198 ret <64 x i8> %res 5199} 5200 5201define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { 5202; GENERIC-LABEL: andn_v64i8: 5203; GENERIC: # %bb.0: 5204; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 5205; GENERIC-NEXT: retq # sched: [1:1.00] 5206; 5207; SKX-LABEL: andn_v64i8: 5208; SKX: # %bb.0: 5209; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] 5210; SKX-NEXT: retq # sched: [7:1.00] 5211 %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 5212 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 5213 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 5214 i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 5215 %res = and <64 x i8> %a, %b2 5216 ret <64 x i8> %res 5217} 5218 5219define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) { 5220; GENERIC-LABEL: or_v64i8: 5221; GENERIC: # %bb.0: 5222; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5223; GENERIC-NEXT: retq # sched: [1:1.00] 5224; 5225; SKX-LABEL: or_v64i8: 5226; SKX: # %bb.0: 5227; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5228; SKX-NEXT: retq # sched: [7:1.00] 5229 %res = or <64 x i8> %a, %b 5230 ret <64 x i8> %res 5231} 5232 5233define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { 5234; GENERIC-LABEL: xor_v64i8: 5235; GENERIC: # %bb.0: 5236; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5237; GENERIC-NEXT: retq # sched: [1:1.00] 5238; 5239; SKX-LABEL: xor_v64i8: 5240; SKX: # %bb.0: 5241; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5242; SKX-NEXT: retq # sched: [7:1.00] 5243 %res = xor <64 x i8> %a, %b 5244 ret <64 x i8> %res 5245} 5246 5247define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { 5248; GENERIC-LABEL: and_v32i16: 5249; GENERIC: # %bb.0: 5250; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5251; GENERIC-NEXT: retq # sched: [1:1.00] 5252; 5253; SKX-LABEL: and_v32i16: 5254; SKX: # %bb.0: 5255; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5256; SKX-NEXT: retq # sched: [7:1.00] 5257 %res = and <32 x i16> %a, %b 5258 ret <32 x i16> %res 5259} 5260 5261define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { 5262; GENERIC-LABEL: andn_v32i16: 5263; GENERIC: # %bb.0: 5264; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00] 5265; GENERIC-NEXT: retq # sched: [1:1.00] 5266; 5267; SKX-LABEL: andn_v32i16: 5268; SKX: # %bb.0: 5269; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] 5270; SKX-NEXT: retq # sched: [7:1.00] 5271 %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, 5272 i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 5273 %res = and <32 x i16> %a, %b2 5274 ret <32 x i16> %res 5275} 5276 5277define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { 5278; GENERIC-LABEL: or_v32i16: 5279; GENERIC: # %bb.0: 5280; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5281; GENERIC-NEXT: retq # sched: [1:1.00] 5282; 5283; SKX-LABEL: or_v32i16: 5284; SKX: # %bb.0: 5285; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5286; SKX-NEXT: retq # sched: [7:1.00] 5287 %res = or <32 x i16> %a, %b 5288 ret <32 x i16> %res 5289} 5290 5291define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { 5292; GENERIC-LABEL: xor_v32i16: 5293; GENERIC: # %bb.0: 5294; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00] 5295; GENERIC-NEXT: retq # sched: [1:1.00] 5296; 5297; SKX-LABEL: xor_v32i16: 5298; SKX: # %bb.0: 5299; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] 5300; SKX-NEXT: retq # sched: [7:1.00] 5301 %res = xor <32 x i16> %a, %b 5302 ret <32 x i16> %res 5303} 5304 5305define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { 5306; GENERIC-LABEL: masked_and_v16f32: 5307; GENERIC: # %bb.0: 5308; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5309; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5310; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5311; GENERIC-NEXT: retq # sched: [1:1.00] 5312; 5313; SKX-LABEL: masked_and_v16f32: 5314; SKX: # %bb.0: 5315; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5316; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5317; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5318; SKX-NEXT: retq # sched: [7:1.00] 5319 %a1 = bitcast <16 x float> %a to <16 x i32> 5320 %b1 = bitcast <16 x float> %b to <16 x i32> 5321 %passThru1 = bitcast <16 x float> %passThru to <16 x i32> 5322 %mask1 = bitcast i16 %mask to <16 x i1> 5323 %op = and <16 x i32> %a1, %b1 5324 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 5325 %cast = bitcast <16 x i32> %select to <16 x float> 5326 %add = fadd <16 x float> %c, %cast 5327 ret <16 x float> %add 5328} 5329 5330define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { 5331; GENERIC-LABEL: masked_or_v16f32: 5332; GENERIC: # %bb.0: 5333; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5334; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5335; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5336; GENERIC-NEXT: retq # sched: [1:1.00] 5337; 5338; SKX-LABEL: masked_or_v16f32: 5339; SKX: # %bb.0: 5340; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5341; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5342; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5343; SKX-NEXT: retq # sched: [7:1.00] 5344 %a1 = bitcast <16 x float> %a to <16 x i32> 5345 %b1 = bitcast <16 x float> %b to <16 x i32> 5346 %passThru1 = bitcast <16 x float> %passThru to <16 x i32> 5347 %mask1 = bitcast i16 %mask to <16 x i1> 5348 %op = and <16 x i32> %a1, %b1 5349 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 5350 %cast = bitcast <16 x i32> %select to <16 x float> 5351 %add = fadd <16 x float> %c, %cast 5352 ret <16 x float> %add 5353} 5354 5355define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { 5356; GENERIC-LABEL: masked_xor_v16f32: 5357; GENERIC: # %bb.0: 5358; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5359; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5360; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5361; GENERIC-NEXT: retq # sched: [1:1.00] 5362; 5363; SKX-LABEL: masked_xor_v16f32: 5364; SKX: # %bb.0: 5365; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5366; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5367; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5368; SKX-NEXT: retq # sched: [7:1.00] 5369 %a1 = bitcast <16 x float> %a to <16 x i32> 5370 %b1 = bitcast <16 x float> %b to <16 x i32> 5371 %passThru1 = bitcast <16 x float> %passThru to <16 x i32> 5372 %mask1 = bitcast i16 %mask to <16 x i1> 5373 %op = and <16 x i32> %a1, %b1 5374 %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1 5375 %cast = bitcast <16 x i32> %select to <16 x float> 5376 %add = fadd <16 x float> %c, %cast 5377 ret <16 x float> %add 5378} 5379 5380define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { 5381; GENERIC-LABEL: masked_and_v8f64: 5382; GENERIC: # %bb.0: 5383; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5384; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5385; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5386; GENERIC-NEXT: retq # sched: [1:1.00] 5387; 5388; SKX-LABEL: masked_and_v8f64: 5389; SKX: # %bb.0: 5390; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5391; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5392; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5393; SKX-NEXT: retq # sched: [7:1.00] 5394 %a1 = bitcast <8 x double> %a to <8 x i64> 5395 %b1 = bitcast <8 x double> %b to <8 x i64> 5396 %passThru1 = bitcast <8 x double> %passThru to <8 x i64> 5397 %mask1 = bitcast i8 %mask to <8 x i1> 5398 %op = and <8 x i64> %a1, %b1 5399 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 5400 %cast = bitcast <8 x i64> %select to <8 x double> 5401 %add = fadd <8 x double> %c, %cast 5402 ret <8 x double> %add 5403} 5404 5405define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { 5406; GENERIC-LABEL: masked_or_v8f64: 5407; GENERIC: # %bb.0: 5408; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5409; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5410; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5411; GENERIC-NEXT: retq # sched: [1:1.00] 5412; 5413; SKX-LABEL: masked_or_v8f64: 5414; SKX: # %bb.0: 5415; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5416; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5417; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5418; SKX-NEXT: retq # sched: [7:1.00] 5419 %a1 = bitcast <8 x double> %a to <8 x i64> 5420 %b1 = bitcast <8 x double> %b to <8 x i64> 5421 %passThru1 = bitcast <8 x double> %passThru to <8 x i64> 5422 %mask1 = bitcast i8 %mask to <8 x i1> 5423 %op = and <8 x i64> %a1, %b1 5424 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 5425 %cast = bitcast <8 x i64> %select to <8 x double> 5426 %add = fadd <8 x double> %c, %cast 5427 ret <8 x double> %add 5428} 5429 5430define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { 5431; GENERIC-LABEL: masked_xor_v8f64: 5432; GENERIC: # %bb.0: 5433; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5434; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00] 5435; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00] 5436; GENERIC-NEXT: retq # sched: [1:1.00] 5437; 5438; SKX-LABEL: masked_xor_v8f64: 5439; SKX: # %bb.0: 5440; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5441; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] 5442; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] 5443; SKX-NEXT: retq # sched: [7:1.00] 5444 %a1 = bitcast <8 x double> %a to <8 x i64> 5445 %b1 = bitcast <8 x double> %b to <8 x i64> 5446 %passThru1 = bitcast <8 x double> %passThru to <8 x i64> 5447 %mask1 = bitcast i8 %mask to <8 x i1> 5448 %op = and <8 x i64> %a1, %b1 5449 %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1 5450 %cast = bitcast <8 x i64> %select to <8 x double> 5451 %add = fadd <8 x double> %c, %cast 5452 ret <8 x double> %add 5453} 5454 5455define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { 5456; GENERIC-LABEL: test_mm512_mask_and_epi32: 5457; GENERIC: # %bb.0: # %entry 5458; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5459; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5460; GENERIC-NEXT: retq # sched: [1:1.00] 5461; 5462; SKX-LABEL: test_mm512_mask_and_epi32: 5463; SKX: # %bb.0: # %entry 5464; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5465; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5466; SKX-NEXT: retq # sched: [7:1.00] 5467entry: 5468 %and1.i.i = and <8 x i64> %__a, %__b 5469 %0 = bitcast <8 x i64> %and1.i.i to <16 x i32> 5470 %1 = bitcast <8 x i64> %__src to <16 x i32> 5471 %2 = bitcast i16 %__k to <16 x i1> 5472 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 5473 %4 = bitcast <16 x i32> %3 to <8 x i64> 5474 ret <8 x i64> %4 5475} 5476 5477define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { 5478; GENERIC-LABEL: test_mm512_mask_or_epi32: 5479; GENERIC: # %bb.0: # %entry 5480; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5481; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5482; GENERIC-NEXT: retq # sched: [1:1.00] 5483; 5484; SKX-LABEL: test_mm512_mask_or_epi32: 5485; SKX: # %bb.0: # %entry 5486; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5487; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5488; SKX-NEXT: retq # sched: [7:1.00] 5489entry: 5490 %or1.i.i = or <8 x i64> %__a, %__b 5491 %0 = bitcast <8 x i64> %or1.i.i to <16 x i32> 5492 %1 = bitcast <8 x i64> %__src to <16 x i32> 5493 %2 = bitcast i16 %__k to <16 x i1> 5494 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 5495 %4 = bitcast <16 x i32> %3 to <8 x i64> 5496 ret <8 x i64> %4 5497} 5498 5499define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { 5500; GENERIC-LABEL: test_mm512_mask_xor_epi32: 5501; GENERIC: # %bb.0: # %entry 5502; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5503; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5504; GENERIC-NEXT: retq # sched: [1:1.00] 5505; 5506; SKX-LABEL: test_mm512_mask_xor_epi32: 5507; SKX: # %bb.0: # %entry 5508; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5509; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5510; SKX-NEXT: retq # sched: [7:1.00] 5511entry: 5512 %xor1.i.i = xor <8 x i64> %__a, %__b 5513 %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32> 5514 %1 = bitcast <8 x i64> %__src to <16 x i32> 5515 %2 = bitcast i16 %__k to <16 x i1> 5516 %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1 5517 %4 = bitcast <16 x i32> %3 to <8 x i64> 5518 ret <8 x i64> %4 5519} 5520 5521define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5522; GENERIC-LABEL: test_mm512_mask_xor_pd: 5523; GENERIC: # %bb.0: # %entry 5524; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5525; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5526; GENERIC-NEXT: retq # sched: [1:1.00] 5527; 5528; SKX-LABEL: test_mm512_mask_xor_pd: 5529; SKX: # %bb.0: # %entry 5530; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5531; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5532; SKX-NEXT: retq # sched: [7:1.00] 5533entry: 5534 %0 = bitcast <8 x double> %__A to <8 x i64> 5535 %1 = bitcast <8 x double> %__B to <8 x i64> 5536 %xor.i.i = xor <8 x i64> %0, %1 5537 %2 = bitcast <8 x i64> %xor.i.i to <8 x double> 5538 %3 = bitcast i8 %__U to <8 x i1> 5539 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W 5540 ret <8 x double> %4 5541} 5542 5543define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5544; GENERIC-LABEL: test_mm512_maskz_xor_pd: 5545; GENERIC: # %bb.0: # %entry 5546; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5547; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 5548; GENERIC-NEXT: retq # sched: [1:1.00] 5549; 5550; SKX-LABEL: test_mm512_maskz_xor_pd: 5551; SKX: # %bb.0: # %entry 5552; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5553; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 5554; SKX-NEXT: retq # sched: [7:1.00] 5555entry: 5556 %0 = bitcast <8 x double> %__A to <8 x i64> 5557 %1 = bitcast <8 x double> %__B to <8 x i64> 5558 %xor.i.i = xor <8 x i64> %0, %1 5559 %2 = bitcast <8 x i64> %xor.i.i to <8 x double> 5560 %3 = bitcast i8 %__U to <8 x i1> 5561 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer 5562 ret <8 x double> %4 5563} 5564 5565define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5566; GENERIC-LABEL: test_mm512_mask_xor_ps: 5567; GENERIC: # %bb.0: # %entry 5568; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5569; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5570; GENERIC-NEXT: retq # sched: [1:1.00] 5571; 5572; SKX-LABEL: test_mm512_mask_xor_ps: 5573; SKX: # %bb.0: # %entry 5574; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5575; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5576; SKX-NEXT: retq # sched: [7:1.00] 5577entry: 5578 %0 = bitcast <16 x float> %__A to <16 x i32> 5579 %1 = bitcast <16 x float> %__B to <16 x i32> 5580 %xor.i.i = xor <16 x i32> %0, %1 5581 %2 = bitcast <16 x i32> %xor.i.i to <16 x float> 5582 %3 = bitcast i16 %__U to <16 x i1> 5583 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W 5584 ret <16 x float> %4 5585} 5586 5587define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5588; GENERIC-LABEL: test_mm512_maskz_xor_ps: 5589; GENERIC: # %bb.0: # %entry 5590; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5591; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 5592; GENERIC-NEXT: retq # sched: [1:1.00] 5593; 5594; SKX-LABEL: test_mm512_maskz_xor_ps: 5595; SKX: # %bb.0: # %entry 5596; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5597; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 5598; SKX-NEXT: retq # sched: [7:1.00] 5599entry: 5600 %0 = bitcast <16 x float> %__A to <16 x i32> 5601 %1 = bitcast <16 x float> %__B to <16 x i32> 5602 %xor.i.i = xor <16 x i32> %0, %1 5603 %2 = bitcast <16 x i32> %xor.i.i to <16 x float> 5604 %3 = bitcast i16 %__U to <16 x i1> 5605 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer 5606 ret <16 x float> %4 5607} 5608 5609define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5610; GENERIC-LABEL: test_mm512_mask_or_pd: 5611; GENERIC: # %bb.0: # %entry 5612; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5613; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] 5614; GENERIC-NEXT: retq # sched: [1:1.00] 5615; 5616; SKX-LABEL: test_mm512_mask_or_pd: 5617; SKX: # %bb.0: # %entry 5618; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5619; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] 5620; SKX-NEXT: retq # sched: [7:1.00] 5621entry: 5622 %0 = bitcast <8 x double> %__A to <8 x i64> 5623 %1 = bitcast <8 x double> %__B to <8 x i64> 5624 %or.i.i = or <8 x i64> %1, %0 5625 %2 = bitcast <8 x i64> %or.i.i to <8 x double> 5626 %3 = bitcast i8 %__U to <8 x i1> 5627 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W 5628 ret <8 x double> %4 5629} 5630 5631define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5632; GENERIC-LABEL: test_mm512_maskz_or_pd: 5633; GENERIC: # %bb.0: # %entry 5634; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5635; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] 5636; GENERIC-NEXT: retq # sched: [1:1.00] 5637; 5638; SKX-LABEL: test_mm512_maskz_or_pd: 5639; SKX: # %bb.0: # %entry 5640; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5641; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 5642; SKX-NEXT: retq # sched: [7:1.00] 5643entry: 5644 %0 = bitcast <8 x double> %__A to <8 x i64> 5645 %1 = bitcast <8 x double> %__B to <8 x i64> 5646 %or.i.i = or <8 x i64> %1, %0 5647 %2 = bitcast <8 x i64> %or.i.i to <8 x double> 5648 %3 = bitcast i8 %__U to <8 x i1> 5649 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer 5650 ret <8 x double> %4 5651} 5652 5653define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5654; GENERIC-LABEL: test_mm512_mask_or_ps: 5655; GENERIC: # %bb.0: # %entry 5656; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5657; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] 5658; GENERIC-NEXT: retq # sched: [1:1.00] 5659; 5660; SKX-LABEL: test_mm512_mask_or_ps: 5661; SKX: # %bb.0: # %entry 5662; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5663; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] 5664; SKX-NEXT: retq # sched: [7:1.00] 5665entry: 5666 %0 = bitcast <16 x float> %__A to <16 x i32> 5667 %1 = bitcast <16 x float> %__B to <16 x i32> 5668 %or.i.i = or <16 x i32> %1, %0 5669 %2 = bitcast <16 x i32> %or.i.i to <16 x float> 5670 %3 = bitcast i16 %__U to <16 x i1> 5671 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W 5672 ret <16 x float> %4 5673} 5674 5675define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5676; GENERIC-LABEL: test_mm512_maskz_or_ps: 5677; GENERIC: # %bb.0: # %entry 5678; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5679; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] 5680; GENERIC-NEXT: retq # sched: [1:1.00] 5681; 5682; SKX-LABEL: test_mm512_maskz_or_ps: 5683; SKX: # %bb.0: # %entry 5684; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5685; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 5686; SKX-NEXT: retq # sched: [7:1.00] 5687entry: 5688 %0 = bitcast <16 x float> %__A to <16 x i32> 5689 %1 = bitcast <16 x float> %__B to <16 x i32> 5690 %or.i.i = or <16 x i32> %1, %0 5691 %2 = bitcast <16 x i32> %or.i.i to <16 x float> 5692 %3 = bitcast i16 %__U to <16 x i1> 5693 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer 5694 ret <16 x float> %4 5695} 5696 5697define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5698; GENERIC-LABEL: test_mm512_mask_and_pd: 5699; GENERIC: # %bb.0: # %entry 5700; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5701; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] 5702; GENERIC-NEXT: retq # sched: [1:1.00] 5703; 5704; SKX-LABEL: test_mm512_mask_and_pd: 5705; SKX: # %bb.0: # %entry 5706; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5707; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] 5708; SKX-NEXT: retq # sched: [7:1.00] 5709entry: 5710 %0 = bitcast <8 x double> %__A to <8 x i64> 5711 %1 = bitcast <8 x double> %__B to <8 x i64> 5712 %and.i.i = and <8 x i64> %1, %0 5713 %2 = bitcast <8 x i64> %and.i.i to <8 x double> 5714 %3 = bitcast i8 %__U to <8 x i1> 5715 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W 5716 ret <8 x double> %4 5717} 5718 5719define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5720; GENERIC-LABEL: test_mm512_maskz_and_pd: 5721; GENERIC: # %bb.0: # %entry 5722; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5723; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] 5724; GENERIC-NEXT: retq # sched: [1:1.00] 5725; 5726; SKX-LABEL: test_mm512_maskz_and_pd: 5727; SKX: # %bb.0: # %entry 5728; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5729; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 5730; SKX-NEXT: retq # sched: [7:1.00] 5731entry: 5732 %0 = bitcast <8 x double> %__A to <8 x i64> 5733 %1 = bitcast <8 x double> %__B to <8 x i64> 5734 %and.i.i = and <8 x i64> %1, %0 5735 %2 = bitcast <8 x i64> %and.i.i to <8 x double> 5736 %3 = bitcast i8 %__U to <8 x i1> 5737 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer 5738 ret <8 x double> %4 5739} 5740 5741define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5742; GENERIC-LABEL: test_mm512_mask_and_ps: 5743; GENERIC: # %bb.0: # %entry 5744; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5745; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00] 5746; GENERIC-NEXT: retq # sched: [1:1.00] 5747; 5748; SKX-LABEL: test_mm512_mask_and_ps: 5749; SKX: # %bb.0: # %entry 5750; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5751; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] 5752; SKX-NEXT: retq # sched: [7:1.00] 5753entry: 5754 %0 = bitcast <16 x float> %__A to <16 x i32> 5755 %1 = bitcast <16 x float> %__B to <16 x i32> 5756 %and.i.i = and <16 x i32> %1, %0 5757 %2 = bitcast <16 x i32> %and.i.i to <16 x float> 5758 %3 = bitcast i16 %__U to <16 x i1> 5759 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W 5760 ret <16 x float> %4 5761} 5762 5763define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5764; GENERIC-LABEL: test_mm512_maskz_and_ps: 5765; GENERIC: # %bb.0: # %entry 5766; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5767; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] 5768; GENERIC-NEXT: retq # sched: [1:1.00] 5769; 5770; SKX-LABEL: test_mm512_maskz_and_ps: 5771; SKX: # %bb.0: # %entry 5772; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5773; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] 5774; SKX-NEXT: retq # sched: [7:1.00] 5775entry: 5776 %0 = bitcast <16 x float> %__A to <16 x i32> 5777 %1 = bitcast <16 x float> %__B to <16 x i32> 5778 %and.i.i = and <16 x i32> %1, %0 5779 %2 = bitcast <16 x i32> %and.i.i to <16 x float> 5780 %3 = bitcast i16 %__U to <16 x i1> 5781 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer 5782 ret <16 x float> %4 5783} 5784 5785define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5786; GENERIC-LABEL: test_mm512_mask_andnot_pd: 5787; GENERIC: # %bb.0: # %entry 5788; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5789; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5790; GENERIC-NEXT: retq # sched: [1:1.00] 5791; 5792; SKX-LABEL: test_mm512_mask_andnot_pd: 5793; SKX: # %bb.0: # %entry 5794; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5795; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5796; SKX-NEXT: retq # sched: [7:1.00] 5797entry: 5798 %0 = bitcast <8 x double> %__A to <8 x i64> 5799 %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> 5800 %1 = bitcast <8 x double> %__B to <8 x i64> 5801 %and.i.i = and <8 x i64> %1, %neg.i.i 5802 %2 = bitcast <8 x i64> %and.i.i to <8 x double> 5803 %3 = bitcast i8 %__U to <8 x i1> 5804 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W 5805 ret <8 x double> %4 5806} 5807 5808define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { 5809; GENERIC-LABEL: test_mm512_maskz_andnot_pd: 5810; GENERIC: # %bb.0: # %entry 5811; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5812; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 5813; GENERIC-NEXT: retq # sched: [1:1.00] 5814; 5815; SKX-LABEL: test_mm512_maskz_andnot_pd: 5816; SKX: # %bb.0: # %entry 5817; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5818; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 5819; SKX-NEXT: retq # sched: [7:1.00] 5820entry: 5821 %0 = bitcast <8 x double> %__A to <8 x i64> 5822 %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> 5823 %1 = bitcast <8 x double> %__B to <8 x i64> 5824 %and.i.i = and <8 x i64> %1, %neg.i.i 5825 %2 = bitcast <8 x i64> %and.i.i to <8 x double> 5826 %3 = bitcast i8 %__U to <8 x i1> 5827 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer 5828 ret <8 x double> %4 5829} 5830 5831define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5832; GENERIC-LABEL: test_mm512_mask_andnot_ps: 5833; GENERIC: # %bb.0: # %entry 5834; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5835; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00] 5836; GENERIC-NEXT: retq # sched: [1:1.00] 5837; 5838; SKX-LABEL: test_mm512_mask_andnot_ps: 5839; SKX: # %bb.0: # %entry 5840; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5841; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] 5842; SKX-NEXT: retq # sched: [7:1.00] 5843entry: 5844 %0 = bitcast <16 x float> %__A to <16 x i32> 5845 %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 5846 %1 = bitcast <16 x float> %__B to <16 x i32> 5847 %and.i.i = and <16 x i32> %1, %neg.i.i 5848 %2 = bitcast <16 x i32> %and.i.i to <16 x float> 5849 %3 = bitcast i16 %__U to <16 x i1> 5850 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W 5851 ret <16 x float> %4 5852} 5853 5854define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { 5855; GENERIC-LABEL: test_mm512_maskz_andnot_ps: 5856; GENERIC: # %bb.0: # %entry 5857; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 5858; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 5859; GENERIC-NEXT: retq # sched: [1:1.00] 5860; 5861; SKX-LABEL: test_mm512_maskz_andnot_ps: 5862; SKX: # %bb.0: # %entry 5863; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 5864; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 5865; SKX-NEXT: retq # sched: [7:1.00] 5866entry: 5867 %0 = bitcast <16 x float> %__A to <16 x i32> 5868 %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 5869 %1 = bitcast <16 x float> %__B to <16 x i32> 5870 %and.i.i = and <16 x i32> %1, %neg.i.i 5871 %2 = bitcast <16 x i32> %and.i.i to <16 x float> 5872 %3 = bitcast i16 %__U to <16 x i1> 5873 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer 5874 ret <16 x float> %4 5875} 5876 5877define i32 @mov_test1(float %x) { 5878; GENERIC-LABEL: mov_test1: 5879; GENERIC: # %bb.0: 5880; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 5881; GENERIC-NEXT: retq # sched: [1:1.00] 5882; 5883; SKX-LABEL: mov_test1: 5884; SKX: # %bb.0: 5885; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 5886; SKX-NEXT: retq # sched: [7:1.00] 5887 %res = bitcast float %x to i32 5888 ret i32 %res 5889} 5890 5891define <4 x i32> @mov_test2(i32 %x) { 5892; GENERIC-LABEL: mov_test2: 5893; GENERIC: # %bb.0: 5894; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 5895; GENERIC-NEXT: retq # sched: [1:1.00] 5896; 5897; SKX-LABEL: mov_test2: 5898; SKX: # %bb.0: 5899; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 5900; SKX-NEXT: retq # sched: [7:1.00] 5901 %res = insertelement <4 x i32>undef, i32 %x, i32 0 5902 ret <4 x i32>%res 5903} 5904 5905define <2 x i64> @mov_test3(i64 %x) { 5906; GENERIC-LABEL: mov_test3: 5907; GENERIC: # %bb.0: 5908; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 5909; GENERIC-NEXT: retq # sched: [1:1.00] 5910; 5911; SKX-LABEL: mov_test3: 5912; SKX: # %bb.0: 5913; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 5914; SKX-NEXT: retq # sched: [7:1.00] 5915 %res = insertelement <2 x i64>undef, i64 %x, i32 0 5916 ret <2 x i64>%res 5917} 5918 5919define <4 x i32> @mov_test4(i32* %x) { 5920; GENERIC-LABEL: mov_test4: 5921; GENERIC: # %bb.0: 5922; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 5923; GENERIC-NEXT: retq # sched: [1:1.00] 5924; 5925; SKX-LABEL: mov_test4: 5926; SKX: # %bb.0: 5927; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 5928; SKX-NEXT: retq # sched: [7:1.00] 5929 %y = load i32, i32* %x 5930 %res = insertelement <4 x i32>undef, i32 %y, i32 0 5931 ret <4 x i32>%res 5932} 5933 5934define void @mov_test5(float %x, float* %y) { 5935; GENERIC-LABEL: mov_test5: 5936; GENERIC: # %bb.0: 5937; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] 5938; GENERIC-NEXT: retq # sched: [1:1.00] 5939; 5940; SKX-LABEL: mov_test5: 5941; SKX: # %bb.0: 5942; SKX-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] 5943; SKX-NEXT: retq # sched: [7:1.00] 5944 store float %x, float* %y, align 4 5945 ret void 5946} 5947 5948define void @mov_test6(double %x, double* %y) { 5949; GENERIC-LABEL: mov_test6: 5950; GENERIC: # %bb.0: 5951; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] 5952; GENERIC-NEXT: retq # sched: [1:1.00] 5953; 5954; SKX-LABEL: mov_test6: 5955; SKX: # %bb.0: 5956; SKX-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] 5957; SKX-NEXT: retq # sched: [7:1.00] 5958 store double %x, double* %y, align 8 5959 ret void 5960} 5961 5962define float @mov_test7(i32* %x) { 5963; GENERIC-LABEL: mov_test7: 5964; GENERIC: # %bb.0: 5965; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 5966; GENERIC-NEXT: retq # sched: [1:1.00] 5967; 5968; SKX-LABEL: mov_test7: 5969; SKX: # %bb.0: 5970; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 5971; SKX-NEXT: retq # sched: [7:1.00] 5972 %y = load i32, i32* %x 5973 %res = bitcast i32 %y to float 5974 ret float %res 5975} 5976 5977define i32 @mov_test8(<4 x i32> %x) { 5978; GENERIC-LABEL: mov_test8: 5979; GENERIC: # %bb.0: 5980; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 5981; GENERIC-NEXT: retq # sched: [1:1.00] 5982; 5983; SKX-LABEL: mov_test8: 5984; SKX: # %bb.0: 5985; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 5986; SKX-NEXT: retq # sched: [7:1.00] 5987 %res = extractelement <4 x i32> %x, i32 0 5988 ret i32 %res 5989} 5990 5991define i64 @mov_test9(<2 x i64> %x) { 5992; GENERIC-LABEL: mov_test9: 5993; GENERIC: # %bb.0: 5994; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 5995; GENERIC-NEXT: retq # sched: [1:1.00] 5996; 5997; SKX-LABEL: mov_test9: 5998; SKX: # %bb.0: 5999; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 6000; SKX-NEXT: retq # sched: [7:1.00] 6001 %res = extractelement <2 x i64> %x, i32 0 6002 ret i64 %res 6003} 6004 6005define <4 x i32> @mov_test10(i32* %x) { 6006; GENERIC-LABEL: mov_test10: 6007; GENERIC: # %bb.0: 6008; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 6009; GENERIC-NEXT: retq # sched: [1:1.00] 6010; 6011; SKX-LABEL: mov_test10: 6012; SKX: # %bb.0: 6013; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 6014; SKX-NEXT: retq # sched: [7:1.00] 6015 %y = load i32, i32* %x, align 4 6016 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 6017 ret <4 x i32>%res 6018} 6019 6020define <4 x float> @mov_test11(float* %x) { 6021; GENERIC-LABEL: mov_test11: 6022; GENERIC: # %bb.0: 6023; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 6024; GENERIC-NEXT: retq # sched: [1:1.00] 6025; 6026; SKX-LABEL: mov_test11: 6027; SKX: # %bb.0: 6028; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 6029; SKX-NEXT: retq # sched: [7:1.00] 6030 %y = load float, float* %x, align 4 6031 %res = insertelement <4 x float>zeroinitializer, float %y, i32 0 6032 ret <4 x float>%res 6033} 6034 6035define <2 x double> @mov_test12(double* %x) { 6036; GENERIC-LABEL: mov_test12: 6037; GENERIC: # %bb.0: 6038; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 6039; GENERIC-NEXT: retq # sched: [1:1.00] 6040; 6041; SKX-LABEL: mov_test12: 6042; SKX: # %bb.0: 6043; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 6044; SKX-NEXT: retq # sched: [7:1.00] 6045 %y = load double, double* %x, align 8 6046 %res = insertelement <2 x double>zeroinitializer, double %y, i32 0 6047 ret <2 x double>%res 6048} 6049 6050define <2 x i64> @mov_test13(i64 %x) { 6051; GENERIC-LABEL: mov_test13: 6052; GENERIC: # %bb.0: 6053; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 6054; GENERIC-NEXT: retq # sched: [1:1.00] 6055; 6056; SKX-LABEL: mov_test13: 6057; SKX: # %bb.0: 6058; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] 6059; SKX-NEXT: retq # sched: [7:1.00] 6060 %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 6061 ret <2 x i64>%res 6062} 6063 6064define <4 x i32> @mov_test14(i32 %x) { 6065; GENERIC-LABEL: mov_test14: 6066; GENERIC: # %bb.0: 6067; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 6068; GENERIC-NEXT: retq # sched: [1:1.00] 6069; 6070; SKX-LABEL: mov_test14: 6071; SKX: # %bb.0: 6072; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] 6073; SKX-NEXT: retq # sched: [7:1.00] 6074 %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 6075 ret <4 x i32>%res 6076} 6077 6078define <4 x i32> @mov_test15(i32* %x) { 6079; GENERIC-LABEL: mov_test15: 6080; GENERIC: # %bb.0: 6081; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 6082; GENERIC-NEXT: retq # sched: [1:1.00] 6083; 6084; SKX-LABEL: mov_test15: 6085; SKX: # %bb.0: 6086; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 6087; SKX-NEXT: retq # sched: [7:1.00] 6088 %y = load i32, i32* %x, align 4 6089 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 6090 ret <4 x i32>%res 6091} 6092 6093define <16 x i32> @mov_test16(i8 * %addr) { 6094; GENERIC-LABEL: mov_test16: 6095; GENERIC: # %bb.0: 6096; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] 6097; GENERIC-NEXT: retq # sched: [1:1.00] 6098; 6099; SKX-LABEL: mov_test16: 6100; SKX: # %bb.0: 6101; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] 6102; SKX-NEXT: retq # sched: [7:1.00] 6103 %vaddr = bitcast i8* %addr to <16 x i32>* 6104 %res = load <16 x i32>, <16 x i32>* %vaddr, align 1 6105 ret <16 x i32>%res 6106} 6107 6108define <16 x i32> @mov_test17(i8 * %addr) { 6109; GENERIC-LABEL: mov_test17: 6110; GENERIC: # %bb.0: 6111; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] 6112; GENERIC-NEXT: retq # sched: [1:1.00] 6113; 6114; SKX-LABEL: mov_test17: 6115; SKX: # %bb.0: 6116; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] 6117; SKX-NEXT: retq # sched: [7:1.00] 6118 %vaddr = bitcast i8* %addr to <16 x i32>* 6119 %res = load <16 x i32>, <16 x i32>* %vaddr, align 64 6120 ret <16 x i32>%res 6121} 6122 6123define void @mov_test18(i8 * %addr, <8 x i64> %data) { 6124; GENERIC-LABEL: mov_test18: 6125; GENERIC: # %bb.0: 6126; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6127; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6128; GENERIC-NEXT: retq # sched: [1:1.00] 6129; 6130; SKX-LABEL: mov_test18: 6131; SKX: # %bb.0: 6132; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6133; SKX-NEXT: vzeroupper # sched: [4:1.00] 6134; SKX-NEXT: retq # sched: [7:1.00] 6135 %vaddr = bitcast i8* %addr to <8 x i64>* 6136 store <8 x i64>%data, <8 x i64>* %vaddr, align 64 6137 ret void 6138} 6139 6140define void @mov_test19(i8 * %addr, <16 x i32> %data) { 6141; GENERIC-LABEL: mov_test19: 6142; GENERIC: # %bb.0: 6143; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6144; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6145; GENERIC-NEXT: retq # sched: [1:1.00] 6146; 6147; SKX-LABEL: mov_test19: 6148; SKX: # %bb.0: 6149; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6150; SKX-NEXT: vzeroupper # sched: [4:1.00] 6151; SKX-NEXT: retq # sched: [7:1.00] 6152 %vaddr = bitcast i8* %addr to <16 x i32>* 6153 store <16 x i32>%data, <16 x i32>* %vaddr, align 1 6154 ret void 6155} 6156 6157define void @mov_test20(i8 * %addr, <16 x i32> %data) { 6158; GENERIC-LABEL: mov_test20: 6159; GENERIC: # %bb.0: 6160; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6161; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6162; GENERIC-NEXT: retq # sched: [1:1.00] 6163; 6164; SKX-LABEL: mov_test20: 6165; SKX: # %bb.0: 6166; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6167; SKX-NEXT: vzeroupper # sched: [4:1.00] 6168; SKX-NEXT: retq # sched: [7:1.00] 6169 %vaddr = bitcast i8* %addr to <16 x i32>* 6170 store <16 x i32>%data, <16 x i32>* %vaddr, align 64 6171 ret void 6172} 6173 6174define <8 x i64> @mov_test21(i8 * %addr) { 6175; GENERIC-LABEL: mov_test21: 6176; GENERIC: # %bb.0: 6177; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] 6178; GENERIC-NEXT: retq # sched: [1:1.00] 6179; 6180; SKX-LABEL: mov_test21: 6181; SKX: # %bb.0: 6182; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] 6183; SKX-NEXT: retq # sched: [7:1.00] 6184 %vaddr = bitcast i8* %addr to <8 x i64>* 6185 %res = load <8 x i64>, <8 x i64>* %vaddr, align 64 6186 ret <8 x i64>%res 6187} 6188 6189define void @mov_test22(i8 * %addr, <8 x i64> %data) { 6190; GENERIC-LABEL: mov_test22: 6191; GENERIC: # %bb.0: 6192; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6193; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6194; GENERIC-NEXT: retq # sched: [1:1.00] 6195; 6196; SKX-LABEL: mov_test22: 6197; SKX: # %bb.0: 6198; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6199; SKX-NEXT: vzeroupper # sched: [4:1.00] 6200; SKX-NEXT: retq # sched: [7:1.00] 6201 %vaddr = bitcast i8* %addr to <8 x i64>* 6202 store <8 x i64>%data, <8 x i64>* %vaddr, align 1 6203 ret void 6204} 6205 6206define <8 x i64> @mov_test23(i8 * %addr) { 6207; GENERIC-LABEL: mov_test23: 6208; GENERIC: # %bb.0: 6209; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] 6210; GENERIC-NEXT: retq # sched: [1:1.00] 6211; 6212; SKX-LABEL: mov_test23: 6213; SKX: # %bb.0: 6214; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] 6215; SKX-NEXT: retq # sched: [7:1.00] 6216 %vaddr = bitcast i8* %addr to <8 x i64>* 6217 %res = load <8 x i64>, <8 x i64>* %vaddr, align 1 6218 ret <8 x i64>%res 6219} 6220 6221define void @mov_test24(i8 * %addr, <8 x double> %data) { 6222; GENERIC-LABEL: mov_test24: 6223; GENERIC: # %bb.0: 6224; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6225; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6226; GENERIC-NEXT: retq # sched: [1:1.00] 6227; 6228; SKX-LABEL: mov_test24: 6229; SKX: # %bb.0: 6230; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6231; SKX-NEXT: vzeroupper # sched: [4:1.00] 6232; SKX-NEXT: retq # sched: [7:1.00] 6233 %vaddr = bitcast i8* %addr to <8 x double>* 6234 store <8 x double>%data, <8 x double>* %vaddr, align 64 6235 ret void 6236} 6237 6238define <8 x double> @mov_test25(i8 * %addr) { 6239; GENERIC-LABEL: mov_test25: 6240; GENERIC: # %bb.0: 6241; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] 6242; GENERIC-NEXT: retq # sched: [1:1.00] 6243; 6244; SKX-LABEL: mov_test25: 6245; SKX: # %bb.0: 6246; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] 6247; SKX-NEXT: retq # sched: [7:1.00] 6248 %vaddr = bitcast i8* %addr to <8 x double>* 6249 %res = load <8 x double>, <8 x double>* %vaddr, align 64 6250 ret <8 x double>%res 6251} 6252 6253define void @mov_test26(i8 * %addr, <16 x float> %data) { 6254; GENERIC-LABEL: mov_test26: 6255; GENERIC: # %bb.0: 6256; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6257; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6258; GENERIC-NEXT: retq # sched: [1:1.00] 6259; 6260; SKX-LABEL: mov_test26: 6261; SKX: # %bb.0: 6262; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 6263; SKX-NEXT: vzeroupper # sched: [4:1.00] 6264; SKX-NEXT: retq # sched: [7:1.00] 6265 %vaddr = bitcast i8* %addr to <16 x float>* 6266 store <16 x float>%data, <16 x float>* %vaddr, align 64 6267 ret void 6268} 6269 6270define <16 x float> @mov_test27(i8 * %addr) { 6271; GENERIC-LABEL: mov_test27: 6272; GENERIC: # %bb.0: 6273; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50] 6274; GENERIC-NEXT: retq # sched: [1:1.00] 6275; 6276; SKX-LABEL: mov_test27: 6277; SKX: # %bb.0: 6278; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] 6279; SKX-NEXT: retq # sched: [7:1.00] 6280 %vaddr = bitcast i8* %addr to <16 x float>* 6281 %res = load <16 x float>, <16 x float>* %vaddr, align 64 6282 ret <16 x float>%res 6283} 6284 6285define void @mov_test28(i8 * %addr, <8 x double> %data) { 6286; GENERIC-LABEL: mov_test28: 6287; GENERIC: # %bb.0: 6288; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6289; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6290; GENERIC-NEXT: retq # sched: [1:1.00] 6291; 6292; SKX-LABEL: mov_test28: 6293; SKX: # %bb.0: 6294; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6295; SKX-NEXT: vzeroupper # sched: [4:1.00] 6296; SKX-NEXT: retq # sched: [7:1.00] 6297 %vaddr = bitcast i8* %addr to <8 x double>* 6298 store <8 x double>%data, <8 x double>* %vaddr, align 1 6299 ret void 6300} 6301 6302define <8 x double> @mov_test29(i8 * %addr) { 6303; GENERIC-LABEL: mov_test29: 6304; GENERIC: # %bb.0: 6305; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] 6306; GENERIC-NEXT: retq # sched: [1:1.00] 6307; 6308; SKX-LABEL: mov_test29: 6309; SKX: # %bb.0: 6310; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] 6311; SKX-NEXT: retq # sched: [7:1.00] 6312 %vaddr = bitcast i8* %addr to <8 x double>* 6313 %res = load <8 x double>, <8 x double>* %vaddr, align 1 6314 ret <8 x double>%res 6315} 6316 6317define void @mov_test30(i8 * %addr, <16 x float> %data) { 6318; GENERIC-LABEL: mov_test30: 6319; GENERIC: # %bb.0: 6320; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6321; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6322; GENERIC-NEXT: retq # sched: [1:1.00] 6323; 6324; SKX-LABEL: mov_test30: 6325; SKX: # %bb.0: 6326; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] 6327; SKX-NEXT: vzeroupper # sched: [4:1.00] 6328; SKX-NEXT: retq # sched: [7:1.00] 6329 %vaddr = bitcast i8* %addr to <16 x float>* 6330 store <16 x float>%data, <16 x float>* %vaddr, align 1 6331 ret void 6332} 6333 6334define <16 x float> @mov_test31(i8 * %addr) { 6335; GENERIC-LABEL: mov_test31: 6336; GENERIC: # %bb.0: 6337; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50] 6338; GENERIC-NEXT: retq # sched: [1:1.00] 6339; 6340; SKX-LABEL: mov_test31: 6341; SKX: # %bb.0: 6342; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] 6343; SKX-NEXT: retq # sched: [7:1.00] 6344 %vaddr = bitcast i8* %addr to <16 x float>* 6345 %res = load <16 x float>, <16 x float>* %vaddr, align 1 6346 ret <16 x float>%res 6347} 6348 6349define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { 6350; GENERIC-LABEL: mov_test32: 6351; GENERIC: # %bb.0: 6352; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 6353; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6354; GENERIC-NEXT: retq # sched: [1:1.00] 6355; 6356; SKX-LABEL: mov_test32: 6357; SKX: # %bb.0: 6358; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 6359; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6360; SKX-NEXT: retq # sched: [7:1.00] 6361 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 6362 %vaddr = bitcast i8* %addr to <16 x i32>* 6363 %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 6364 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old 6365 ret <16 x i32>%res 6366} 6367 6368define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { 6369; GENERIC-LABEL: mov_test33: 6370; GENERIC: # %bb.0: 6371; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 6372; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6373; GENERIC-NEXT: retq # sched: [1:1.00] 6374; 6375; SKX-LABEL: mov_test33: 6376; SKX: # %bb.0: 6377; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 6378; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6379; SKX-NEXT: retq # sched: [7:1.00] 6380 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 6381 %vaddr = bitcast i8* %addr to <16 x i32>* 6382 %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 6383 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old 6384 ret <16 x i32>%res 6385} 6386 6387define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) { 6388; GENERIC-LABEL: mov_test34: 6389; GENERIC: # %bb.0: 6390; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 6391; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6392; GENERIC-NEXT: retq # sched: [1:1.00] 6393; 6394; SKX-LABEL: mov_test34: 6395; SKX: # %bb.0: 6396; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 6397; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6398; SKX-NEXT: retq # sched: [7:1.00] 6399 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 6400 %vaddr = bitcast i8* %addr to <16 x i32>* 6401 %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 6402 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer 6403 ret <16 x i32>%res 6404} 6405 6406define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) { 6407; GENERIC-LABEL: mov_test35: 6408; GENERIC: # %bb.0: 6409; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 6410; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6411; GENERIC-NEXT: retq # sched: [1:1.00] 6412; 6413; SKX-LABEL: mov_test35: 6414; SKX: # %bb.0: 6415; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 6416; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6417; SKX-NEXT: retq # sched: [7:1.00] 6418 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 6419 %vaddr = bitcast i8* %addr to <16 x i32>* 6420 %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 6421 %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer 6422 ret <16 x i32>%res 6423} 6424 6425define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { 6426; GENERIC-LABEL: mov_test36: 6427; GENERIC: # %bb.0: 6428; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 6429; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6430; GENERIC-NEXT: retq # sched: [1:1.00] 6431; 6432; SKX-LABEL: mov_test36: 6433; SKX: # %bb.0: 6434; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 6435; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6436; SKX-NEXT: retq # sched: [7:1.00] 6437 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 6438 %vaddr = bitcast i8* %addr to <8 x i64>* 6439 %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 6440 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old 6441 ret <8 x i64>%res 6442} 6443 6444define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { 6445; GENERIC-LABEL: mov_test37: 6446; GENERIC: # %bb.0: 6447; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33] 6448; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6449; GENERIC-NEXT: retq # sched: [1:1.00] 6450; 6451; SKX-LABEL: mov_test37: 6452; SKX: # %bb.0: 6453; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00] 6454; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6455; SKX-NEXT: retq # sched: [7:1.00] 6456 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 6457 %vaddr = bitcast i8* %addr to <8 x i64>* 6458 %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 6459 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old 6460 ret <8 x i64>%res 6461} 6462 6463define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) { 6464; GENERIC-LABEL: mov_test38: 6465; GENERIC: # %bb.0: 6466; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 6467; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6468; GENERIC-NEXT: retq # sched: [1:1.00] 6469; 6470; SKX-LABEL: mov_test38: 6471; SKX: # %bb.0: 6472; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 6473; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6474; SKX-NEXT: retq # sched: [7:1.00] 6475 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 6476 %vaddr = bitcast i8* %addr to <8 x i64>* 6477 %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 6478 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer 6479 ret <8 x i64>%res 6480} 6481 6482define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) { 6483; GENERIC-LABEL: mov_test39: 6484; GENERIC: # %bb.0: 6485; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33] 6486; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6487; GENERIC-NEXT: retq # sched: [1:1.00] 6488; 6489; SKX-LABEL: mov_test39: 6490; SKX: # %bb.0: 6491; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00] 6492; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6493; SKX-NEXT: retq # sched: [7:1.00] 6494 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 6495 %vaddr = bitcast i8* %addr to <8 x i64>* 6496 %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 6497 %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer 6498 ret <8 x i64>%res 6499} 6500 6501define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { 6502; GENERIC-LABEL: mov_test40: 6503; GENERIC: # %bb.0: 6504; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 6505; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] 6506; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6507; GENERIC-NEXT: retq # sched: [1:1.00] 6508; 6509; SKX-LABEL: mov_test40: 6510; SKX: # %bb.0: 6511; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 6512; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] 6513; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6514; SKX-NEXT: retq # sched: [7:1.00] 6515 %mask = fcmp one <16 x float> %mask1, zeroinitializer 6516 %vaddr = bitcast i8* %addr to <16 x float>* 6517 %r = load <16 x float>, <16 x float>* %vaddr, align 64 6518 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old 6519 ret <16 x float>%res 6520} 6521 6522define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { 6523; GENERIC-LABEL: mov_test41: 6524; GENERIC: # %bb.0: 6525; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 6526; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] 6527; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6528; GENERIC-NEXT: retq # sched: [1:1.00] 6529; 6530; SKX-LABEL: mov_test41: 6531; SKX: # %bb.0: 6532; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 6533; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] 6534; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6535; SKX-NEXT: retq # sched: [7:1.00] 6536 %mask = fcmp one <16 x float> %mask1, zeroinitializer 6537 %vaddr = bitcast i8* %addr to <16 x float>* 6538 %r = load <16 x float>, <16 x float>* %vaddr, align 1 6539 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old 6540 ret <16 x float>%res 6541} 6542 6543define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) { 6544; GENERIC-LABEL: mov_test42: 6545; GENERIC: # %bb.0: 6546; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 6547; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] 6548; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6549; GENERIC-NEXT: retq # sched: [1:1.00] 6550; 6551; SKX-LABEL: mov_test42: 6552; SKX: # %bb.0: 6553; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 6554; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] 6555; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6556; SKX-NEXT: retq # sched: [7:1.00] 6557 %mask = fcmp one <16 x float> %mask1, zeroinitializer 6558 %vaddr = bitcast i8* %addr to <16 x float>* 6559 %r = load <16 x float>, <16 x float>* %vaddr, align 64 6560 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer 6561 ret <16 x float>%res 6562} 6563 6564define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) { 6565; GENERIC-LABEL: mov_test43: 6566; GENERIC: # %bb.0: 6567; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 6568; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] 6569; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6570; GENERIC-NEXT: retq # sched: [1:1.00] 6571; 6572; SKX-LABEL: mov_test43: 6573; SKX: # %bb.0: 6574; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 6575; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] 6576; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6577; SKX-NEXT: retq # sched: [7:1.00] 6578 %mask = fcmp one <16 x float> %mask1, zeroinitializer 6579 %vaddr = bitcast i8* %addr to <16 x float>* 6580 %r = load <16 x float>, <16 x float>* %vaddr, align 1 6581 %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer 6582 ret <16 x float>%res 6583} 6584 6585define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { 6586; GENERIC-LABEL: mov_test44: 6587; GENERIC: # %bb.0: 6588; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 6589; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] 6590; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6591; GENERIC-NEXT: retq # sched: [1:1.00] 6592; 6593; SKX-LABEL: mov_test44: 6594; SKX: # %bb.0: 6595; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 6596; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] 6597; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6598; SKX-NEXT: retq # sched: [7:1.00] 6599 %mask = fcmp one <8 x double> %mask1, zeroinitializer 6600 %vaddr = bitcast i8* %addr to <8 x double>* 6601 %r = load <8 x double>, <8 x double>* %vaddr, align 64 6602 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old 6603 ret <8 x double>%res 6604} 6605 6606define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { 6607; GENERIC-LABEL: mov_test45: 6608; GENERIC: # %bb.0: 6609; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] 6610; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] 6611; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [7:0.50] 6612; GENERIC-NEXT: retq # sched: [1:1.00] 6613; 6614; SKX-LABEL: mov_test45: 6615; SKX: # %bb.0: 6616; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] 6617; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] 6618; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50] 6619; SKX-NEXT: retq # sched: [7:1.00] 6620 %mask = fcmp one <8 x double> %mask1, zeroinitializer 6621 %vaddr = bitcast i8* %addr to <8 x double>* 6622 %r = load <8 x double>, <8 x double>* %vaddr, align 1 6623 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old 6624 ret <8 x double>%res 6625} 6626 6627define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) { 6628; GENERIC-LABEL: mov_test46: 6629; GENERIC: # %bb.0: 6630; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 6631; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] 6632; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6633; GENERIC-NEXT: retq # sched: [1:1.00] 6634; 6635; SKX-LABEL: mov_test46: 6636; SKX: # %bb.0: 6637; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 6638; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] 6639; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6640; SKX-NEXT: retq # sched: [7:1.00] 6641 %mask = fcmp one <8 x double> %mask1, zeroinitializer 6642 %vaddr = bitcast i8* %addr to <8 x double>* 6643 %r = load <8 x double>, <8 x double>* %vaddr, align 64 6644 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer 6645 ret <8 x double>%res 6646} 6647 6648define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) { 6649; GENERIC-LABEL: mov_test47: 6650; GENERIC: # %bb.0: 6651; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 6652; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] 6653; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50] 6654; GENERIC-NEXT: retq # sched: [1:1.00] 6655; 6656; SKX-LABEL: mov_test47: 6657; SKX: # %bb.0: 6658; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 6659; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] 6660; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 6661; SKX-NEXT: retq # sched: [7:1.00] 6662 %mask = fcmp one <8 x double> %mask1, zeroinitializer 6663 %vaddr = bitcast i8* %addr to <8 x double>* 6664 %r = load <8 x double>, <8 x double>* %vaddr, align 1 6665 %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer 6666 ret <8 x double>%res 6667} 6668 6669define i16 @mask16(i16 %x) { 6670; GENERIC-LABEL: mask16: 6671; GENERIC: # %bb.0: 6672; GENERIC-NEXT: notl %edi # sched: [1:0.33] 6673; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 6674; GENERIC-NEXT: retq # sched: [1:1.00] 6675; 6676; SKX-LABEL: mask16: 6677; SKX: # %bb.0: 6678; SKX-NEXT: notl %edi # sched: [1:0.25] 6679; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 6680; SKX-NEXT: retq # sched: [7:1.00] 6681 %m0 = bitcast i16 %x to <16 x i1> 6682 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6683 %ret = bitcast <16 x i1> %m1 to i16 6684 ret i16 %ret 6685} 6686 6687define i32 @mask16_zext(i16 %x) { 6688; GENERIC-LABEL: mask16_zext: 6689; GENERIC: # %bb.0: 6690; GENERIC-NEXT: notl %edi # sched: [1:0.33] 6691; GENERIC-NEXT: movzwl %di, %eax # sched: [1:0.33] 6692; GENERIC-NEXT: retq # sched: [1:1.00] 6693; 6694; SKX-LABEL: mask16_zext: 6695; SKX: # %bb.0: 6696; SKX-NEXT: notl %edi # sched: [1:0.25] 6697; SKX-NEXT: movzwl %di, %eax # sched: [1:0.25] 6698; SKX-NEXT: retq # sched: [7:1.00] 6699 %m0 = bitcast i16 %x to <16 x i1> 6700 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6701 %m2 = bitcast <16 x i1> %m1 to i16 6702 %ret = zext i16 %m2 to i32 6703 ret i32 %ret 6704} 6705 6706define i8 @mask8(i8 %x) { 6707; GENERIC-LABEL: mask8: 6708; GENERIC: # %bb.0: 6709; GENERIC-NEXT: notb %dil # sched: [1:0.33] 6710; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 6711; GENERIC-NEXT: retq # sched: [1:1.00] 6712; 6713; SKX-LABEL: mask8: 6714; SKX: # %bb.0: 6715; SKX-NEXT: notb %dil # sched: [1:0.25] 6716; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 6717; SKX-NEXT: retq # sched: [7:1.00] 6718 %m0 = bitcast i8 %x to <8 x i1> 6719 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6720 %ret = bitcast <8 x i1> %m1 to i8 6721 ret i8 %ret 6722} 6723 6724define i32 @mask8_zext(i8 %x) { 6725; GENERIC-LABEL: mask8_zext: 6726; GENERIC: # %bb.0: 6727; GENERIC-NEXT: notb %dil # sched: [1:0.33] 6728; GENERIC-NEXT: movzbl %dil, %eax # sched: [1:0.33] 6729; GENERIC-NEXT: retq # sched: [1:1.00] 6730; 6731; SKX-LABEL: mask8_zext: 6732; SKX: # %bb.0: 6733; SKX-NEXT: notb %dil # sched: [1:0.25] 6734; SKX-NEXT: movzbl %dil, %eax # sched: [1:0.25] 6735; SKX-NEXT: retq # sched: [7:1.00] 6736 %m0 = bitcast i8 %x to <8 x i1> 6737 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6738 %m2 = bitcast <8 x i1> %m1 to i8 6739 %ret = zext i8 %m2 to i32 6740 ret i32 %ret 6741} 6742 6743define void @mask16_mem(i16* %ptr) { 6744; GENERIC-LABEL: mask16_mem: 6745; GENERIC: # %bb.0: 6746; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] 6747; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] 6748; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 6749; GENERIC-NEXT: retq # sched: [1:1.00] 6750; 6751; SKX-LABEL: mask16_mem: 6752; SKX: # %bb.0: 6753; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] 6754; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] 6755; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 6756; SKX-NEXT: retq # sched: [7:1.00] 6757 %x = load i16, i16* %ptr, align 4 6758 %m0 = bitcast i16 %x to <16 x i1> 6759 %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6760 %ret = bitcast <16 x i1> %m1 to i16 6761 store i16 %ret, i16* %ptr, align 4 6762 ret void 6763} 6764 6765define void @mask8_mem(i8* %ptr) { 6766; GENERIC-LABEL: mask8_mem: 6767; GENERIC: # %bb.0: 6768; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] 6769; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] 6770; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 6771; GENERIC-NEXT: retq # sched: [1:1.00] 6772; 6773; SKX-LABEL: mask8_mem: 6774; SKX: # %bb.0: 6775; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] 6776; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00] 6777; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 6778; SKX-NEXT: retq # sched: [7:1.00] 6779 %x = load i8, i8* %ptr, align 4 6780 %m0 = bitcast i8 %x to <8 x i1> 6781 %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1> 6782 %ret = bitcast <8 x i1> %m1 to i8 6783 store i8 %ret, i8* %ptr, align 4 6784 ret void 6785} 6786 6787define i16 @mand16(i16 %x, i16 %y) { 6788; GENERIC-LABEL: mand16: 6789; GENERIC: # %bb.0: 6790; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 6791; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33] 6792; GENERIC-NEXT: andl %esi, %edi # sched: [1:0.33] 6793; GENERIC-NEXT: orl %eax, %edi # sched: [1:0.33] 6794; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 6795; GENERIC-NEXT: retq # sched: [1:1.00] 6796; 6797; SKX-LABEL: mand16: 6798; SKX: # %bb.0: 6799; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 6800; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25] 6801; SKX-NEXT: andl %esi, %edi # sched: [1:0.25] 6802; SKX-NEXT: orl %eax, %edi # sched: [1:0.25] 6803; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 6804; SKX-NEXT: retq # sched: [7:1.00] 6805 %ma = bitcast i16 %x to <16 x i1> 6806 %mb = bitcast i16 %y to <16 x i1> 6807 %mc = and <16 x i1> %ma, %mb 6808 %md = xor <16 x i1> %ma, %mb 6809 %me = or <16 x i1> %mc, %md 6810 %ret = bitcast <16 x i1> %me to i16 6811 ret i16 %ret 6812} 6813 6814define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { 6815; GENERIC-LABEL: mand16_mem: 6816; GENERIC: # %bb.0: 6817; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] 6818; GENERIC-NEXT: kmovw (%rsi), %k1 # sched: [5:0.50] 6819; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:0.33] 6820; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] 6821; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:0.33] 6822; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6823; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 6824; GENERIC-NEXT: retq # sched: [1:1.00] 6825; 6826; SKX-LABEL: mand16_mem: 6827; SKX: # %bb.0: 6828; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] 6829; SKX-NEXT: kmovw (%rsi), %k1 # sched: [7:1.00] 6830; SKX-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00] 6831; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] 6832; SKX-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00] 6833; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6834; SKX-NEXT: # kill: def $ax killed $ax killed $eax 6835; SKX-NEXT: retq # sched: [7:1.00] 6836 %ma = load <16 x i1>, <16 x i1>* %x 6837 %mb = load <16 x i1>, <16 x i1>* %y 6838 %mc = and <16 x i1> %ma, %mb 6839 %md = xor <16 x i1> %ma, %mb 6840 %me = or <16 x i1> %mc, %md 6841 %ret = bitcast <16 x i1> %me to i16 6842 ret i16 %ret 6843} 6844 6845define i8 @shuf_test1(i16 %v) nounwind { 6846; GENERIC-LABEL: shuf_test1: 6847; GENERIC: # %bb.0: 6848; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 6849; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00] 6850; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6851; GENERIC-NEXT: # kill: def $al killed $al killed $eax 6852; GENERIC-NEXT: retq # sched: [1:1.00] 6853; 6854; SKX-LABEL: shuf_test1: 6855; SKX: # %bb.0: 6856; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 6857; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00] 6858; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6859; SKX-NEXT: # kill: def $al killed $al killed $eax 6860; SKX-NEXT: retq # sched: [7:1.00] 6861 %v1 = bitcast i16 %v to <16 x i1> 6862 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6863 %mask1 = bitcast <8 x i1> %mask to i8 6864 ret i8 %mask1 6865} 6866 6867define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) { 6868; GENERIC-LABEL: zext_test1: 6869; GENERIC: # %bb.0: 6870; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] 6871; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] 6872; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6873; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] 6874; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6875; GENERIC-NEXT: retq # sched: [1:1.00] 6876; 6877; SKX-LABEL: zext_test1: 6878; SKX: # %bb.0: 6879; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] 6880; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] 6881; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6882; SKX-NEXT: andl $1, %eax # sched: [1:0.25] 6883; SKX-NEXT: vzeroupper # sched: [4:1.00] 6884; SKX-NEXT: retq # sched: [7:1.00] 6885 %cmp_res = icmp ugt <16 x i32> %a, %b 6886 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 6887 %res = zext i1 %cmp_res.i1 to i32 6888 ret i32 %res 6889} 6890 6891define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { 6892; GENERIC-LABEL: zext_test2: 6893; GENERIC: # %bb.0: 6894; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] 6895; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] 6896; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6897; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33] 6898; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 6899; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6900; GENERIC-NEXT: retq # sched: [1:1.00] 6901; 6902; SKX-LABEL: zext_test2: 6903; SKX: # %bb.0: 6904; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] 6905; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] 6906; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6907; SKX-NEXT: andl $1, %eax # sched: [1:0.25] 6908; SKX-NEXT: # kill: def $ax killed $ax killed $eax 6909; SKX-NEXT: vzeroupper # sched: [4:1.00] 6910; SKX-NEXT: retq # sched: [7:1.00] 6911 %cmp_res = icmp ugt <16 x i32> %a, %b 6912 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 6913 %res = zext i1 %cmp_res.i1 to i16 6914 ret i16 %res 6915} 6916 6917define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { 6918; GENERIC-LABEL: zext_test3: 6919; GENERIC: # %bb.0: 6920; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50] 6921; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00] 6922; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 6923; GENERIC-NEXT: andb $1, %al # sched: [1:0.33] 6924; GENERIC-NEXT: # kill: def $al killed $al killed $eax 6925; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6926; GENERIC-NEXT: retq # sched: [1:1.00] 6927; 6928; SKX-LABEL: zext_test3: 6929; SKX: # %bb.0: 6930; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] 6931; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00] 6932; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 6933; SKX-NEXT: andb $1, %al # sched: [1:0.25] 6934; SKX-NEXT: # kill: def $al killed $al killed $eax 6935; SKX-NEXT: vzeroupper # sched: [4:1.00] 6936; SKX-NEXT: retq # sched: [7:1.00] 6937 %cmp_res = icmp ugt <16 x i32> %a, %b 6938 %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5 6939 %res = zext i1 %cmp_res.i1 to i8 6940 ret i8 %res 6941} 6942 6943define i8 @conv1(<8 x i1>* %R) { 6944; GENERIC-LABEL: conv1: 6945; GENERIC: # %bb.0: # %entry 6946; GENERIC-NEXT: movb $-1, (%rdi) # sched: [1:1.00] 6947; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 6948; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33] 6949; GENERIC-NEXT: retq # sched: [1:1.00] 6950; 6951; SKX-LABEL: conv1: 6952; SKX: # %bb.0: # %entry 6953; SKX-NEXT: movb $-1, (%rdi) # sched: [1:1.00] 6954; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 6955; SKX-NEXT: movb $-2, %al # sched: [1:0.25] 6956; SKX-NEXT: retq # sched: [7:1.00] 6957entry: 6958 store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R 6959 6960 %maskPtr = alloca <8 x i1> 6961 store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr 6962 %mask = load <8 x i1>, <8 x i1>* %maskPtr 6963 %mask_convert = bitcast <8 x i1> %mask to i8 6964 ret i8 %mask_convert 6965} 6966 6967define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { 6968; GENERIC-LABEL: test4: 6969; GENERIC: # %bb.0: 6970; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50] 6971; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50] 6972; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] 6973; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 6974; GENERIC-NEXT: retq # sched: [1:1.00] 6975; 6976; SKX-LABEL: test4: 6977; SKX: # %bb.0: 6978; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00] 6979; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00] 6980; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] 6981; SKX-NEXT: vzeroupper # sched: [4:1.00] 6982; SKX-NEXT: retq # sched: [7:1.00] 6983 %x_gt_y = icmp sgt <4 x i64> %x, %y 6984 %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1 6985 %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1 6986 %resse = sext <4 x i1>%res to <4 x i32> 6987 ret <4 x i32> %resse 6988} 6989 6990define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { 6991; GENERIC-LABEL: vcmp_test5: 6992; GENERIC: # %bb.0: 6993; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50] 6994; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50] 6995; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] 6996; GENERIC-NEXT: retq # sched: [1:1.00] 6997; 6998; SKX-LABEL: vcmp_test5: 6999; SKX: # %bb.0: 7000; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00] 7001; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00] 7002; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] 7003; SKX-NEXT: retq # sched: [7:1.00] 7004 %x_gt_y = icmp slt <2 x i64> %x, %y 7005 %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 7006 %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1 7007 %resse = sext <2 x i1>%res to <2 x i64> 7008 ret <2 x i64> %resse 7009}define void @vcmp_test6(<16 x i1> %mask) { 7010allocas: 7011 %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> 7012 %b = bitcast <16 x i1> %a to i16 7013 %c = icmp eq i16 %b, 0 7014 br i1 %c, label %true, label %false 7015 7016true: 7017 ret void 7018 7019false: 7020 ret void 7021} 7022define void @vcmp_test7(<8 x i1> %mask) { 7023; GENERIC-LABEL: vcmp_test7: 7024; GENERIC: # %bb.0: # %allocas 7025; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 7026; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 7027; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 7028; GENERIC-NEXT: orb $85, %al # sched: [1:0.33] 7029; GENERIC-NEXT: retq # sched: [1:1.00] 7030; 7031; SKX-LABEL: vcmp_test7: 7032; SKX: # %bb.0: # %allocas 7033; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 7034; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 7035; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 7036; SKX-NEXT: orb $85, %al # sched: [1:0.25] 7037; SKX-NEXT: retq # sched: [7:1.00] 7038allocas: 7039 %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false> 7040 %b = bitcast <8 x i1> %a to i8 7041 %c = icmp eq i8 %b, 0 7042 br i1 %c, label %true, label %false 7043 7044true: 7045 ret void 7046 7047false: 7048 ret void 7049} 7050define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { 7051; GENERIC-LABEL: vcmp_test8: 7052; GENERIC: # %bb.0: 7053; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 7054; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00] 7055; GENERIC-NEXT: # %bb.2: 7056; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:0.33] 7057; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] 7058; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7059; GENERIC-NEXT: retq # sched: [1:1.00] 7060; GENERIC-NEXT: .LBB386_1: 7061; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 7062; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50] 7063; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] 7064; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7065; GENERIC-NEXT: retq # sched: [1:1.00] 7066; 7067; SKX-LABEL: vcmp_test8: 7068; SKX: # %bb.0: 7069; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 7070; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50] 7071; SKX-NEXT: # %bb.2: 7072; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00] 7073; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] 7074; SKX-NEXT: vzeroupper # sched: [4:1.00] 7075; SKX-NEXT: retq # sched: [7:1.00] 7076; SKX-NEXT: .LBB386_1: 7077; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] 7078; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00] 7079; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] 7080; SKX-NEXT: vzeroupper # sched: [4:1.00] 7081; SKX-NEXT: retq # sched: [7:1.00] 7082 %cond = icmp sgt i32 %a1, %b1 7083 %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer 7084 %cmp2 = icmp ult <16 x i32> %b, zeroinitializer 7085 %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2 7086 %res = sext <16 x i1> %mix to <16 x i8> 7087 ret <16 x i8> %res 7088} 7089define <16 x i1> @vpmov_test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { 7090; GENERIC-LABEL: vpmov_test9: 7091; GENERIC: # %bb.0: 7092; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 7093; GENERIC-NEXT: jg .LBB387_1 # sched: [1:1.00] 7094; GENERIC-NEXT: # %bb.2: 7095; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00] 7096; GENERIC-NEXT: jmp .LBB387_3 # sched: [1:1.00] 7097; GENERIC-NEXT: .LBB387_1: 7098; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 7099; GENERIC-NEXT: .LBB387_3: 7100; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] 7101; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] 7102; GENERIC-NEXT: retq # sched: [1:1.00] 7103; 7104; SKX-LABEL: vpmov_test9: 7105; SKX: # %bb.0: 7106; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 7107; SKX-NEXT: jg .LBB387_1 # sched: [1:0.50] 7108; SKX-NEXT: # %bb.2: 7109; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50] 7110; SKX-NEXT: jmp .LBB387_3 # sched: [1:0.50] 7111; SKX-NEXT: .LBB387_1: 7112; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 7113; SKX-NEXT: .LBB387_3: 7114; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] 7115; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] 7116; SKX-NEXT: retq # sched: [7:1.00] 7117 %mask = icmp sgt i32 %a1, %b1 7118 %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b 7119 ret <16 x i1>%c 7120}define <8 x i1> @vpmov_test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) { 7121 %mask = icmp sgt i32 %a1, %b1 7122 %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b 7123 ret <8 x i1>%c 7124} 7125 7126define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { 7127; GENERIC-LABEL: vmov_test11: 7128; GENERIC: # %bb.0: 7129; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 7130; GENERIC-NEXT: jg .LBB389_1 # sched: [1:1.00] 7131; GENERIC-NEXT: # %bb.2: 7132; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00] 7133; GENERIC-NEXT: jmp .LBB389_3 # sched: [1:1.00] 7134; GENERIC-NEXT: .LBB389_1: 7135; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 7136; GENERIC-NEXT: .LBB389_3: 7137; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] 7138; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] 7139; GENERIC-NEXT: retq # sched: [1:1.00] 7140; 7141; SKX-LABEL: vmov_test11: 7142; SKX: # %bb.0: 7143; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 7144; SKX-NEXT: jg .LBB389_1 # sched: [1:0.50] 7145; SKX-NEXT: # %bb.2: 7146; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50] 7147; SKX-NEXT: jmp .LBB389_3 # sched: [1:0.50] 7148; SKX-NEXT: .LBB389_1: 7149; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 7150; SKX-NEXT: .LBB389_3: 7151; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] 7152; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] 7153; SKX-NEXT: retq # sched: [7:1.00] 7154 %mask = icmp sgt i32 %a1, %b1 7155 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b 7156 ret <4 x i1>%c 7157} 7158 7159define i32 @vmov_test12(i32 %x, i32 %y) { 7160; GENERIC-LABEL: vmov_test12: 7161; GENERIC: # %bb.0: 7162; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] 7163; GENERIC-NEXT: retq # sched: [1:1.00] 7164; 7165; SKX-LABEL: vmov_test12: 7166; SKX: # %bb.0: 7167; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] 7168; SKX-NEXT: retq # sched: [7:1.00] 7169 %a = bitcast i16 21845 to <16 x i1> 7170 %b = extractelement <16 x i1> %a, i32 0 7171 %c = select i1 %b, i32 %x, i32 %y 7172 ret i32 %c 7173} 7174 7175define i32 @vmov_test13(i32 %x, i32 %y) { 7176; GENERIC-LABEL: vmov_test13: 7177; GENERIC: # %bb.0: 7178; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] 7179; GENERIC-NEXT: retq # sched: [1:1.00] 7180; 7181; SKX-LABEL: vmov_test13: 7182; SKX: # %bb.0: 7183; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] 7184; SKX-NEXT: retq # sched: [7:1.00] 7185 %a = bitcast i16 21845 to <16 x i1> 7186 %b = extractelement <16 x i1> %a, i32 3 7187 %c = select i1 %b, i32 %x, i32 %y 7188 ret i32 %c 7189}define <4 x i1> @vmov_test14() { 7190 %a = bitcast i16 21845 to <16 x i1> 7191 %b = extractelement <16 x i1> %a, i32 2 7192 %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1 7193 ret <4 x i1> %c 7194} 7195 7196define <16 x i1> @vmov_test15(i32 %x, i32 %y) { 7197; GENERIC-LABEL: vmov_test15: 7198; GENERIC: # %bb.0: 7199; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33] 7200; GENERIC-NEXT: movl $21845, %eax # imm = 0x5555 7201; GENERIC-NEXT: # sched: [1:0.33] 7202; GENERIC-NEXT: movl $1, %ecx # sched: [1:0.33] 7203; GENERIC-NEXT: cmovgl %eax, %ecx # sched: [2:0.67] 7204; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33] 7205; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33] 7206; GENERIC-NEXT: retq # sched: [1:1.00] 7207; 7208; SKX-LABEL: vmov_test15: 7209; SKX: # %bb.0: 7210; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25] 7211; SKX-NEXT: movl $21845, %eax # imm = 0x5555 7212; SKX-NEXT: # sched: [1:0.25] 7213; SKX-NEXT: movl $1, %ecx # sched: [1:0.25] 7214; SKX-NEXT: cmovgl %eax, %ecx # sched: [1:0.50] 7215; SKX-NEXT: kmovd %ecx, %k0 # sched: [1:1.00] 7216; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] 7217; SKX-NEXT: retq # sched: [7:1.00] 7218 %a = bitcast i16 21845 to <16 x i1> 7219 %b = bitcast i16 1 to <16 x i1> 7220 %mask = icmp sgt i32 %x, %y 7221 %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b 7222 ret <16 x i1> %c 7223} 7224 7225define <64 x i8> @vmov_test16(i64 %x) { 7226; 7227; GENERIC-LABEL: vmov_test16: 7228; GENERIC: # %bb.0: 7229; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] 7230; GENERIC-NEXT: movb $1, %al # sched: [1:0.33] 7231; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] 7232; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] 7233; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] 7234; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] 7235; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] 7236; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] 7237; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] 7238; GENERIC-NEXT: retq # sched: [1:1.00] 7239; 7240; SKX-LABEL: vmov_test16: 7241; SKX: # %bb.0: 7242; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] 7243; SKX-NEXT: movb $1, %al # sched: [1:0.25] 7244; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] 7245; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] 7246; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] 7247; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] 7248; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] 7249; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] 7250; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] 7251; SKX-NEXT: retq # sched: [7:1.00] 7252 %a = bitcast i64 %x to <64 x i1> 7253 %b = insertelement <64 x i1>%a, i1 true, i32 5 7254 %c = sext <64 x i1>%b to <64 x i8> 7255 ret <64 x i8>%c 7256} 7257 7258define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) { 7259; 7260; GENERIC-LABEL: vmov_test17: 7261; GENERIC: # %bb.0: 7262; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33] 7263; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33] 7264; GENERIC-NEXT: setg %al # sched: [1:0.50] 7265; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] 7266; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00] 7267; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33] 7268; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00] 7269; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00] 7270; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33] 7271; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] 7272; GENERIC-NEXT: retq # sched: [1:1.00] 7273; 7274; SKX-LABEL: vmov_test17: 7275; SKX: # %bb.0: 7276; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] 7277; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25] 7278; SKX-NEXT: setg %al # sched: [1:0.50] 7279; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] 7280; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00] 7281; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00] 7282; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00] 7283; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00] 7284; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00] 7285; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] 7286; SKX-NEXT: retq # sched: [7:1.00] 7287 %a = bitcast i64 %x to <64 x i1> 7288 %b = icmp sgt i32 %y, %z 7289 %c = insertelement <64 x i1>%a, i1 %b, i32 5 7290 %d = sext <64 x i1>%c to <64 x i8> 7291 ret <64 x i8>%d 7292} 7293 7294define <8 x i1> @vmov_test18(i8 %a, i16 %y) { 7295; GENERIC-LABEL: vmov_test18: 7296; GENERIC: # %bb.0: 7297; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33] 7298; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33] 7299; GENERIC-NEXT: kshiftrw $8, %k2, %k0 # sched: [1:1.00] 7300; GENERIC-NEXT: kshiftrw $9, %k2, %k2 # sched: [1:1.00] 7301; GENERIC-NEXT: kshiftrb $6, %k1, %k3 # sched: [1:1.00] 7302; GENERIC-NEXT: kxorb %k2, %k3, %k2 # sched: [1:0.33] 7303; GENERIC-NEXT: kshiftlb $7, %k2, %k2 # sched: [1:1.00] 7304; GENERIC-NEXT: kshiftrb $1, %k2, %k2 # sched: [1:1.00] 7305; GENERIC-NEXT: kxorb %k2, %k1, %k1 # sched: [1:0.33] 7306; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00] 7307; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00] 7308; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00] 7309; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:0.33] 7310; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33] 7311; GENERIC-NEXT: retq # sched: [1:1.00] 7312; 7313; SKX-LABEL: vmov_test18: 7314; SKX: # %bb.0: 7315; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] 7316; SKX-NEXT: kmovd %esi, %k2 # sched: [1:1.00] 7317; SKX-NEXT: kshiftrw $8, %k2, %k0 # sched: [3:1.00] 7318; SKX-NEXT: kshiftrw $9, %k2, %k2 # sched: [3:1.00] 7319; SKX-NEXT: kshiftrb $6, %k1, %k3 # sched: [3:1.00] 7320; SKX-NEXT: kxorb %k2, %k3, %k2 # sched: [1:1.00] 7321; SKX-NEXT: kshiftlb $7, %k2, %k2 # sched: [3:1.00] 7322; SKX-NEXT: kshiftrb $1, %k2, %k2 # sched: [3:1.00] 7323; SKX-NEXT: kxorb %k2, %k1, %k1 # sched: [1:1.00] 7324; SKX-NEXT: kshiftlb $1, %k1, %k1 # sched: [3:1.00] 7325; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00] 7326; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00] 7327; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00] 7328; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] 7329; SKX-NEXT: retq # sched: [7:1.00] 7330 %b = bitcast i8 %a to <8 x i1> 7331 %b1 = bitcast i16 %y to <16 x i1> 7332 %el1 = extractelement <16 x i1>%b1, i32 8 7333 %el2 = extractelement <16 x i1>%b1, i32 9 7334 %c = insertelement <8 x i1>%b, i1 %el1, i32 7 7335 %d = insertelement <8 x i1>%c, i1 %el2, i32 6 7336 ret <8 x i1>%d 7337} 7338define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { 7339; GENERIC-LABEL: vmov_test21: 7340; GENERIC: # %bb.0: 7341; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] 7342; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33] 7343; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 7344; GENERIC-NEXT: retq # sched: [1:1.00] 7345; 7346; SKX-LABEL: vmov_test21: 7347; SKX: # %bb.0: 7348; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] 7349; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] 7350; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 7351; SKX-NEXT: retq # sched: [7:1.00] 7352 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer 7353 ret <32 x i16> %ret 7354} 7355 7356define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) { 7357; GENERIC-LABEL: vmov_test22: 7358; GENERIC: # %bb.0: 7359; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 7360; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] 7361; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7362; GENERIC-NEXT: retq # sched: [1:1.00] 7363; 7364; SKX-LABEL: vmov_test22: 7365; SKX: # %bb.0: 7366; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 7367; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] 7368; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7369; SKX-NEXT: retq # sched: [7:1.00] 7370 store <4 x i1> %a, <4 x i1>* %addr 7371 ret void 7372} 7373 7374define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) { 7375; GENERIC-LABEL: vmov_test23: 7376; GENERIC: # %bb.0: 7377; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 7378; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] 7379; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7380; GENERIC-NEXT: retq # sched: [1:1.00] 7381; 7382; SKX-LABEL: vmov_test23: 7383; SKX: # %bb.0: 7384; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 7385; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] 7386; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7387; SKX-NEXT: retq # sched: [7:1.00] 7388 store <2 x i1> %a, <2 x i1>* %addr 7389 ret void 7390} 7391 7392define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { 7393; GENERIC-LABEL: store_v1i1: 7394; GENERIC: # %bb.0: 7395; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 7396; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:0.33] 7397; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] 7398; GENERIC-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] 7399; GENERIC-NEXT: retq # sched: [1:1.00] 7400; 7401; SKX-LABEL: store_v1i1: 7402; SKX: # %bb.0: 7403; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 7404; SKX-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00] 7405; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] 7406; SKX-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00] 7407; SKX-NEXT: retq # sched: [7:1.00] 7408 %x = xor <1 x i1> %c, <i1 1> 7409 store <1 x i1> %x, <1 x i1>* %ptr, align 4 7410 ret void 7411} 7412 7413define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { 7414; GENERIC-LABEL: store_v2i1: 7415; GENERIC: # %bb.0: 7416; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] 7417; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] 7418; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] 7419; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7420; GENERIC-NEXT: retq # sched: [1:1.00] 7421; 7422; SKX-LABEL: store_v2i1: 7423; SKX: # %bb.0: 7424; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] 7425; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] 7426; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] 7427; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7428; SKX-NEXT: retq # sched: [7:1.00] 7429 %x = xor <2 x i1> %c, <i1 1, i1 1> 7430 store <2 x i1> %x, <2 x i1>* %ptr, align 4 7431 ret void 7432} 7433 7434define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { 7435; GENERIC-LABEL: store_v4i1: 7436; GENERIC: # %bb.0: 7437; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] 7438; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] 7439; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] 7440; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7441; GENERIC-NEXT: retq # sched: [1:1.00] 7442; 7443; SKX-LABEL: store_v4i1: 7444; SKX: # %bb.0: 7445; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] 7446; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] 7447; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] 7448; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7449; SKX-NEXT: retq # sched: [7:1.00] 7450 %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1> 7451 store <4 x i1> %x, <4 x i1>* %ptr, align 4 7452 ret void 7453} 7454 7455define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { 7456; GENERIC-LABEL: store_v8i1: 7457; GENERIC: # %bb.0: 7458; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 7459; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 7460; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33] 7461; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7462; GENERIC-NEXT: retq # sched: [1:1.00] 7463; 7464; SKX-LABEL: store_v8i1: 7465; SKX: # %bb.0: 7466; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 7467; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 7468; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00] 7469; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7470; SKX-NEXT: retq # sched: [7:1.00] 7471 %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> 7472 store <8 x i1> %x, <8 x i1>* %ptr, align 4 7473 ret void 7474} 7475 7476define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { 7477; GENERIC-LABEL: store_v16i1: 7478; GENERIC: # %bb.0: 7479; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 7480; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] 7481; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33] 7482; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 7483; GENERIC-NEXT: retq # sched: [1:1.00] 7484; 7485; SKX-LABEL: store_v16i1: 7486; SKX: # %bb.0: 7487; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 7488; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] 7489; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] 7490; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 7491; SKX-NEXT: retq # sched: [7:1.00] 7492 %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1> 7493 store <16 x i1> %x, <16 x i1>* %ptr, align 4 7494 ret void 7495} 7496 7497;void f2(int); 7498;void f1(int c) 7499;{ 7500; static int v = 0; 7501; if (v == 0) 7502; v = 1; 7503; else 7504; v = 0; 7505; f2(v); 7506;} 7507 7508@f1.v = internal unnamed_addr global i1 false, align 4 7509 7510define void @f1(i32 %c) { 7511; GENERIC-LABEL: f1: 7512; GENERIC: # %bb.0: # %entry 7513; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] 7514; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33] 7515; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] 7516; GENERIC-NEXT: jmp f2 # TAILCALL 7517; 7518; SKX-LABEL: f1: 7519; SKX: # %bb.0: # %entry 7520; SKX-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] 7521; SKX-NEXT: xorl $1, %edi # sched: [1:0.25] 7522; SKX-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] 7523; SKX-NEXT: jmp f2 # TAILCALL 7524entry: 7525 %.b1 = load i1, i1* @f1.v, align 4 7526 %not..b1 = xor i1 %.b1, true 7527 store i1 %not..b1, i1* @f1.v, align 4 7528 %0 = zext i1 %not..b1 to i32 7529 tail call void @f2(i32 %0) #2 7530 ret void 7531} 7532 7533declare void @f2(i32) #1 7534 7535define void @store_i16_i1(i16 %x, i1 *%y) { 7536; GENERIC-LABEL: store_i16_i1: 7537; GENERIC: # %bb.0: 7538; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] 7539; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] 7540; GENERIC-NEXT: retq # sched: [1:1.00] 7541; 7542; SKX-LABEL: store_i16_i1: 7543; SKX: # %bb.0: 7544; SKX-NEXT: andl $1, %edi # sched: [1:0.25] 7545; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00] 7546; SKX-NEXT: retq # sched: [7:1.00] 7547 %c = trunc i16 %x to i1 7548 store i1 %c, i1* %y 7549 ret void 7550} 7551 7552define void @store_i8_i1(i8 %x, i1 *%y) { 7553; GENERIC-LABEL: store_i8_i1: 7554; GENERIC: # %bb.0: 7555; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] 7556; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] 7557; GENERIC-NEXT: retq # sched: [1:1.00] 7558; 7559; SKX-LABEL: store_i8_i1: 7560; SKX: # %bb.0: 7561; SKX-NEXT: andl $1, %edi # sched: [1:0.25] 7562; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00] 7563; SKX-NEXT: retq # sched: [7:1.00] 7564 %c = trunc i8 %x to i1 7565 store i1 %c, i1* %y 7566 ret void 7567} 7568 7569define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) { 7570; GENERIC-LABEL: test_build_vec_v32i1: 7571; GENERIC: # %bb.0: 7572; GENERIC-NEXT: movl $1497715861, %eax # imm = 0x59455495 7573; GENERIC-NEXT: # sched: [1:0.33] 7574; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] 7575; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 7576; GENERIC-NEXT: retq # sched: [1:1.00] 7577; 7578; SKX-LABEL: test_build_vec_v32i1: 7579; SKX: # %bb.0: 7580; SKX-NEXT: movl $1497715861, %eax # imm = 0x59455495 7581; SKX-NEXT: # sched: [1:0.25] 7582; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] 7583; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 7584; SKX-NEXT: retq # sched: [7:1.00] 7585 %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer 7586 ret <32 x i16> %ret 7587} 7588 7589define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { 7590; GENERIC-LABEL: test_build_vec_v64i1: 7591; GENERIC: # %bb.0: 7592; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:0.50] 7593; GENERIC-NEXT: retq # sched: [1:1.00] 7594; 7595; SKX-LABEL: test_build_vec_v64i1: 7596; SKX: # %bb.0: 7597; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00] 7598; SKX-NEXT: retq # sched: [7:1.00] 7599 %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer 7600 ret <64 x i8> %ret 7601} 7602 7603define void @ktest_1(<8 x double> %in, double * %base) { 7604; GENERIC-LABEL: ktest_1: 7605; GENERIC: # %bb.0: 7606; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [7:0.50] 7607; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] 7608; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [7:0.50] 7609; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] 7610; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:0.33] 7611; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] 7612; GENERIC-NEXT: # %bb.1: # %L1 7613; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] 7614; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7615; GENERIC-NEXT: retq # sched: [1:1.00] 7616; GENERIC-NEXT: .LBB410_2: # %L2 7617; GENERIC-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] 7618; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7619; GENERIC-NEXT: retq # sched: [1:1.00] 7620; 7621; SKX-LABEL: ktest_1: 7622; SKX: # %bb.0: 7623; SKX-NEXT: vmovupd (%rdi), %zmm1 # sched: [8:0.50] 7624; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] 7625; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50] 7626; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] 7627; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00] 7628; SKX-NEXT: je .LBB410_2 # sched: [1:0.50] 7629; SKX-NEXT: # %bb.1: # %L1 7630; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] 7631; SKX-NEXT: vzeroupper # sched: [4:1.00] 7632; SKX-NEXT: retq # sched: [7:1.00] 7633; SKX-NEXT: .LBB410_2: # %L2 7634; SKX-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] 7635; SKX-NEXT: vzeroupper # sched: [4:1.00] 7636; SKX-NEXT: retq # sched: [7:1.00] 7637 %addr1 = getelementptr double, double * %base, i64 0 7638 %addr2 = getelementptr double, double * %base, i64 1 7639 7640 %vaddr1 = bitcast double* %addr1 to <8 x double>* 7641 %vaddr2 = bitcast double* %addr2 to <8 x double>* 7642 7643 %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1 7644 %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1 7645 7646 %sel1 = fcmp ogt <8 x double>%in, %val1 7647 %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer 7648 %sel2 = fcmp olt <8 x double> %in, %val3 7649 %sel3 = and <8 x i1> %sel1, %sel2 7650 7651 %int_sel3 = bitcast <8 x i1> %sel3 to i8 7652 %res = icmp eq i8 %int_sel3, zeroinitializer 7653 br i1 %res, label %L2, label %L1 7654L1: 7655 store <8 x double> %in, <8 x double>* %vaddr1 7656 br label %End 7657L2: 7658 store <8 x double> %in, <8 x double>* %vaddr2 7659 br label %End 7660End: 7661 ret void 7662} 7663 7664define void @ktest_2(<32 x float> %in, float * %base) { 7665; 7666; GENERIC-LABEL: ktest_2: 7667; GENERIC: # %bb.0: 7668; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [7:0.50] 7669; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [7:0.50] 7670; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] 7671; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] 7672; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00] 7673; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [7:0.50] 7674; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [7:0.50] 7675; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] 7676; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] 7677; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00] 7678; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:0.33] 7679; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00] 7680; GENERIC-NEXT: # %bb.1: # %L1 7681; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 7682; GENERIC-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] 7683; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7684; GENERIC-NEXT: retq # sched: [1:1.00] 7685; GENERIC-NEXT: .LBB411_2: # %L2 7686; GENERIC-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] 7687; GENERIC-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] 7688; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7689; GENERIC-NEXT: retq # sched: [1:1.00] 7690; 7691; SKX-LABEL: ktest_2: 7692; SKX: # %bb.0: 7693; SKX-NEXT: vmovups (%rdi), %zmm2 # sched: [8:0.50] 7694; SKX-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50] 7695; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] 7696; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] 7697; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00] 7698; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50] 7699; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50] 7700; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] 7701; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] 7702; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00] 7703; SKX-NEXT: kortestd %k1, %k0 # sched: [3:1.00] 7704; SKX-NEXT: je .LBB411_2 # sched: [1:0.50] 7705; SKX-NEXT: # %bb.1: # %L1 7706; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] 7707; SKX-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] 7708; SKX-NEXT: vzeroupper # sched: [4:1.00] 7709; SKX-NEXT: retq # sched: [7:1.00] 7710; SKX-NEXT: .LBB411_2: # %L2 7711; SKX-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] 7712; SKX-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] 7713; SKX-NEXT: vzeroupper # sched: [4:1.00] 7714; SKX-NEXT: retq # sched: [7:1.00] 7715 %addr1 = getelementptr float, float * %base, i64 0 7716 %addr2 = getelementptr float, float * %base, i64 1 7717 7718 %vaddr1 = bitcast float* %addr1 to <32 x float>* 7719 %vaddr2 = bitcast float* %addr2 to <32 x float>* 7720 7721 %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1 7722 %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1 7723 7724 %sel1 = fcmp ogt <32 x float>%in, %val1 7725 %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer 7726 %sel2 = fcmp olt <32 x float> %in, %val3 7727 %sel3 = or <32 x i1> %sel1, %sel2 7728 7729 %int_sel3 = bitcast <32 x i1> %sel3 to i32 7730 %res = icmp eq i32 %int_sel3, zeroinitializer 7731 br i1 %res, label %L2, label %L1 7732L1: 7733 store <32 x float> %in, <32 x float>* %vaddr1 7734 br label %End 7735L2: 7736 store <32 x float> %in, <32 x float>* %vaddr2 7737 br label %End 7738End: 7739 ret void 7740} 7741 7742define <8 x i64> @load_8i1(<8 x i1>* %a) { 7743; GENERIC-LABEL: load_8i1: 7744; GENERIC: # %bb.0: 7745; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] 7746; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33] 7747; GENERIC-NEXT: retq # sched: [1:1.00] 7748; 7749; SKX-LABEL: load_8i1: 7750; SKX: # %bb.0: 7751; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] 7752; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] 7753; SKX-NEXT: retq # sched: [7:1.00] 7754 %b = load <8 x i1>, <8 x i1>* %a 7755 %c = sext <8 x i1> %b to <8 x i64> 7756 ret <8 x i64> %c 7757} 7758 7759define <16 x i32> @load_16i1(<16 x i1>* %a) { 7760; GENERIC-LABEL: load_16i1: 7761; GENERIC: # %bb.0: 7762; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50] 7763; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 7764; GENERIC-NEXT: retq # sched: [1:1.00] 7765; 7766; SKX-LABEL: load_16i1: 7767; SKX: # %bb.0: 7768; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] 7769; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 7770; SKX-NEXT: retq # sched: [7:1.00] 7771 %b = load <16 x i1>, <16 x i1>* %a 7772 %c = sext <16 x i1> %b to <16 x i32> 7773 ret <16 x i32> %c 7774} 7775 7776define <2 x i16> @load_2i1(<2 x i1>* %a) { 7777; GENERIC-LABEL: load_2i1: 7778; GENERIC: # %bb.0: 7779; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] 7780; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33] 7781; GENERIC-NEXT: retq # sched: [1:1.00] 7782; 7783; SKX-LABEL: load_2i1: 7784; SKX: # %bb.0: 7785; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] 7786; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] 7787; SKX-NEXT: retq # sched: [7:1.00] 7788 %b = load <2 x i1>, <2 x i1>* %a 7789 %c = sext <2 x i1> %b to <2 x i16> 7790 ret <2 x i16> %c 7791} 7792 7793define <4 x i16> @load_4i1(<4 x i1>* %a) { 7794; GENERIC-LABEL: load_4i1: 7795; GENERIC: # %bb.0: 7796; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50] 7797; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] 7798; GENERIC-NEXT: retq # sched: [1:1.00] 7799; 7800; SKX-LABEL: load_4i1: 7801; SKX: # %bb.0: 7802; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] 7803; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] 7804; SKX-NEXT: retq # sched: [7:1.00] 7805 %b = load <4 x i1>, <4 x i1>* %a 7806 %c = sext <4 x i1> %b to <4 x i16> 7807 ret <4 x i16> %c 7808} 7809 7810define <32 x i16> @load_32i1(<32 x i1>* %a) { 7811; GENERIC-LABEL: load_32i1: 7812; GENERIC: # %bb.0: 7813; GENERIC-NEXT: kmovd (%rdi), %k0 # sched: [5:0.50] 7814; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33] 7815; GENERIC-NEXT: retq # sched: [1:1.00] 7816; 7817; SKX-LABEL: load_32i1: 7818; SKX: # %bb.0: 7819; SKX-NEXT: kmovd (%rdi), %k0 # sched: [7:1.00] 7820; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] 7821; SKX-NEXT: retq # sched: [7:1.00] 7822 %b = load <32 x i1>, <32 x i1>* %a 7823 %c = sext <32 x i1> %b to <32 x i16> 7824 ret <32 x i16> %c 7825} 7826 7827define <64 x i8> @load_64i1(<64 x i1>* %a) { 7828; GENERIC-LABEL: load_64i1: 7829; GENERIC: # %bb.0: 7830; GENERIC-NEXT: kmovq (%rdi), %k0 # sched: [5:0.50] 7831; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33] 7832; GENERIC-NEXT: retq # sched: [1:1.00] 7833; 7834; SKX-LABEL: load_64i1: 7835; SKX: # %bb.0: 7836; SKX-NEXT: kmovq (%rdi), %k0 # sched: [7:1.00] 7837; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25] 7838; SKX-NEXT: retq # sched: [7:1.00] 7839 %b = load <64 x i1>, <64 x i1>* %a 7840 %c = sext <64 x i1> %b to <64 x i8> 7841 ret <64 x i8> %c 7842} 7843 7844define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { 7845; GENERIC-LABEL: store_8i1: 7846; GENERIC: # %bb.0: 7847; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 7848; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 7849; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7850; GENERIC-NEXT: retq # sched: [1:1.00] 7851; 7852; SKX-LABEL: store_8i1: 7853; SKX: # %bb.0: 7854; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 7855; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 7856; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7857; SKX-NEXT: retq # sched: [7:1.00] 7858 store <8 x i1> %v, <8 x i1>* %a 7859 ret void 7860} 7861 7862define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { 7863; GENERIC-LABEL: store_8i1_1: 7864; GENERIC: # %bb.0: 7865; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] 7866; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33] 7867; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7868; GENERIC-NEXT: retq # sched: [1:1.00] 7869; 7870; SKX-LABEL: store_8i1_1: 7871; SKX: # %bb.0: 7872; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] 7873; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] 7874; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] 7875; SKX-NEXT: retq # sched: [7:1.00] 7876 %v1 = trunc <8 x i16> %v to <8 x i1> 7877 store <8 x i1> %v1, <8 x i1>* %a 7878 ret void 7879} 7880 7881define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { 7882; GENERIC-LABEL: store_16i1: 7883; GENERIC: # %bb.0: 7884; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] 7885; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] 7886; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 7887; GENERIC-NEXT: retq # sched: [1:1.00] 7888; 7889; SKX-LABEL: store_16i1: 7890; SKX: # %bb.0: 7891; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] 7892; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] 7893; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] 7894; SKX-NEXT: retq # sched: [7:1.00] 7895 store <16 x i1> %v, <16 x i1>* %a 7896 ret void 7897} 7898 7899define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { 7900; GENERIC-LABEL: store_32i1: 7901; GENERIC: # %bb.0: 7902; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] 7903; GENERIC-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:0.33] 7904; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] 7905; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7906; GENERIC-NEXT: retq # sched: [1:1.00] 7907; 7908; SKX-LABEL: store_32i1: 7909; SKX: # %bb.0: 7910; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] 7911; SKX-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:1.00] 7912; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] 7913; SKX-NEXT: vzeroupper # sched: [4:1.00] 7914; SKX-NEXT: retq # sched: [7:1.00] 7915 store <32 x i1> %v, <32 x i1>* %a 7916 ret void 7917} 7918 7919define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { 7920; GENERIC-LABEL: store_32i1_1: 7921; GENERIC: # %bb.0: 7922; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] 7923; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33] 7924; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] 7925; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7926; GENERIC-NEXT: retq # sched: [1:1.00] 7927; 7928; SKX-LABEL: store_32i1_1: 7929; SKX: # %bb.0: 7930; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] 7931; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00] 7932; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] 7933; SKX-NEXT: vzeroupper # sched: [4:1.00] 7934; SKX-NEXT: retq # sched: [7:1.00] 7935 %v1 = trunc <32 x i16> %v to <32 x i1> 7936 store <32 x i1> %v1, <32 x i1>* %a 7937 ret void 7938} 7939 7940 7941define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { 7942; 7943; GENERIC-LABEL: store_64i1: 7944; GENERIC: # %bb.0: 7945; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] 7946; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33] 7947; GENERIC-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] 7948; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7949; GENERIC-NEXT: retq # sched: [1:1.00] 7950; 7951; SKX-LABEL: store_64i1: 7952; SKX: # %bb.0: 7953; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] 7954; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] 7955; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] 7956; SKX-NEXT: vzeroupper # sched: [4:1.00] 7957; SKX-NEXT: retq # sched: [7:1.00] 7958 store <64 x i1> %v, <64 x i1>* %a 7959 ret void 7960} 7961 7962define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { 7963; GENERIC-LABEL: test_bitcast_v8i1_zext: 7964; GENERIC: # %bb.0: 7965; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] 7966; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33] 7967; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] 7968; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7969; GENERIC-NEXT: retq # sched: [1:1.00] 7970; 7971; SKX-LABEL: test_bitcast_v8i1_zext: 7972; SKX: # %bb.0: 7973; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] 7974; SKX-NEXT: kmovb %k0, %eax # sched: [3:1.00] 7975; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] 7976; SKX-NEXT: vzeroupper # sched: [4:1.00] 7977; SKX-NEXT: retq # sched: [7:1.00] 7978 %v1 = icmp eq <16 x i32> %a, zeroinitializer 7979 %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7980 %mask1 = bitcast <8 x i1> %mask to i8 7981 %val = zext i8 %mask1 to i32 7982 %val1 = add i32 %val, %val 7983 ret i32 %val1 7984} 7985 7986define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { 7987; GENERIC-LABEL: test_bitcast_v16i1_zext: 7988; GENERIC: # %bb.0: 7989; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33] 7990; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33] 7991; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33] 7992; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 7993; GENERIC-NEXT: retq # sched: [1:1.00] 7994; 7995; SKX-LABEL: test_bitcast_v16i1_zext: 7996; SKX: # %bb.0: 7997; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00] 7998; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00] 7999; SKX-NEXT: addl %eax, %eax # sched: [1:0.25] 8000; SKX-NEXT: vzeroupper # sched: [4:1.00] 8001; SKX-NEXT: retq # sched: [7:1.00] 8002 %v1 = icmp eq <16 x i32> %a, zeroinitializer 8003 %mask1 = bitcast <16 x i1> %v1 to i16 8004 %val = zext i16 %mask1 to i32 8005 %val1 = add i32 %val, %val 8006 ret i32 %val1 8007} 8008 8009define i16 @test_v16i1_add(i16 %x, i16 %y) { 8010; GENERIC-LABEL: test_v16i1_add: 8011; GENERIC: # %bb.0: 8012; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8013; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8014; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] 8015; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8016; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 8017; GENERIC-NEXT: retq # sched: [1:1.00] 8018; 8019; SKX-LABEL: test_v16i1_add: 8020; SKX: # %bb.0: 8021; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8022; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8023; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] 8024; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8025; SKX-NEXT: # kill: def $ax killed $ax killed $eax 8026; SKX-NEXT: retq # sched: [7:1.00] 8027 %m0 = bitcast i16 %x to <16 x i1> 8028 %m1 = bitcast i16 %y to <16 x i1> 8029 %m2 = add <16 x i1> %m0, %m1 8030 %ret = bitcast <16 x i1> %m2 to i16 8031 ret i16 %ret 8032} 8033 8034define i16 @test_v16i1_sub(i16 %x, i16 %y) { 8035; GENERIC-LABEL: test_v16i1_sub: 8036; GENERIC: # %bb.0: 8037; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8038; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8039; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33] 8040; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8041; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 8042; GENERIC-NEXT: retq # sched: [1:1.00] 8043; 8044; SKX-LABEL: test_v16i1_sub: 8045; SKX: # %bb.0: 8046; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8047; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8048; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] 8049; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8050; SKX-NEXT: # kill: def $ax killed $ax killed $eax 8051; SKX-NEXT: retq # sched: [7:1.00] 8052 %m0 = bitcast i16 %x to <16 x i1> 8053 %m1 = bitcast i16 %y to <16 x i1> 8054 %m2 = sub <16 x i1> %m0, %m1 8055 %ret = bitcast <16 x i1> %m2 to i16 8056 ret i16 %ret 8057} 8058 8059define i16 @test_v16i1_mul(i16 %x, i16 %y) { 8060; GENERIC-LABEL: test_v16i1_mul: 8061; GENERIC: # %bb.0: 8062; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8063; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8064; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:0.33] 8065; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8066; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 8067; GENERIC-NEXT: retq # sched: [1:1.00] 8068; 8069; SKX-LABEL: test_v16i1_mul: 8070; SKX: # %bb.0: 8071; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8072; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8073; SKX-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00] 8074; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8075; SKX-NEXT: # kill: def $ax killed $ax killed $eax 8076; SKX-NEXT: retq # sched: [7:1.00] 8077 %m0 = bitcast i16 %x to <16 x i1> 8078 %m1 = bitcast i16 %y to <16 x i1> 8079 %m2 = mul <16 x i1> %m0, %m1 8080 %ret = bitcast <16 x i1> %m2 to i16 8081 ret i16 %ret 8082} 8083 8084define i8 @test_v8i1_add(i8 %x, i8 %y) { 8085; GENERIC-LABEL: test_v8i1_add: 8086; GENERIC: # %bb.0: 8087; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8088; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8089; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] 8090; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8091; GENERIC-NEXT: # kill: def $al killed $al killed $eax 8092; GENERIC-NEXT: retq # sched: [1:1.00] 8093; 8094; SKX-LABEL: test_v8i1_add: 8095; SKX: # %bb.0: 8096; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8097; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8098; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] 8099; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8100; SKX-NEXT: # kill: def $al killed $al killed $eax 8101; SKX-NEXT: retq # sched: [7:1.00] 8102 %m0 = bitcast i8 %x to <8 x i1> 8103 %m1 = bitcast i8 %y to <8 x i1> 8104 %m2 = add <8 x i1> %m0, %m1 8105 %ret = bitcast <8 x i1> %m2 to i8 8106 ret i8 %ret 8107} 8108 8109define i8 @test_v8i1_sub(i8 %x, i8 %y) { 8110; GENERIC-LABEL: test_v8i1_sub: 8111; GENERIC: # %bb.0: 8112; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8113; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8114; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33] 8115; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8116; GENERIC-NEXT: # kill: def $al killed $al killed $eax 8117; GENERIC-NEXT: retq # sched: [1:1.00] 8118; 8119; SKX-LABEL: test_v8i1_sub: 8120; SKX: # %bb.0: 8121; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8122; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8123; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] 8124; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8125; SKX-NEXT: # kill: def $al killed $al killed $eax 8126; SKX-NEXT: retq # sched: [7:1.00] 8127 %m0 = bitcast i8 %x to <8 x i1> 8128 %m1 = bitcast i8 %y to <8 x i1> 8129 %m2 = sub <8 x i1> %m0, %m1 8130 %ret = bitcast <8 x i1> %m2 to i8 8131 ret i8 %ret 8132} 8133 8134define i8 @test_v8i1_mul(i8 %x, i8 %y) { 8135; GENERIC-LABEL: test_v8i1_mul: 8136; GENERIC: # %bb.0: 8137; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33] 8138; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33] 8139; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:0.33] 8140; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] 8141; GENERIC-NEXT: # kill: def $al killed $al killed $eax 8142; GENERIC-NEXT: retq # sched: [1:1.00] 8143; 8144; SKX-LABEL: test_v8i1_mul: 8145; SKX: # %bb.0: 8146; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] 8147; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00] 8148; SKX-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00] 8149; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] 8150; SKX-NEXT: # kill: def $al killed $al killed $eax 8151; SKX-NEXT: retq # sched: [7:1.00] 8152 %m0 = bitcast i8 %x to <8 x i1> 8153 %m1 = bitcast i8 %y to <8 x i1> 8154 %m2 = mul <8 x i1> %m0, %m1 8155 %ret = bitcast <8 x i1> %m2 to i8 8156 ret i8 %ret 8157} 8158 8159define <16 x i32> @_inreg16xi32(i32 %a) { 8160; GENERIC-LABEL: _inreg16xi32: 8161; GENERIC: # %bb.0: 8162; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00] 8163; GENERIC-NEXT: retq # sched: [1:1.00] 8164; 8165; SKX-LABEL: _inreg16xi32: 8166; SKX: # %bb.0: 8167; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] 8168; SKX-NEXT: retq # sched: [7:1.00] 8169 %b = insertelement <16 x i32> undef, i32 %a, i32 0 8170 %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer 8171 ret <16 x i32> %c 8172} 8173 8174define <8 x i64> @_inreg8xi64(i64 %a) { 8175; GENERIC-LABEL: _inreg8xi64: 8176; GENERIC: # %bb.0: 8177; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00] 8178; GENERIC-NEXT: retq # sched: [1:1.00] 8179; 8180; SKX-LABEL: _inreg8xi64: 8181; SKX: # %bb.0: 8182; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] 8183; SKX-NEXT: retq # sched: [7:1.00] 8184 %b = insertelement <8 x i64> undef, i64 %a, i32 0 8185 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer 8186 ret <8 x i64> %c 8187} 8188 8189define <16 x float> @_ss16xfloat_v4(<4 x float> %a) { 8190; GENERIC-LABEL: _ss16xfloat_v4: 8191; GENERIC: # %bb.0: 8192; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8193; GENERIC-NEXT: retq # sched: [1:1.00] 8194; 8195; SKX-LABEL: _ss16xfloat_v4: 8196; SKX: # %bb.0: 8197; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8198; SKX-NEXT: retq # sched: [7:1.00] 8199 %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer 8200 ret <16 x float> %b 8201} 8202 8203define <16 x float> @_inreg16xfloat(float %a) { 8204; GENERIC-LABEL: _inreg16xfloat: 8205; GENERIC: # %bb.0: 8206; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8207; GENERIC-NEXT: retq # sched: [1:1.00] 8208; 8209; SKX-LABEL: _inreg16xfloat: 8210; SKX: # %bb.0: 8211; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8212; SKX-NEXT: retq # sched: [7:1.00] 8213 %b = insertelement <16 x float> undef, float %a, i32 0 8214 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8215 ret <16 x float> %c 8216} 8217 8218define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) { 8219; GENERIC-LABEL: _ss16xfloat_mask: 8220; GENERIC: # %bb.0: 8221; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33] 8222; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00] 8223; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00] 8224; GENERIC-NEXT: retq # sched: [1:1.00] 8225; 8226; SKX-LABEL: _ss16xfloat_mask: 8227; SKX: # %bb.0: 8228; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00] 8229; SKX-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00] 8230; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] 8231; SKX-NEXT: retq # sched: [7:1.00] 8232 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8233 %b = insertelement <16 x float> undef, float %a, i32 0 8234 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8235 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i 8236 ret <16 x float> %r 8237} 8238 8239define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { 8240; GENERIC-LABEL: _ss16xfloat_maskz: 8241; GENERIC: # %bb.0: 8242; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8243; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 8244; GENERIC-NEXT: retq # sched: [1:1.00] 8245; 8246; SKX-LABEL: _ss16xfloat_maskz: 8247; SKX: # %bb.0: 8248; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8249; SKX-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] 8250; SKX-NEXT: retq # sched: [7:1.00] 8251 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8252 %b = insertelement <16 x float> undef, float %a, i32 0 8253 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8254 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer 8255 ret <16 x float> %r 8256} 8257 8258define <16 x float> @_ss16xfloat_load(float* %a.ptr) { 8259; GENERIC-LABEL: _ss16xfloat_load: 8260; GENERIC: # %bb.0: 8261; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00] 8262; GENERIC-NEXT: retq # sched: [1:1.00] 8263; 8264; SKX-LABEL: _ss16xfloat_load: 8265; SKX: # %bb.0: 8266; SKX-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:0.50] 8267; SKX-NEXT: retq # sched: [7:1.00] 8268 %a = load float, float* %a.ptr 8269 %b = insertelement <16 x float> undef, float %a, i32 0 8270 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8271 ret <16 x float> %c 8272} 8273 8274define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) { 8275; GENERIC-LABEL: _ss16xfloat_mask_load: 8276; GENERIC: # %bb.0: 8277; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33] 8278; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00] 8279; GENERIC-NEXT: retq # sched: [1:1.00] 8280; 8281; SKX-LABEL: _ss16xfloat_mask_load: 8282; SKX: # %bb.0: 8283; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00] 8284; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50] 8285; SKX-NEXT: retq # sched: [7:1.00] 8286 %a = load float, float* %a.ptr 8287 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8288 %b = insertelement <16 x float> undef, float %a, i32 0 8289 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8290 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i 8291 ret <16 x float> %r 8292} 8293 8294define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) { 8295; GENERIC-LABEL: _ss16xfloat_maskz_load: 8296; GENERIC: # %bb.0: 8297; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33] 8298; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 8299; GENERIC-NEXT: retq # sched: [1:1.00] 8300; 8301; SKX-LABEL: _ss16xfloat_maskz_load: 8302; SKX: # %bb.0: 8303; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00] 8304; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 8305; SKX-NEXT: retq # sched: [7:1.00] 8306 %a = load float, float* %a.ptr 8307 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8308 %b = insertelement <16 x float> undef, float %a, i32 0 8309 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8310 %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer 8311 ret <16 x float> %r 8312} 8313 8314define <8 x double> @_inreg8xdouble(double %a) { 8315; GENERIC-LABEL: _inreg8xdouble: 8316; GENERIC: # %bb.0: 8317; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] 8318; GENERIC-NEXT: retq # sched: [1:1.00] 8319; 8320; SKX-LABEL: _inreg8xdouble: 8321; SKX: # %bb.0: 8322; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] 8323; SKX-NEXT: retq # sched: [7:1.00] 8324 %b = insertelement <8 x double> undef, double %a, i32 0 8325 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8326 ret <8 x double> %c 8327} 8328 8329define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) { 8330; GENERIC-LABEL: _sd8xdouble_mask: 8331; GENERIC: # %bb.0: 8332; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33] 8333; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00] 8334; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00] 8335; GENERIC-NEXT: retq # sched: [1:1.00] 8336; 8337; SKX-LABEL: _sd8xdouble_mask: 8338; SKX: # %bb.0: 8339; SKX-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [3:1.00] 8340; SKX-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00] 8341; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33] 8342; SKX-NEXT: retq # sched: [7:1.00] 8343 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 8344 %b = insertelement <8 x double> undef, double %a, i32 0 8345 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8346 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i 8347 ret <8 x double> %r 8348} 8349 8350define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { 8351; GENERIC-LABEL: _sd8xdouble_maskz: 8352; GENERIC: # %bb.0: 8353; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8354; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] 8355; GENERIC-NEXT: retq # sched: [1:1.00] 8356; 8357; SKX-LABEL: _sd8xdouble_maskz: 8358; SKX: # %bb.0: 8359; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8360; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] 8361; SKX-NEXT: retq # sched: [7:1.00] 8362 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 8363 %b = insertelement <8 x double> undef, double %a, i32 0 8364 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8365 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer 8366 ret <8 x double> %r 8367} 8368 8369define <8 x double> @_sd8xdouble_load(double* %a.ptr) { 8370; GENERIC-LABEL: _sd8xdouble_load: 8371; GENERIC: # %bb.0: 8372; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00] 8373; GENERIC-NEXT: retq # sched: [1:1.00] 8374; 8375; SKX-LABEL: _sd8xdouble_load: 8376; SKX: # %bb.0: 8377; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50] 8378; SKX-NEXT: retq # sched: [7:1.00] 8379 %a = load double, double* %a.ptr 8380 %b = insertelement <8 x double> undef, double %a, i32 0 8381 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8382 ret <8 x double> %c 8383} 8384 8385define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) { 8386; GENERIC-LABEL: _sd8xdouble_mask_load: 8387; GENERIC: # %bb.0: 8388; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33] 8389; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00] 8390; GENERIC-NEXT: retq # sched: [1:1.00] 8391; 8392; SKX-LABEL: _sd8xdouble_mask_load: 8393; SKX: # %bb.0: 8394; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00] 8395; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50] 8396; SKX-NEXT: retq # sched: [7:1.00] 8397 %a = load double, double* %a.ptr 8398 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 8399 %b = insertelement <8 x double> undef, double %a, i32 0 8400 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8401 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i 8402 ret <8 x double> %r 8403} 8404 8405define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) { 8406; GENERIC-LABEL: _sd8xdouble_maskz_load: 8407; GENERIC: # %bb.0: 8408; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33] 8409; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] 8410; GENERIC-NEXT: retq # sched: [1:1.00] 8411; 8412; SKX-LABEL: _sd8xdouble_maskz_load: 8413; SKX: # %bb.0: 8414; SKX-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [3:1.00] 8415; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] 8416; SKX-NEXT: retq # sched: [7:1.00] 8417 %a = load double, double* %a.ptr 8418 %mask = icmp ne <8 x i32> %mask1, zeroinitializer 8419 %b = insertelement <8 x double> undef, double %a, i32 0 8420 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8421 %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer 8422 ret <8 x double> %r 8423} 8424 8425define <16 x i32> @_xmm16xi32(<16 x i32> %a) { 8426; GENERIC-LABEL: _xmm16xi32: 8427; GENERIC: # %bb.0: 8428; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8429; GENERIC-NEXT: retq # sched: [1:1.00] 8430; 8431; SKX-LABEL: _xmm16xi32: 8432; SKX: # %bb.0: 8433; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8434; SKX-NEXT: retq # sched: [7:1.00] 8435 %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer 8436 ret <16 x i32> %b 8437} 8438 8439define <16 x float> @_xmm16xfloat(<16 x float> %a) { 8440; GENERIC-LABEL: _xmm16xfloat: 8441; GENERIC: # %bb.0: 8442; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8443; GENERIC-NEXT: retq # sched: [1:1.00] 8444; 8445; SKX-LABEL: _xmm16xfloat: 8446; SKX: # %bb.0: 8447; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8448; SKX-NEXT: retq # sched: [7:1.00] 8449 %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer 8450 ret <16 x float> %b 8451} 8452 8453define <16 x i32> @test_vbroadcast() { 8454; GENERIC-LABEL: test_vbroadcast: 8455; GENERIC: # %bb.0: # %entry 8456; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] 8457; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] 8458; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33] 8459; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:0.33] 8460; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] 8461; GENERIC-NEXT: retq # sched: [1:1.00] 8462; 8463; SKX-LABEL: test_vbroadcast: 8464; SKX: # %bb.0: # %entry 8465; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 8466; SKX-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] 8467; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] 8468; SKX-NEXT: knotw %k0, %k1 # sched: [1:1.00] 8469; SKX-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] 8470; SKX-NEXT: retq # sched: [7:1.00] 8471entry: 8472 %0 = sext <16 x i1> zeroinitializer to <16 x i32> 8473 %1 = fcmp uno <16 x float> undef, zeroinitializer 8474 %2 = sext <16 x i1> %1 to <16 x i32> 8475 %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2 8476 ret <16 x i32> %3 8477} 8478 8479; We implement the set1 intrinsics with vector initializers. Verify that the 8480; IR generated will produce broadcasts at the end. 8481define <8 x double> @test_set1_pd(double %d) #2 { 8482; GENERIC-LABEL: test_set1_pd: 8483; GENERIC: # %bb.0: # %entry 8484; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] 8485; GENERIC-NEXT: retq # sched: [1:1.00] 8486; 8487; SKX-LABEL: test_set1_pd: 8488; SKX: # %bb.0: # %entry 8489; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] 8490; SKX-NEXT: retq # sched: [7:1.00] 8491entry: 8492 %vecinit.i = insertelement <8 x double> undef, double %d, i32 0 8493 %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1 8494 %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2 8495 %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3 8496 %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4 8497 %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5 8498 %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6 8499 %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7 8500 ret <8 x double> %vecinit7.i 8501} 8502 8503define <8 x i64> @test_set1_epi64(i64 %d) #2 { 8504; GENERIC-LABEL: test_set1_epi64: 8505; GENERIC: # %bb.0: # %entry 8506; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00] 8507; GENERIC-NEXT: retq # sched: [1:1.00] 8508; 8509; SKX-LABEL: test_set1_epi64: 8510; SKX: # %bb.0: # %entry 8511; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] 8512; SKX-NEXT: retq # sched: [7:1.00] 8513entry: 8514 %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0 8515 %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1 8516 %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2 8517 %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3 8518 %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4 8519 %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5 8520 %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6 8521 %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7 8522 ret <8 x i64> %vecinit7.i 8523} 8524 8525define <16 x float> @test_set1_ps(float %f) #2 { 8526; GENERIC-LABEL: test_set1_ps: 8527; GENERIC: # %bb.0: # %entry 8528; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8529; GENERIC-NEXT: retq # sched: [1:1.00] 8530; 8531; SKX-LABEL: test_set1_ps: 8532; SKX: # %bb.0: # %entry 8533; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8534; SKX-NEXT: retq # sched: [7:1.00] 8535entry: 8536 %vecinit.i = insertelement <16 x float> undef, float %f, i32 0 8537 %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1 8538 %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2 8539 %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3 8540 %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4 8541 %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5 8542 %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6 8543 %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7 8544 %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8 8545 %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9 8546 %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10 8547 %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11 8548 %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12 8549 %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13 8550 %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14 8551 %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15 8552 ret <16 x float> %vecinit15.i 8553} 8554 8555define <16 x i32> @test_set1_epi32(i32 %f) #2 { 8556; GENERIC-LABEL: test_set1_epi32: 8557; GENERIC: # %bb.0: # %entry 8558; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00] 8559; GENERIC-NEXT: retq # sched: [1:1.00] 8560; 8561; SKX-LABEL: test_set1_epi32: 8562; SKX: # %bb.0: # %entry 8563; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] 8564; SKX-NEXT: retq # sched: [7:1.00] 8565entry: 8566 %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0 8567 %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1 8568 %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2 8569 %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3 8570 %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4 8571 %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5 8572 %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6 8573 %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7 8574 %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8 8575 %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9 8576 %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10 8577 %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11 8578 %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12 8579 %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13 8580 %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14 8581 %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15 8582 ret <16 x i32> %vecinit15.i 8583} 8584 8585; We implement the scalar broadcast intrinsics with vector initializers. 8586; Verify that the IR generated will produce the broadcast at the end. 8587define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) { 8588; GENERIC-LABEL: test_mm512_broadcastsd_pd: 8589; GENERIC: # %bb.0: # %entry 8590; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] 8591; GENERIC-NEXT: retq # sched: [1:1.00] 8592; 8593; SKX-LABEL: test_mm512_broadcastsd_pd: 8594; SKX: # %bb.0: # %entry 8595; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] 8596; SKX-NEXT: retq # sched: [7:1.00] 8597entry: 8598 %0 = extractelement <2 x double> %a, i32 0 8599 %vecinit.i = insertelement <8 x double> undef, double %0, i32 0 8600 %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1 8601 %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2 8602 %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3 8603 %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4 8604 %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5 8605 %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6 8606 %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7 8607 ret <8 x double> %vecinit7.i 8608} 8609 8610define <16 x float> @suff_test1(<8 x float>%a) { 8611; GENERIC-LABEL: suff_test1: 8612; GENERIC: # %bb.0: 8613; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8614; GENERIC-NEXT: retq # sched: [1:1.00] 8615; 8616; SKX-LABEL: suff_test1: 8617; SKX: # %bb.0: 8618; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8619; SKX-NEXT: retq # sched: [7:1.00] 8620 %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer 8621 ret <16 x float>%res 8622} 8623 8624define <8 x double> @suff_test2(<4 x double>%a) { 8625; GENERIC-LABEL: suff_test2: 8626; GENERIC: # %bb.0: 8627; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] 8628; GENERIC-NEXT: retq # sched: [1:1.00] 8629; 8630; SKX-LABEL: suff_test2: 8631; SKX: # %bb.0: 8632; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] 8633; SKX-NEXT: retq # sched: [7:1.00] 8634 %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer 8635 ret <8 x double>%res 8636} 8637 8638define <64 x i8> @_invec32xi8(<32 x i8>%a) { 8639; GENERIC-LABEL: _invec32xi8: 8640; GENERIC: # %bb.0: 8641; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00] 8642; GENERIC-NEXT: retq # sched: [1:1.00] 8643; 8644; SKX-LABEL: _invec32xi8: 8645; SKX: # %bb.0: 8646; SKX-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00] 8647; SKX-NEXT: retq # sched: [7:1.00] 8648 %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer 8649 ret <64 x i8>%res 8650} 8651 8652define <32 x i16> @_invec16xi16(<16 x i16>%a) { 8653; GENERIC-LABEL: _invec16xi16: 8654; GENERIC: # %bb.0: 8655; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00] 8656; GENERIC-NEXT: retq # sched: [1:1.00] 8657; 8658; SKX-LABEL: _invec16xi16: 8659; SKX: # %bb.0: 8660; SKX-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00] 8661; SKX-NEXT: retq # sched: [7:1.00] 8662 %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer 8663 ret <32 x i16>%res 8664} 8665 8666define <16 x i32> @_invec8xi32(<8 x i32>%a) { 8667; GENERIC-LABEL: _invec8xi32: 8668; GENERIC: # %bb.0: 8669; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00] 8670; GENERIC-NEXT: retq # sched: [1:1.00] 8671; 8672; SKX-LABEL: _invec8xi32: 8673; SKX: # %bb.0: 8674; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] 8675; SKX-NEXT: retq # sched: [7:1.00] 8676 %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer 8677 ret <16 x i32>%res 8678} 8679 8680define <8 x i64> @_invec4xi64(<4 x i64>%a) { 8681; GENERIC-LABEL: _invec4xi64: 8682; GENERIC: # %bb.0: 8683; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00] 8684; GENERIC-NEXT: retq # sched: [1:1.00] 8685; 8686; SKX-LABEL: _invec4xi64: 8687; SKX: # %bb.0: 8688; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] 8689; SKX-NEXT: retq # sched: [7:1.00] 8690 %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer 8691 ret <8 x i64>%res 8692} 8693 8694declare void @func_f32(float) 8695define <16 x float> @broadcast_ss_spill(float %x) { 8696; GENERIC-LABEL: broadcast_ss_spill: 8697; GENERIC: # %bb.0: 8698; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33] 8699; GENERIC-NEXT: .cfi_def_cfa_offset 32 8700; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 8701; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] 8702; GENERIC-NEXT: callq func_f32 8703; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00] 8704; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] 8705; GENERIC-NEXT: .cfi_def_cfa_offset 8 8706; GENERIC-NEXT: retq # sched: [1:1.00] 8707; 8708; SKX-LABEL: broadcast_ss_spill: 8709; SKX: # %bb.0: 8710; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] 8711; SKX-NEXT: .cfi_def_cfa_offset 32 8712; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 8713; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] 8714; SKX-NEXT: callq func_f32 8715; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] 8716; SKX-NEXT: addq $24, %rsp # sched: [1:0.25] 8717; SKX-NEXT: .cfi_def_cfa_offset 8 8718; SKX-NEXT: retq # sched: [7:1.00] 8719 %a = fadd float %x, %x 8720 call void @func_f32(float %a) 8721 %b = insertelement <16 x float> undef, float %a, i32 0 8722 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer 8723 ret <16 x float> %c 8724} 8725 8726declare void @func_f64(double) 8727define <8 x double> @broadcast_sd_spill(double %x) { 8728; GENERIC-LABEL: broadcast_sd_spill: 8729; GENERIC: # %bb.0: 8730; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33] 8731; GENERIC-NEXT: .cfi_def_cfa_offset 32 8732; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 8733; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] 8734; GENERIC-NEXT: callq func_f64 8735; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00] 8736; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] 8737; GENERIC-NEXT: .cfi_def_cfa_offset 8 8738; GENERIC-NEXT: retq # sched: [1:1.00] 8739; 8740; SKX-LABEL: broadcast_sd_spill: 8741; SKX: # %bb.0: 8742; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] 8743; SKX-NEXT: .cfi_def_cfa_offset 32 8744; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 8745; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] 8746; SKX-NEXT: callq func_f64 8747; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] 8748; SKX-NEXT: addq $24, %rsp # sched: [1:0.25] 8749; SKX-NEXT: .cfi_def_cfa_offset 8 8750; SKX-NEXT: retq # sched: [7:1.00] 8751 %a = fadd double %x, %x 8752 call void @func_f64(double %a) 8753 %b = insertelement <8 x double> undef, double %a, i32 0 8754 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer 8755 ret <8 x double> %c 8756} 8757