; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Verifies instruction selection for 512-bit integer logic operations
; (and / or / xor on <16 x i32> and <8 x i64>) when targeting KNL, including
; the forms that fold a full-width load or an embedded broadcast into the
; memory operand.
;
; Every label pattern below ends in ':' so that it anchors on the function's
; label definition ("_name:") and cannot be satisfied by the identically
; spelled instruction mnemonic or by the .globl directive.

; 32-bit element AND: expects the dword form on a zmm register.
; CHECK-LABEL: vpandd:
; CHECK: vpandd %zmm
; CHECK: ret
define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; Force the execution domain with an add, so the integer form of the
  ; logic instruction is the one that gets selected.
  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %x = and <16 x i32> %a2, %b
  ret <16 x i32> %x
}

; 32-bit element OR: expects the dword form on a zmm register.
; CHECK-LABEL: vpord:
; CHECK: vpord %zmm
; CHECK: ret
define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; Force the execution domain with an add, so the integer form of the
  ; logic instruction is the one that gets selected.
  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %x = or <16 x i32> %a2, %b
  ret <16 x i32> %x
}

; 32-bit element XOR: expects the dword form on a zmm register.
; CHECK-LABEL: vpxord:
; CHECK: vpxord %zmm
; CHECK: ret
define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
entry:
  ; Force the execution domain with an add, so the integer form of the
  ; logic instruction is the one that gets selected.
  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %x = xor <16 x i32> %a2, %b
  ret <16 x i32> %x
}

; 64-bit element AND: expects the qword form on a zmm register.
; CHECK-LABEL: vpandq:
; CHECK: vpandq %zmm
; CHECK: ret
define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; Force the execution domain with an add, so the integer form of the
  ; logic instruction is the one that gets selected.
  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %x = and <8 x i64> %a2, %b
  ret <8 x i64> %x
}

; 64-bit element OR: expects the qword form on a zmm register.
; CHECK-LABEL: vporq:
; CHECK: vporq %zmm
; CHECK: ret
define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; Force the execution domain with an add, so the integer form of the
  ; logic instruction is the one that gets selected.
  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %x = or <8 x i64> %a2, %b
  ret <8 x i64> %x
}

; 64-bit element XOR: expects the qword form on a zmm register.
; CHECK-LABEL: vpxorq:
; CHECK: vpxorq %zmm
; CHECK: ret
define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
entry:
  ; Force the execution domain with an add, so the integer form of the
  ; logic instruction is the one that gets selected.
  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  %x = xor <8 x i64> %a2, %b
  ret <8 x i64> %x
}

; OR with a splat constant: expects the constant to be read RIP-relative
; from an LCP-prefixed symbol and broadcast in the instruction itself
; ({1to8}), operating in place on zmm0.
; CHECK-LABEL: orq_broadcast:
; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; AND with a full 512-bit vector loaded from %x: expects the load to be
; folded into the instruction's memory operand.
; CHECK-LABEL: andd512fold:
; CHECK: vpandd (%
; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

; AND with a scalar i64 loaded from %ap and splatted to all eight lanes:
; expects the load + splat to be folded into an embedded {1to8} broadcast
; from (%rdi).
; CHECK-LABEL: andqbrst:
; CHECK: vpandq (%rdi){1to8}, %zmm
; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
entry:
  %a = load i64, i64* %ap, align 8
  ; insertelement into lane 0 + zero-mask shufflevector is the IR splat idiom.
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}