; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+vpclmulqdq,+avx512vl | FileCheck %s
; FIXME: actual vpclmulqdq operation should be eliminated

declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
declare <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64>, <4 x i64>, i8) nounwind readnone
declare <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64>, <8 x i64>, i8) nounwind readnone

define <2 x i64> @commute_xmm_v1(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: commute_xmm_v1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a1, <2 x i64> %a0, i8 0)
  %3 = xor <2 x i64> %1, %2
  ret <2 x i64> %3
}

define <2 x i64> @commute_xmm_v2(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: commute_xmm_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $16, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 16)
  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a1, <2 x i64> %a0, i8 1)
  %3 = xor <2 x i64> %2, %1
  ret <2 x i64> %3
}

define <2 x i64> @commute_xmm_v3(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: commute_xmm_v3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $17, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 17)
  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a1, <2 x i64> %a0, i8 17)
  %3 = xor <2 x i64> %2, %1
  ret <2 x i64> %3
}

define <4 x i64> @commute_ymm_v1(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: commute_ymm_v1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $0, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 0)
  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 0)
  %3 = xor <4 x i64> %1, %2
  ret <4 x i64> %3
}

define <4 x i64> @commute_ymm_v2(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: commute_ymm_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $16, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 16)
  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 1)
  %3 = xor <4 x i64> %2, %1
  ret <4 x i64> %3
}

define <4 x i64> @commute_ymm_v3(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: commute_ymm_v3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $17, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a0, <4 x i64> %a1, i8 17)
  %2 = call <4 x i64> @llvm.x86.pclmulqdq.256(<4 x i64> %a1, <4 x i64> %a0, i8 17)
  %3 = xor <4 x i64> %2, %1
  ret <4 x i64> %3
}

define <8 x i64> @commute_zmm_v1(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: commute_zmm_v1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $0, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpxorq %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a0, <8 x i64> %a1, i8 0)
  %2 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a1, <8 x i64> %a0, i8 0)
  %3 = xor <8 x i64> %1, %2
  ret <8 x i64> %3
}

define <8 x i64> @commute_zmm_v2(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: commute_zmm_v2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $16, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpxorq %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a0, <8 x i64> %a1, i8 16)
  %2 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a1, <8 x i64> %a0, i8 1)
  %3 = xor <8 x i64> %2, %1
  ret <8 x i64> %3
}

define <8 x i64> @commute_zmm_v3(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: commute_zmm_v3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $17, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpxorq %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %1 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a0, <8 x i64> %a1, i8 17)
  %2 = call <8 x i64> @llvm.x86.pclmulqdq.512(<8 x i64> %a1, <8 x i64> %a0, i8 17)
  %3 = xor <8 x i64> %2, %1
  ret <8 x i64> %3
}
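
; Note (explanatory sketch, not checked by FileCheck): pclmulqdq selects a
; quadword of the first source with imm8 bit 0 and a quadword of the second
; source with imm8 bit 4, and carry-less multiplication is commutative, so
; swapping the sources while also exchanging bits 0 and 4 of the immediate
; yields the same product: $0x00 <-> $0x00, $0x01 <-> $0x10, $0x11 <-> $0x11.
; Each %2 above is therefore equal to its %1, the xor folds to zero, and the
; vpclmulqdq result is dead, which is what the FIXME expects to be removed.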