• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
3
4; Check that we perform a scalar XOR on i32.
5
; pull_bitcast: loads two <4 x i8> vectors through %pA and %pB, XORs them
; element-wise, and stores the result back through %pA.
; The assertions below expect the whole operation to be done on a 32-bit
; scalar: one 32-bit load followed by a 32-bit xor with a memory destination,
; with no vector instructions.
6define void @pull_bitcast(<4 x i8>* %pA, <4 x i8>* %pB) {
7; CHECK-LABEL: pull_bitcast:
8; CHECK:       # %bb.0:
9; CHECK-NEXT:    movl (%rsi), %eax
10; CHECK-NEXT:    xorl %eax, (%rdi)
11; CHECK-NEXT:    retq
12  %A = load <4 x i8>, <4 x i8>* %pA
13  %B = load <4 x i8>, <4 x i8>* %pB
14  %C = xor <4 x i8> %A, %B
15  store <4 x i8> %C, <4 x i8>* %pA
16  ret void
17}
18
; multi_use_swizzle: the two-input shuffle %S (elements 1,1 of %A and
; elements 1,2 of %B) has two users, %S1 and %S2, whose results are XORed
; and returned.
; The assertions below expect %S to be formed once by a single vshufps, then
; permuted separately by two vpermilps instructions (masks [1,3,2,2] and
; [2,1,0,2]) before the final vxorps.
19define <4 x i32> @multi_use_swizzle(<4 x i32>* %pA, <4 x i32>* %pB) {
20; CHECK-LABEL: multi_use_swizzle:
21; CHECK:       # %bb.0:
22; CHECK-NEXT:    vmovaps (%rdi), %xmm0
23; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],mem[1,2]
24; CHECK-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,3,2,2]
25; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,0,2]
26; CHECK-NEXT:    vxorps %xmm0, %xmm1, %xmm0
27; CHECK-NEXT:    retq
28  %A = load <4 x i32>, <4 x i32>* %pA
29  %B = load <4 x i32>, <4 x i32>* %pB
30  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
31  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
32  %S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
33  %R = xor <4 x i32> %S1, %S2
34  ret <4 x i32> %R
35}
36
; pull_bitcast2: variant of the <4 x i8> XOR test in which the loaded %A is
; also stored through %pC, and the XOR result %C is both stored through %pA
; and returned.
; The assertions below expect the loads, the extra store, and the xor to be
; done as 32-bit scalar operations; the only vector instruction expected is
; the vmovd that places the scalar result in xmm0.
37define <4 x i8> @pull_bitcast2(<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
38; CHECK-LABEL: pull_bitcast2:
39; CHECK:       # %bb.0:
40; CHECK-NEXT:    movl (%rdi), %eax
41; CHECK-NEXT:    movl %eax, (%rdx)
42; CHECK-NEXT:    xorl (%rsi), %eax
43; CHECK-NEXT:    vmovd %eax, %xmm0
44; CHECK-NEXT:    movl %eax, (%rdi)
45; CHECK-NEXT:    retq
46  %A = load <4 x i8>, <4 x i8>* %pA
47  store <4 x i8> %A, <4 x i8>* %pC
48  %B = load <4 x i8>, <4 x i8>* %pB
49  %C = xor <4 x i8> %A, %B
50  store <4 x i8> %C, <4 x i8>* %pA
51  ret <4 x i8> %C
52}
53
; reverse_1: applies the mask <1,0,3,2> twice in a row. The mask swaps
; elements within each pair, so applying it twice restores the original
; element order of %A; every mask index is below 4, so %B contributes no
; elements to the result.
; The assertions below expect both shuffles to cancel out, leaving only a
; plain vector load through %pA.
54define <4 x i32> @reverse_1(<4 x i32>* %pA, <4 x i32>* %pB) {
55; CHECK-LABEL: reverse_1:
56; CHECK:       # %bb.0:
57; CHECK-NEXT:    vmovaps (%rdi), %xmm0
58; CHECK-NEXT:    retq
59  %A = load <4 x i32>, <4 x i32>* %pA
60  %B = load <4 x i32>, <4 x i32>* %pB
61  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
62  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
63  ret <4 x i32> %S1
64}
65
; no_reverse_shuff: same first shuffle as reverse_1 (mask <1,0,3,2>), but the
; second mask is <3,2,3,2>, so the pair does not cancel out. Composing the two
; masks selects elements [2,3,2,3] of %A (%S[3] is %A[2], %S[2] is %A[3]).
; The assertions below expect the two shuffles to be merged into a single
; vpermilps that reads its source directly from memory.
66define <4 x i32> @no_reverse_shuff(<4 x i32>* %pA, <4 x i32>* %pB) {
67; CHECK-LABEL: no_reverse_shuff:
68; CHECK:       # %bb.0:
69; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
70; CHECK-NEXT:    retq
71  %A = load <4 x i32>, <4 x i32>* %pA
72  %B = load <4 x i32>, <4 x i32>* %pB
73  %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
74  %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
75  ret <4 x i32> %S1
76}
77
78