// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
// RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX
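//
// This test verifies the IR emitted for '#pragma omp simd' with the aligned,
// safelen, simdlen, and linear clauses: alignment assumptions via
// @llvm.assume, the llvm.mem.parallel_loop_access access metadata, and the
// llvm.loop.vectorize.* loop metadata checked at the end of the file.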

void h1(float *c, float *a, double b[], int size)
{
// CHECK-LABEL: define void @h1
  int t = 0;
#pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b)
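// For 'aligned(c:32)' the pointer is cast to an integer, masked with 31
// (32 - 1), and the masked value is asserted to be zero via @llvm.assume.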
// CHECK: [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK: [[A_PTRINT:%.+]] = ptrtoint

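// 'aligned(a,b)' names no alignment, so the target default applies, as the
// per-target masks below show: 16 bytes (mask 15) for plain x86 and PPC,
// 32 bytes for AVX, and 64 bytes for AVX-512. Under the QPX ABI only the
// double pointer 'b' is assumed 32-byte aligned; the float pointer 'a'
// stays at 16.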
// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK: [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
  }
// do not emit parallel_loop_access metadata because of the safelen clause.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
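// safelen(16) only licenses independence between iterations at most 16
// apart, so the accesses are not marked parallel; the loop instead gets
// llvm.loop.vectorize.width 16 (see the metadata checks below).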
#pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8)
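// Same pragma plus simdlen(8): the alignment assumptions are unchanged, but
// the loop metadata now requests a vectorization width of 8 (checked at the
// end of the file), while safelen(16) still suppresses the parallel access
// metadata.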
// CHECK: [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK: [[A_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK: [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
  }
// do not emit parallel_loop_access metadata because of the safelen clause.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
#pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8)
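// Without safelen the loop is fully parallel: stores inside it carry
// llvm.mem.parallel_loop_access, and simdlen(8) again requests width 8.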
// CHECK: [[C_PTRINT:%.+]] = ptrtoint
// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31
// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]])
// CHECK: [[A_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15

// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]])
// CHECK: [[B_PTRINT:%.+]] = ptrtoint

// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31

// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0
// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]])
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
  }
}

void h2(float *c, float *a, float *b, int size)
{
// CHECK-LABEL: define void @h2
  int t = 0;
#pragma omp simd linear(t)
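// A plain '#pragma omp simd' with only a linear clause: no alignment
// assumptions, just the parallel access metadata pointing at the loop's
// metadata node, which enables vectorization without fixing a width.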
  for (int i = 0; i < size; ++i) {
    c[i] = a[i] * a[i] + b[i] * b[t];
    ++t;
// CHECK: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access [[LOOP_H2_HEADER:![0-9]+]]
  }
}

void h3(float *c, float *a, float *b, int size)
{
// CHECK-LABEL: define void @h3
#pragma omp simd
  for (int i = 0; i < size; ++i) {
    for (int j = 0; j < size; ++j) {
      c[j*i] = a[i] * b[j];
    }
  }
// do not emit parallel_loop_access metadata for the nested loop.
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.mem.parallel_loop_access {{![0-9]+}}
}

// Metadata for h1:
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_16:![0-9]+]], [[LOOP_VEC_ENABLE:![0-9]+]]}
// CHECK: [[LOOP_WIDTH_16]] = !{!"llvm.loop.vectorize.width", i32 16}
// CHECK: [[LOOP_VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8:![0-9]+]], [[LOOP_VEC_ENABLE]]}
// CHECK: [[LOOP_WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
// CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8]], [[LOOP_VEC_ENABLE]]}
//
// Metadata for h2:
// CHECK: [[LOOP_H2_HEADER]] = distinct !{[[LOOP_H2_HEADER]], [[LOOP_VEC_ENABLE]]}
//
// Metadata for h3:
// CHECK: [[LOOP_H3_HEADER:![0-9]+]] = distinct !{[[LOOP_H3_HEADER]], [[LOOP_VEC_ENABLE]]}
//