1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "perf_counter.h"
17
18 #ifndef unlikely
19 #define unlikely(x) __builtin_expect(!!(x), 0)
20 #endif
21 #ifndef likely
22 #define likely(x) __builtin_expect(!!(x), 1)
23 #endif
24
25 #ifdef PERF_MONITOR
26
// Config input / result output file names, resolved in the process CWD.
static const char* config_file = "perf_config.txt";
static const char* output_file = "perf_result.txt";
// Serializes the one-shot config load and all result-file writes.
static pthread_mutex_t __pw_mutex = PTHREAD_MUTEX_INITIALIZER;
static int __perf_write = 0;              // 0 until the first output truncates perf_result.txt
static std::atomic<int> __perf_init = 0;  // 0 until perf_init() has run once

// Guards the process-wide list of per-thread stat blocks.
static pthread_mutex_t __g_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
static std::vector<struct perf_stat_t*> __g_perfstat;

// Event selection; these defaults are overridden by perf_config.txt when present.
static int n_event = 5;
static int even_type = PERF_TYPE_SOFTWARE;
static int pmu_event[MAX_COUNTERS] = {PERF_COUNT_SW_CPU_CLOCK, PERF_COUNT_SW_TASK_CLOCK, PERF_COUNT_SW_PAGE_FAULTS,
    PERF_COUNT_SW_CONTEXT_SWITCHES, PERF_COUNT_SW_CPU_MIGRATIONS};

// Per-thread perf state, lazily created by the first perf_begin() in a thread.
static __thread struct perf_stat_t* t_perfStat = NULL;
42
// Kernel thread id of the calling thread; issued as a raw syscall because
// older libc versions do not expose a gettid() wrapper.
static inline pid_t __gettid(void)
{
    return static_cast<pid_t>(syscall(__NR_gettid));
}
47
// Thin wrapper for the perf_event_open(2) syscall, which has no libc
// wrapper. Returns the new event fd, or -1 with errno set on failure.
static inline int perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
    long fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    return static_cast<int>(fd);
}
52
perf_open(struct perf_stat_t * pf,int event)53 static void perf_open(struct perf_stat_t* pf, int event)
54 {
55 struct perf_event_attr attr = {0};
56
57 attr.size = sizeof(struct perf_event_attr);
58 attr.type = even_type;
59 attr.config = event;
60 attr.disabled = 1;
61 attr.exclude_kernel = 1;
62 attr.exclude_hv = 1;
63 attr.read_format = PERF_FORMAT_GROUP;
64
65 // calling process/thread on any CPU
66 /********************************************************************************************************
67 detail in https://man7.org/linux/man-pages/man2/perf_event_open.2.html
68
69 int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags);
70 pid == 0 and cpu == -1
71 This measures the calling process/thread on any CPU.
72
73 pid == 0 and cpu >= 0
74 This measures the calling process/thread only when running
75 on the specified CPU.
76
77 pid > 0 and cpu == -1
78 This measures the specified process/thread on any CPU.
79
80 pid > 0 and cpu >= 0
81 This measures the specified process/thread only when
82 running on the specified CPU.
83
84 pid == -1 and cpu >= 0
85 This measures all processes/threads on the specified CPU.
86 This requires CAP_PERFMON (since Linux 5.8) or
87 CAP_SYS_ADMIN capability or a
88 /proc/sys/kernel/perf_event_paranoid value of less than 1.
89
90 pid == -1 and cpu == -1
91 This setting is invalid and will return an error.
92
93 *********************************************************************************************************/
94 int ret = perf_event_open(&attr, pf->pid, -1, pf->perf_fd, 0);
95 if (ret < 0) {
96 return;
97 }
98
99 if (pf->perf_fd == -1) {
100 pf->perf_fd = ret;
101 }
102 pf->n_counters++;
103 }
104
perf_init(void)105 static void perf_init(void)
106 {
107 std::fstream file(config_file, std::ios::in);
108 if (!file) {
109 printf("perf_config.txt not exist.\n");
110 return;
111 }
112
113 if (!(file >> even_type)) {
114 printf("perf event type not exist.\n");
115 file.close();
116 return;
117 }
118
119 if (!(file >> n_event)) {
120 printf("perf event num not exist.\n");
121 file.close();
122 return;
123 }
124
125 if ((n_event > MAX_COUNTERS) || (even_type > PERF_TYPE_MAX)) {
126 printf("pmu config err type:%d, num:%d.\n", even_type, n_event);
127 file.close();
128 return;
129 }
130
131 for (int i = 0; i < n_event; i++) {
132 if (!(file >> pmu_event[i]))
133 break;
134 printf("pmu event id:%d.\n", pmu_event[i]);
135 }
136
137 file.close();
138 }
139
perf_thread_env_init(void)140 static void perf_thread_env_init(void)
141 {
142 pthread_mutex_lock(&__pw_mutex);
143 if (__perf_init == 0) {
144 perf_init();
145 __perf_init = 1;
146 }
147 pthread_mutex_unlock(&__pw_mutex);
148 t_perfStat = reinterpret_cast<struct perf_stat_t*>(malloc(sizeof(struct perf_stat_t)));
149 if (!t_perfStat) {
150 abort();
151 }
152 pthread_mutex_lock(&__g_stat_mutex);
153 __g_perfstat.push_back(t_perfStat);
154 pthread_mutex_unlock(&__g_stat_mutex);
155
156 std::fill(t_perfStat, t_perfStat + sizeof(struct perf_stat_t), perf_stat_t {0});
157 pid_t pid = __gettid();
158
159 t_perfStat->pid = pid;
160 t_perfStat->perf_fd = -1;
161
162 for (int i = 0; i < n_event; i++) {
163 perf_open(t_perfStat, pmu_event[i]);
164 }
165
166 ioctl(t_perfStat->perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
167 }
168
perf_counter_clear(void)169 void perf_counter_clear(void)
170 {
171 pthread_mutex_lock(&__g_stat_mutex);
172 for (unsigned int j = 0; j < __g_perfstat.size(); j++) {
173 struct perf_stat_t* pstat = __g_perfstat[j];
174 for (int i = 0; i < TASK_NUM; i++) {
175 pstat->perf_task[i].rd = 0;
176 }
177 }
178
179 pthread_mutex_unlock(&__g_stat_mutex);
180 }
181
perf_begin(const char * name,int id)182 unsigned long perf_begin(const char* name, int id)
183 {
184 unsigned long rd;
185 if (unlikely(!t_perfStat)) {
186 perf_thread_env_init();
187 }
188 struct perf_task_t* task = &t_perfStat->perf_task[id % TASK_NUM];
189
190 rd = task->rd;
191 struct perf_record_t* record = &task->perf_record[rd % RECORD_NUM];
192 record->begin_flag = false;
193 record->end_flag = false;
194 ssize_t rc =
195 read(t_perfStat->perf_fd, &record->counters_begin, sizeof(unsigned long) * (t_perfStat->n_counters + 1));
196 if (unlikely(rc < 0))
197 perror("perf_begin: read");
198
199 if (likely(name)) {
200 std::copy_n(name, NAME_LEN - 1, record->name);
201 }
202 task->rd++;
203 record->begin_flag = true;
204 return rd;
205 }
206
perf_end(int id,unsigned long rd)207 void perf_end(int id, unsigned long rd)
208 {
209 if (unlikely(!t_perfStat)) {
210 return;
211 }
212
213 struct perf_task_t* task = &t_perfStat->perf_task[id % TASK_NUM];
214
215 struct perf_record_t* record = &task->perf_record[rd % RECORD_NUM];
216
217 ssize_t rc = read(t_perfStat->perf_fd, &record->counters_end, sizeof(unsigned long) * (t_perfStat->n_counters + 1));
218 if (unlikely(rc < 0))
219 perror("perf_end: read");
220
221 record->end_flag = true;
222 }
// A task name whose first `ignore_count` completed records are dropped
// from the output (e.g. to skip warm-up iterations).
struct perf_ignore {
    const char* ig_task;  // task name as passed to perf_begin()
    int ignore_count;     // remaining records to drop; decremented on each skip
};

// Built-in ignore list consulted by perf_counter_output().
static struct perf_ignore __g_ignore[] = {
    {"cpu_work", 2},
    {"task_build", 2},
};
232
find_ignore(struct perf_record_t * record)233 static struct perf_ignore* find_ignore(struct perf_record_t* record)
234 {
235 int i;
236 int size = sizeof(__g_ignore) / sizeof(struct perf_ignore);
237 for (i = 0; i < size; i++) {
238 if (strncmp(__g_ignore[i].ig_task, record->name, NAME_LEN) == 0) {
239 return &__g_ignore[i];
240 }
241 }
242
243 return nullptr;
244 }
245
perf_ignore(struct perf_record_t * record)246 static bool perf_ignore(struct perf_record_t* record)
247 {
248 struct perf_ignore* p = find_ignore(record);
249 if ((!p) || (!p->ignore_count)) {
250 return false;
251 }
252 p->ignore_count--;
253 return true;
254 }
255
// Aggregate every completed (begin_flag && end_flag) record of one thread's
// stat block by task name and append one summary line per task to
// perf_result.txt. Serialized by __pw_mutex; the first writer in the
// process truncates the file, later writers append.
static void perf_counter_output(struct perf_stat_t* stat)
{
    if (!stat) {
        printf("no perf stat,tid:%d\n", __gettid());
        return;
    }
    // Per task name: sample count (nr) and summed (end - begin) deltas.
    std::map<std::string, counters_t> m_counters;

    pthread_mutex_lock(&__pw_mutex);

    // "wt" (truncate) on the very first output in the process, "a" afterwards.
    FILE* fd = fopen(output_file, __perf_write == 0 ? (__perf_write = 1, "wt") : "a");
    if (!fd) {
        printf("perf_result.txt creat err.\n");
        pthread_mutex_unlock(&__pw_mutex);
        return;
    }

    // Fold one record's counter deltas into its task's accumulator.
    auto doRecord = [&](struct perf_record_t* record) {
        auto it = m_counters.find(record->name);
        if (it != m_counters.end()) {
            it->second.nr++;
            for (int k = 0; k < MAX_COUNTERS; k++) {
                it->second.vals[k] += (record->counters_end.vals[k] - record->counters_begin.vals[k]);
            }
        } else {
            counters_t new_;
            new_.nr = 1;
            for (int k = 0; k < MAX_COUNTERS; k++) {
                new_.vals[k] = (record->counters_end.vals[k] - record->counters_begin.vals[k]);
            }
            m_counters.insert(std::pair<std::string, counters_t>(record->name, new_));
        }
    };

    for (int i = 0; i < TASK_NUM; i++) {
        struct perf_task_t* task = &stat->perf_task[i];
        // rd counts begins ever issued; only RECORD_NUM slots are retained.
        int max_rd = (task->rd > RECORD_NUM) ? RECORD_NUM : task->rd;
        for (int j = 0; j < max_rd; j++) {
            struct perf_record_t* record = &task->perf_record[j];
            if (!(record->begin_flag) || !(record->end_flag)) {
                continue;  // unfinished or partially overwritten record
            }
            if (perf_ignore(record)) {
                continue;  // consumed one ignore credit for this task name
            }

            doRecord(record);
        }
    }

    // NOTE(review): the format string prints seven counters unconditionally
    // (pmu_event[0..6]); this assumes MAX_COUNTERS >= 7 with unopened slots
    // reporting zero deltas — confirm against perf_counter.h.
    for (auto iter = m_counters.begin(); iter != m_counters.end(); iter++) {
        fprintf(fd,
            "pid:%d, taskname:%s, taskid:%d, recordid:%d, evt_num:%d, pmu_%x:%lu, pmu_%x:%lu, pmu_%x:%lu, pmu_%x:%lu, "
            "pmu_%x:%lu, pmu_%x:%lu, pmu_%x:%lu, nr:%lu.\n",
            stat->pid, iter->first.c_str(), 0, 1, stat->n_counters, pmu_event[0], iter->second.vals[0], pmu_event[1],
            iter->second.vals[1], pmu_event[2], iter->second.vals[2], pmu_event[3], iter->second.vals[3], pmu_event[4],
            iter->second.vals[4], pmu_event[5], iter->second.vals[5], pmu_event[6], iter->second.vals[6],
            iter->second.nr);
    }

    fclose(fd);
    pthread_mutex_unlock(&__pw_mutex);
    m_counters.clear();
}
320
// Write the calling thread's aggregated counters to perf_result.txt.
// Prints a notice instead when this thread never called perf_begin().
void perf_counter_output_single(void)
{
    perf_counter_output(t_perfStat);
}
325
perf_counter_output_all(void)326 void perf_counter_output_all(void)
327 {
328 pthread_mutex_lock(&__g_stat_mutex);
329 for (unsigned int j = 0; j < __g_perfstat.size(); j++) {
330 struct perf_stat_t* pstat = __g_perfstat[j];
331 perf_counter_output(pstat);
332 }
333
334 pthread_mutex_unlock(&__g_stat_mutex);
335 }
336
337 #else
// PERF_MONITOR disabled: no-op stub so call sites compile unchanged.
// Always returns record index 0. (Stray ';' after the body removed.)
unsigned long perf_begin(const char* name, int id)
{
    (void)name;
    (void)id;
    return 0;
}
// PERF_MONITOR disabled: no-op stub. (Stray ';' after the body removed.)
void perf_end(int id, unsigned long rd)
{
    (void)id;
    (void)rd;
}
349
// PERF_MONITOR disabled: no-op stub. (Stray ';' after the body removed.)
void perf_counter_output_all(void) {}
// PERF_MONITOR disabled: no-op stub. (Stray ';' after the body removed.)
void perf_counter_output_single(void) {}
// PERF_MONITOR disabled: no-op stub. (Stray ';' after the body removed.)
void perf_counter_clear(void) {}
353
354 #endif
355