• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
#include "perf_counter.h"

#include <cstring>
17 
// Branch-prediction hints; defined only if the project headers have not
// already provided them.  gcc/clang builtin __builtin_expect tells the
// compiler which way the branch usually goes.
#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif
24 
25 #ifdef PERF_MONITOR
26 
// Config input / result output file names (resolved in the current working
// directory of the process).
static const char* config_file = "perf_config.txt";
static const char* output_file = "perf_result.txt";
// Serializes writes to the result file and the one-shot config parsing.
static pthread_mutex_t __pw_mutex = PTHREAD_MUTEX_INITIALIZER;
// 0 until perf_result.txt has been truncated once; later opens append.
static int __perf_write = 0;
// Becomes 1 after perf_init() has run (transition guarded by __pw_mutex).
static std::atomic<int> __perf_init = 0;

// Protects __g_perfstat, the registry of every thread's counter state.
static pthread_mutex_t __g_stat_mutex = PTHREAD_MUTEX_INITIALIZER;
static std::vector<struct perf_stat_t*> __g_perfstat;

// Event configuration; the defaults below may be overridden by
// perf_config.txt (see perf_init()).
static int n_event = 5;
static int even_type = PERF_TYPE_SOFTWARE;
static int pmu_event[MAX_COUNTERS] = {PERF_COUNT_SW_CPU_CLOCK, PERF_COUNT_SW_TASK_CLOCK, PERF_COUNT_SW_PAGE_FAULTS,
    PERF_COUNT_SW_CONTEXT_SWITCHES, PERF_COUNT_SW_CPU_MIGRATIONS};

// Per-thread counter state; lazily created on the first perf_begin() call.
static __thread struct perf_stat_t* t_perfStat = NULL;
42 
// Kernel thread id of the caller.  Older glibc versions expose no gettid()
// wrapper, so invoke the raw syscall.
static inline pid_t __gettid(void)
{
    return static_cast<pid_t>(syscall(__NR_gettid));
}
47 
// Thin wrapper over the perf_event_open(2) syscall (no glibc wrapper
// exists).  Returns the new event fd, or -1 with errno set on failure.
static inline int perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
    long fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    return static_cast<int>(fd);
}
52 
perf_open(struct perf_stat_t * pf,int event)53 static void perf_open(struct perf_stat_t* pf, int event)
54 {
55     struct perf_event_attr attr = {0};
56 
57     attr.size = sizeof(struct perf_event_attr);
58     attr.type = even_type;
59     attr.config = event;
60     attr.disabled = 1;
61     attr.exclude_kernel = 1;
62     attr.exclude_hv = 1;
63     attr.read_format = PERF_FORMAT_GROUP;
64 
65     // calling process/thread on any CPU
66     /********************************************************************************************************
67       detail in https://man7.org/linux/man-pages/man2/perf_event_open.2.html
68 
69        int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags);
70        pid == 0 and cpu == -1
71               This measures the calling process/thread on any CPU.
72 
73        pid == 0 and cpu >= 0
74               This measures the calling process/thread only when running
75               on the specified CPU.
76 
77        pid > 0 and cpu == -1
78               This measures the specified process/thread on any CPU.
79 
80        pid > 0 and cpu >= 0
81               This measures the specified process/thread only when
82               running on the specified CPU.
83 
84        pid == -1 and cpu >= 0
85               This measures all processes/threads on the specified CPU.
86               This requires CAP_PERFMON (since Linux 5.8) or
87               CAP_SYS_ADMIN capability or a
88               /proc/sys/kernel/perf_event_paranoid value of less than 1.
89 
90        pid == -1 and cpu == -1
91               This setting is invalid and will return an error.
92 
93     *********************************************************************************************************/
94     int ret = perf_event_open(&attr, pf->pid, -1, pf->perf_fd, 0);
95     if (ret < 0) {
96         return;
97     }
98 
99     if (pf->perf_fd == -1) {
100         pf->perf_fd = ret;
101     }
102     pf->n_counters++;
103 }
104 
perf_init(void)105 static void perf_init(void)
106 {
107     std::fstream file(config_file, std::ios::in);
108     if (!file) {
109         printf("perf_config.txt not exist.\n");
110         return;
111     }
112 
113     if (!(file >> even_type)) {
114         printf("perf event type not exist.\n");
115         file.close();
116         return;
117     }
118 
119     if (!(file >> n_event)) {
120         printf("perf event num not exist.\n");
121         file.close();
122         return;
123     }
124 
125     if ((n_event > MAX_COUNTERS) || (even_type > PERF_TYPE_MAX)) {
126         printf("pmu config err type:%d, num:%d.\n", even_type, n_event);
127         file.close();
128         return;
129     }
130 
131     for (int i = 0; i < n_event; i++) {
132         if (!(file >> pmu_event[i]))
133             break;
134         printf("pmu event id:%d.\n", pmu_event[i]);
135     }
136 
137     file.close();
138 }
139 
perf_thread_env_init(void)140 static void perf_thread_env_init(void)
141 {
142     pthread_mutex_lock(&__pw_mutex);
143     if (__perf_init == 0) {
144         perf_init();
145         __perf_init = 1;
146     }
147     pthread_mutex_unlock(&__pw_mutex);
148     t_perfStat = reinterpret_cast<struct perf_stat_t*>(malloc(sizeof(struct perf_stat_t)));
149     if (!t_perfStat) {
150         abort();
151     }
152     pthread_mutex_lock(&__g_stat_mutex);
153     __g_perfstat.push_back(t_perfStat);
154     pthread_mutex_unlock(&__g_stat_mutex);
155 
156     std::fill(t_perfStat, t_perfStat + sizeof(struct perf_stat_t), perf_stat_t {0});
157     pid_t pid = __gettid();
158 
159     t_perfStat->pid = pid;
160     t_perfStat->perf_fd = -1;
161 
162     for (int i = 0; i < n_event; i++) {
163         perf_open(t_perfStat, pmu_event[i]);
164     }
165 
166     ioctl(t_perfStat->perf_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
167 }
168 
perf_counter_clear(void)169 void perf_counter_clear(void)
170 {
171     pthread_mutex_lock(&__g_stat_mutex);
172     for (unsigned int j = 0; j < __g_perfstat.size(); j++) {
173         struct perf_stat_t* pstat = __g_perfstat[j];
174         for (int i = 0; i < TASK_NUM; i++) {
175             pstat->perf_task[i].rd = 0;
176         }
177     }
178 
179     pthread_mutex_unlock(&__g_stat_mutex);
180 }
181 
perf_begin(const char * name,int id)182 unsigned long perf_begin(const char* name, int id)
183 {
184     unsigned long rd;
185     if (unlikely(!t_perfStat)) {
186         perf_thread_env_init();
187     }
188     struct perf_task_t* task = &t_perfStat->perf_task[id % TASK_NUM];
189 
190     rd = task->rd;
191     struct perf_record_t* record = &task->perf_record[rd % RECORD_NUM];
192     record->begin_flag = false;
193     record->end_flag = false;
194     ssize_t rc =
195         read(t_perfStat->perf_fd, &record->counters_begin, sizeof(unsigned long) * (t_perfStat->n_counters + 1));
196     if (unlikely(rc < 0))
197         perror("perf_begin: read");
198 
199     if (likely(name)) {
200         std::copy_n(name, NAME_LEN - 1, record->name);
201     }
202     task->rd++;
203     record->begin_flag = true;
204     return rd;
205 }
206 
perf_end(int id,unsigned long rd)207 void perf_end(int id, unsigned long rd)
208 {
209     if (unlikely(!t_perfStat)) {
210         return;
211     }
212 
213     struct perf_task_t* task = &t_perfStat->perf_task[id % TASK_NUM];
214 
215     struct perf_record_t* record = &task->perf_record[rd % RECORD_NUM];
216 
217     ssize_t rc = read(t_perfStat->perf_fd, &record->counters_end, sizeof(unsigned long) * (t_perfStat->n_counters + 1));
218     if (unlikely(rc < 0))
219         perror("perf_end: read");
220 
221     record->end_flag = true;
222 }
// A task name whose first `ignore_count` completed records are excluded
// from the report (used to skip warm-up iterations, for example).
struct perf_ignore {
    const char* ig_task;     // task name as passed to perf_begin()
    int ignore_count;        // remaining records to skip; decremented in use
};

// Built-in ignore list; counts are consumed as matching records are seen.
static struct perf_ignore __g_ignore[] = {
    {"cpu_work", 2},
    {"task_build", 2},
};
232 
find_ignore(struct perf_record_t * record)233 static struct perf_ignore* find_ignore(struct perf_record_t* record)
234 {
235     int i;
236     int size = sizeof(__g_ignore) / sizeof(struct perf_ignore);
237     for (i = 0; i < size; i++) {
238         if (strncmp(__g_ignore[i].ig_task, record->name, NAME_LEN) == 0) {
239             return &__g_ignore[i];
240         }
241     }
242 
243     return nullptr;
244 }
245 
perf_ignore(struct perf_record_t * record)246 static bool perf_ignore(struct perf_record_t* record)
247 {
248     struct perf_ignore* p = find_ignore(record);
249     if ((!p) || (!p->ignore_count)) {
250         return false;
251     }
252     p->ignore_count--;
253     return true;
254 }
255 
perf_counter_output(struct perf_stat_t * stat)256 static void perf_counter_output(struct perf_stat_t* stat)
257 {
258     if (!stat) {
259         printf("no perf stat,tid:%d\n", __gettid());
260         return;
261     }
262     std::map<std::string, counters_t> m_counters;
263 
264     pthread_mutex_lock(&__pw_mutex);
265 
266     FILE* fd = fopen(output_file, __perf_write == 0 ? (__perf_write = 1, "wt") : "a");
267     if (!fd) {
268         printf("perf_result.txt creat err.\n");
269         pthread_mutex_unlock(&__pw_mutex);
270         return;
271     }
272 
273     auto doRecord = [&](struct perf_record_t* record) {
274         auto it = m_counters.find(record->name);
275         if (it != m_counters.end()) {
276             it->second.nr++;
277             for (int k = 0; k < MAX_COUNTERS; k++) {
278                 it->second.vals[k] += (record->counters_end.vals[k] - record->counters_begin.vals[k]);
279             }
280         } else {
281             counters_t new_;
282             new_.nr = 1;
283             for (int k = 0; k < MAX_COUNTERS; k++) {
284                 new_.vals[k] = (record->counters_end.vals[k] - record->counters_begin.vals[k]);
285             }
286             m_counters.insert(std::pair<std::string, counters_t>(record->name, new_));
287         }
288     };
289 
290     for (int i = 0; i < TASK_NUM; i++) {
291         struct perf_task_t* task = &stat->perf_task[i];
292         int max_rd = (task->rd > RECORD_NUM) ? RECORD_NUM : task->rd;
293         for (int j = 0; j < max_rd; j++) {
294             struct perf_record_t* record = &task->perf_record[j];
295             if (!(record->begin_flag) || !(record->end_flag)) {
296                 continue;
297             }
298             if (perf_ignore(record)) {
299                 continue;
300             }
301 
302             doRecord(record);
303         }
304     }
305 
306     for (auto iter = m_counters.begin(); iter != m_counters.end(); iter++) {
307         fprintf(fd,
308             "pid:%d, taskname:%s, taskid:%d, recordid:%d, evt_num:%d, pmu_%x:%lu, pmu_%x:%lu, pmu_%x:%lu, pmu_%x:%lu, "
309             "pmu_%x:%lu, pmu_%x:%lu, pmu_%x:%lu, nr:%lu.\n",
310             stat->pid, iter->first.c_str(), 0, 1, stat->n_counters, pmu_event[0], iter->second.vals[0], pmu_event[1],
311             iter->second.vals[1], pmu_event[2], iter->second.vals[2], pmu_event[3], iter->second.vals[3], pmu_event[4],
312             iter->second.vals[4], pmu_event[5], iter->second.vals[5], pmu_event[6], iter->second.vals[6],
313             iter->second.nr);
314     }
315 
316     fclose(fd);
317     pthread_mutex_unlock(&__pw_mutex);
318     m_counters.clear();
319 }
320 
// Writes only the calling thread's aggregated results to perf_result.txt
// (t_perfStat is NULL — and reported as such — if this thread never
// called perf_begin()).
void perf_counter_output_single(void)
{
    perf_counter_output(t_perfStat);
}
325 
perf_counter_output_all(void)326 void perf_counter_output_all(void)
327 {
328     pthread_mutex_lock(&__g_stat_mutex);
329     for (unsigned int j = 0; j < __g_perfstat.size(); j++) {
330         struct perf_stat_t* pstat = __g_perfstat[j];
331         perf_counter_output(pstat);
332     }
333 
334     pthread_mutex_unlock(&__g_stat_mutex);
335 }
336 
337 #else
// PERF_MONITOR disabled: sampling entry point compiles to a no-op that
// always hands back record index 0.
unsigned long perf_begin(const char* name, int id)
{
    (void)name;
    (void)id;
    return 0;
}
// PERF_MONITOR disabled: closing a sample is a no-op.
void perf_end(int id, unsigned long rd)
{
    (void)id;
    (void)rd;
}
349 
// PERF_MONITOR disabled: reporting and reset entry points are no-ops.
void perf_counter_output_all(void) {}
void perf_counter_output_single(void) {}
void perf_counter_clear(void) {}
353 
354 #endif
355