1 /*
2 * Copyright (c) 2015 PLUMgrid, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <inttypes.h>
18 #include <poll.h>
19 #include <stdio.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <syscall.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>
26 #include <sys/types.h>
27 #include <unistd.h>
28 #include <linux/perf_event.h>
29
30 #include "libbpf.h"
31 #include "perf_reader.h"
32
// Ownership states of a reader's ring buffer. The value lives in
// perf_reader::rb_use_state and is switched with compare-and-swap.
enum {
  RB_NOT_USED = 0,        // ring buffer is not in use
  RB_USED_IN_MUNMAP = 1,  // claimed by perf_reader_free() while unmapping
  RB_USED_IN_READ = 2,    // claimed by perf_reader_event_read() while draining
};
38
39 struct perf_reader {
40 perf_reader_raw_cb raw_cb;
41 perf_reader_lost_cb lost_cb;
42 void *cb_cookie; // to be returned in the cb
43 void *buf; // for keeping segmented data
44 size_t buf_size;
45 void *base;
46 int rb_use_state;
47 pid_t rb_read_tid;
48 int page_size;
49 int page_cnt;
50 int fd;
51 };
52
/*
 * Allocate a reader and remember its callbacks and ring geometry.
 *
 * raw_cb    - callback for sample payloads
 * lost_cb   - callback for lost-record notifications
 * cb_cookie - opaque pointer handed back to both callbacks
 * page_cnt  - number of data pages the ring will have once mapped
 *
 * Nothing is mapped and no fd is attached yet (fd starts at -1).
 * Returns the new reader, or NULL if the allocation fails. Release it with
 * perf_reader_free().
 */
struct perf_reader *perf_reader_new(perf_reader_raw_cb raw_cb,
                                    perf_reader_lost_cb lost_cb,
                                    void *cb_cookie, int page_cnt) {
  struct perf_reader *r = calloc(1, sizeof(*r));

  if (r == NULL)
    return NULL;

  // Every field not assigned below keeps the zero value calloc gave it.
  r->fd = -1;
  r->page_cnt = page_cnt;
  r->page_size = getpagesize();
  r->cb_cookie = cb_cookie;
  r->lost_cb = lost_cb;
  r->raw_cb = raw_cb;
  return r;
}
67
perf_reader_free(void * ptr)68 void perf_reader_free(void *ptr) {
69 if (ptr) {
70 struct perf_reader *reader = ptr;
71 pid_t tid = syscall(__NR_gettid);
72 while (!__sync_bool_compare_and_swap(&reader->rb_use_state, RB_NOT_USED, RB_USED_IN_MUNMAP)) {
73 // If the same thread, it is called from call back handler, no locking needed
74 if (tid == reader->rb_read_tid)
75 break;
76 }
77 munmap(reader->base, reader->page_size * (reader->page_cnt + 1));
78 if (reader->fd >= 0) {
79 ioctl(reader->fd, PERF_EVENT_IOC_DISABLE, 0);
80 close(reader->fd);
81 }
82 free(reader->buf);
83 free(ptr);
84 }
85 }
86
perf_reader_mmap(struct perf_reader * reader)87 int perf_reader_mmap(struct perf_reader *reader) {
88 int mmap_size = reader->page_size * (reader->page_cnt + 1);
89
90 if (reader->fd < 0) {
91 fprintf(stderr, "%s: reader fd is not set\n", __FUNCTION__);
92 return -1;
93 }
94
95 reader->base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE , MAP_SHARED, reader->fd, 0);
96 if (reader->base == MAP_FAILED) {
97 perror("mmap");
98 return -1;
99 }
100
101 return 0;
102 }
103
// Fixed-size leading fields of a trace sample.
// NOTE(review): the names look like the kernel's common tracepoint fields
// (type id, flags, preempt count, pid) -- confirm against the producer.
struct perf_sample_trace_common {
  uint16_t id;
  uint8_t flags;
  uint8_t preempt_count;
  int pid;
};
110
111 struct perf_sample_trace_kprobe {
112 struct perf_sample_trace_common common;
113 uint64_t ip;
114 };
115
parse_sw(struct perf_reader * reader,void * data,int size)116 static void parse_sw(struct perf_reader *reader, void *data, int size) {
117 uint8_t *ptr = data;
118 struct perf_event_header *header = (void *)data;
119
120 struct {
121 uint32_t size;
122 char data[0];
123 } *raw = NULL;
124
125 ptr += sizeof(*header);
126 if (ptr > (uint8_t *)data + size) {
127 fprintf(stderr, "%s: corrupt sample header\n", __FUNCTION__);
128 return;
129 }
130
131 raw = (void *)ptr;
132 ptr += sizeof(raw->size) + raw->size;
133 if (ptr > (uint8_t *)data + size) {
134 fprintf(stderr, "%s: corrupt raw sample\n", __FUNCTION__);
135 return;
136 }
137
138 // sanity check
139 if (ptr != (uint8_t *)data + size) {
140 fprintf(stderr, "%s: extra data at end of sample\n", __FUNCTION__);
141 return;
142 }
143
144 if (reader->raw_cb)
145 reader->raw_cb(reader->cb_cookie, raw->data, raw->size);
146 }
147
/*
 * Read the ring's data_head (the producer position).
 *
 * The perf mmap protocol requires a read barrier after loading data_head so
 * that the record bytes read afterwards are not observed older than the head
 * that announced them. An acquire fence provides that on every architecture
 * and costs nothing extra on strongly ordered ones; a bare compiler barrier
 * is only sufficient on x86.
 */
static uint64_t read_data_head(volatile struct perf_event_mmap_page *perf_header) {
  uint64_t data_head = perf_header->data_head;
  __atomic_thread_fence(__ATOMIC_ACQUIRE);
  return data_head;
}
153
/*
 * Publish a new data_tail (the consumer position) to the ring.
 *
 * All reads of the consumed record must be complete before the new tail
 * becomes visible, because the producer may reuse that space as soon as it
 * sees the tail move. A release fence before the store guarantees that on
 * every architecture; a bare compiler barrier is only sufficient on x86.
 */
static void write_data_tail(volatile struct perf_event_mmap_page *perf_header, uint64_t data_tail) {
  __atomic_thread_fence(__ATOMIC_RELEASE);
  perf_header->data_tail = data_tail;
}
158
perf_reader_event_read(struct perf_reader * reader)159 void perf_reader_event_read(struct perf_reader *reader) {
160 volatile struct perf_event_mmap_page *perf_header = reader->base;
161 uint64_t buffer_size = (uint64_t)reader->page_size * reader->page_cnt;
162 uint64_t data_head;
163 uint8_t *base = (uint8_t *)reader->base + reader->page_size;
164 uint8_t *sentinel = (uint8_t *)reader->base + buffer_size + reader->page_size;
165 uint8_t *begin, *end;
166
167 reader->rb_read_tid = syscall(__NR_gettid);
168 if (!__sync_bool_compare_and_swap(&reader->rb_use_state, RB_NOT_USED, RB_USED_IN_READ))
169 return;
170
171 // Consume all the events on this ring, calling the cb function for each one.
172 // The message may fall on the ring boundary, in which case copy the message
173 // into a malloced buffer.
174 for (data_head = read_data_head(perf_header); perf_header->data_tail != data_head;
175 data_head = read_data_head(perf_header)) {
176 uint64_t data_tail = perf_header->data_tail;
177 uint8_t *ptr;
178
179 begin = base + data_tail % buffer_size;
180 // event header is u64, won't wrap
181 struct perf_event_header *e = (void *)begin;
182 ptr = begin;
183 end = base + (data_tail + e->size) % buffer_size;
184 if (end < begin) {
185 // perf event wraps around the ring, make a contiguous copy
186 reader->buf = realloc(reader->buf, e->size);
187 size_t len = sentinel - begin;
188 memcpy(reader->buf, begin, len);
189 memcpy((void *)((unsigned long)reader->buf + len), base, e->size - len);
190 ptr = reader->buf;
191 }
192
193 if (e->type == PERF_RECORD_LOST) {
194 /*
195 * struct {
196 * struct perf_event_header header;
197 * u64 id;
198 * u64 lost;
199 * struct sample_id sample_id;
200 * };
201 */
202 uint64_t lost = *(uint64_t *)(ptr + sizeof(*e) + sizeof(uint64_t));
203 if (reader->lost_cb) {
204 reader->lost_cb(reader->cb_cookie, lost);
205 } else {
206 fprintf(stderr, "Possibly lost %" PRIu64 " samples\n", lost);
207 }
208 } else if (e->type == PERF_RECORD_SAMPLE) {
209 parse_sw(reader, ptr, e->size);
210 } else {
211 fprintf(stderr, "%s: unknown sample type %d\n", __FUNCTION__, e->type);
212 }
213
214 write_data_tail(perf_header, perf_header->data_tail + e->size);
215 }
216 reader->rb_use_state = RB_NOT_USED;
217 __sync_synchronize();
218 reader->rb_read_tid = 0;
219 }
220
perf_reader_poll(int num_readers,struct perf_reader ** readers,int timeout)221 int perf_reader_poll(int num_readers, struct perf_reader **readers, int timeout) {
222 struct pollfd pfds[num_readers];
223 int i;
224
225 for (i = 0; i <num_readers; ++i) {
226 pfds[i].fd = readers[i]->fd;
227 pfds[i].events = POLLIN;
228 }
229
230 if (poll(pfds, num_readers, timeout) > 0) {
231 for (i = 0; i < num_readers; ++i) {
232 if (pfds[i].revents & POLLIN)
233 perf_reader_event_read(readers[i]);
234 }
235 }
236 return 0;
237 }
238
perf_reader_set_fd(struct perf_reader * reader,int fd)239 void perf_reader_set_fd(struct perf_reader *reader, int fd) {
240 reader->fd = fd;
241 }
242
perf_reader_fd(struct perf_reader * reader)243 int perf_reader_fd(struct perf_reader *reader) {
244 return reader->fd;
245 }
246