1 /*
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #pragma once
18
19 #include <android-base/result.h>
20 #include <android-base/unique_fd.h>
21 #include <linux/bpf.h>
22 #include <poll.h>
23 #include <sys/mman.h>
24 #include <utils/Log.h>
25
26 #include "bpf/BpfUtils.h"
27
28 #include <atomic>
29
30 namespace android {
31 namespace bpf {
32
33 // BpfRingbufBase contains the non-templated functionality of BPF ring buffers.
34 class BpfRingbufBase {
35 public:
~BpfRingbufBase()36 ~BpfRingbufBase() {
37 if (mConsumerPos) munmap(mConsumerPos, mConsumerSize);
38 if (mProducerPos) munmap(mProducerPos, mProducerSize);
39 mConsumerPos = nullptr;
40 mProducerPos = nullptr;
41 }
42
43 bool isEmpty(void);
44
45 // returns !isEmpty() for convenience
46 bool wait(int timeout_ms = -1);
47
48 protected:
49 // Non-initializing constructor, used by Create.
BpfRingbufBase(size_t value_size)50 BpfRingbufBase(size_t value_size) : mValueSize(value_size) {}
51
52 // Full construction that aborts on error (use Create/Init to handle errors).
BpfRingbufBase(const char * path,size_t value_size)53 BpfRingbufBase(const char* path, size_t value_size) : mValueSize(value_size) {
54 if (auto status = Init(path); !status.ok()) {
55 ALOGE("BpfRingbuf init failed: %s", status.error().message().c_str());
56 abort();
57 }
58 }
59
60 // Delete copy constructor (class owns raw pointers).
61 BpfRingbufBase(const BpfRingbufBase&) = delete;
62
63 // Initialize the base ringbuffer components. Must be called exactly once.
64 base::Result<void> Init(const char* path);
65
66 // Consumes all messages from the ring buffer, passing them to the callback.
67 base::Result<int> ConsumeAll(
68 const std::function<void(const void*)>& callback);
69
70 // Replicates c-style void* "byte-wise" pointer addition.
71 template <typename Ptr>
pointerAddBytes(void * base,ssize_t offset_bytes)72 static Ptr pointerAddBytes(void* base, ssize_t offset_bytes) {
73 return reinterpret_cast<Ptr>(reinterpret_cast<char*>(base) + offset_bytes);
74 }
75
76 // Rounds len by clearing bitmask, adding header, and aligning to 8 bytes.
roundLength(uint32_t len)77 static uint32_t roundLength(uint32_t len) {
78 len &= ~(BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT);
79 len += BPF_RINGBUF_HDR_SZ;
80 return (len + 7) & ~7;
81 }
82
83 const size_t mValueSize;
84
85 size_t mConsumerSize;
86 size_t mProducerSize;
87 unsigned long mPosMask;
88 android::base::unique_fd mRingFd;
89
90 void* mDataPos = nullptr;
91 // The kernel uses an "unsigned long" type for both consumer and producer position.
92 // Unsigned long is a 4 byte value on a 32-bit kernel, and an 8 byte value on a 64-bit kernel.
93 // To support 32-bit kernels, producer pos is capped at 4 bytes (despite it being 8 bytes on
94 // 64-bit kernels) and all comparisons of consumer and producer pos only compare the low-order 4
95 // bytes (an inequality comparison is performed to support overflow).
96 // This solution is bitness agnostic. The consumer only increments the 8 byte consumer pos, which,
97 // in a little-endian architecture, is safe since the entire page is mapped into memory and a
98 // 32-bit kernel will just ignore the high-order bits.
99 std::atomic_uint64_t* mConsumerPos = nullptr;
100 std::atomic_uint32_t* mProducerPos = nullptr;
101
102 // In order to guarantee atomic access in a 32 bit userspace environment, atomic_uint64_t is used
103 // in addition to std::atomic<T>::is_always_lock_free that guarantees that read / write operations
104 // are indeed atomic.
105 // Since std::atomic does not support wrapping preallocated memory, an additional static assert on
106 // the size of the atomic and the underlying type is added to ensure a reinterpret_cast from type
107 // to its atomic version is safe (is_always_lock_free being true should provide additional
108 // confidence).
109 static_assert(std::atomic_uint64_t::is_always_lock_free);
110 static_assert(std::atomic_uint32_t::is_always_lock_free);
111 static_assert(sizeof(std::atomic_uint64_t) == sizeof(uint64_t));
112 static_assert(sizeof(std::atomic_uint32_t) == sizeof(uint32_t));
113 };
114
115 // This is a class wrapper for eBPF ring buffers. An eBPF ring buffer is a
116 // special type of eBPF map used for sending messages from eBPF to userspace.
117 // The implementation relies on fast shared memory and atomics for the producer
118 // and consumer management. Ring buffers are a faster alternative to eBPF perf
119 // buffers.
120 //
121 // This class is thread compatible, but not thread safe.
122 //
123 // Note: A kernel eBPF ring buffer may be accessed by both kernel and userspace
124 // processes at the same time. However, the userspace consumers of a given ring
125 // buffer all share a single read pointer. There is no guarantee which readers
126 // will read which messages.
127 template <typename Value>
128 class BpfRingbuf : public BpfRingbufBase {
129 public:
130 using MessageCallback = std::function<void(const Value&)>;
131
132 // Creates a ringbuffer wrapper from a pinned path. This initialization will
133 // abort on error. To handle errors, initialize with Create instead.
BpfRingbuf(const char * path)134 BpfRingbuf(const char* path) : BpfRingbufBase(path, sizeof(Value)) {}
135
136 // Creates a ringbuffer wrapper from a pinned path. There are no guarantees
137 // that the ringbuf outputs messaged of type `Value`, only that they are the
138 // same size. Size is only checked in ConsumeAll.
139 static base::Result<std::unique_ptr<BpfRingbuf<Value>>> Create(
140 const char* path);
141
142 // Consumes all messages from the ring buffer, passing them to the callback.
143 // Returns the number of messages consumed or a non-ok result on error. If the
144 // ring buffer has no pending messages an OK result with count 0 is returned.
145 base::Result<int> ConsumeAll(const MessageCallback& callback);
146
147 private:
148 // Empty ctor for use by Create.
BpfRingbuf()149 BpfRingbuf() : BpfRingbufBase(sizeof(Value)) {}
150 };
151
152
Init(const char * path)153 inline base::Result<void> BpfRingbufBase::Init(const char* path) {
154 mRingFd.reset(mapRetrieveExclusiveRW(path));
155 if (!mRingFd.ok()) {
156 return android::base::ErrnoError()
157 << "failed to retrieve ringbuffer at " << path;
158 }
159
160 int map_type = android::bpf::bpfGetFdMapType(mRingFd);
161 if (map_type != BPF_MAP_TYPE_RINGBUF) {
162 errno = EINVAL;
163 return android::base::ErrnoError()
164 << "bpf map has wrong type: want BPF_MAP_TYPE_RINGBUF ("
165 << BPF_MAP_TYPE_RINGBUF << ") got " << map_type;
166 }
167
168 int max_entries = android::bpf::bpfGetFdMaxEntries(mRingFd);
169 if (max_entries < 0) {
170 return android::base::ErrnoError()
171 << "failed to read max_entries from ringbuf";
172 }
173 if (max_entries == 0) {
174 errno = EINVAL;
175 return android::base::ErrnoError() << "max_entries must be non-zero";
176 }
177
178 mPosMask = max_entries - 1;
179 mConsumerSize = getpagesize();
180 mProducerSize = getpagesize() + 2 * max_entries;
181
182 {
183 void* ptr = mmap(NULL, mConsumerSize, PROT_READ | PROT_WRITE, MAP_SHARED,
184 mRingFd, 0);
185 if (ptr == MAP_FAILED) {
186 return android::base::ErrnoError()
187 << "failed to mmap ringbuf consumer pages";
188 }
189 mConsumerPos = reinterpret_cast<decltype(mConsumerPos)>(ptr);
190 }
191
192 {
193 void* ptr = mmap(NULL, mProducerSize, PROT_READ, MAP_SHARED, mRingFd,
194 mConsumerSize);
195 if (ptr == MAP_FAILED) {
196 return android::base::ErrnoError()
197 << "failed to mmap ringbuf producer page";
198 }
199 mProducerPos = reinterpret_cast<decltype(mProducerPos)>(ptr);
200 }
201
202 mDataPos = pointerAddBytes<void*>(mProducerPos, getpagesize());
203 return {};
204 }
205
isEmpty(void)206 inline bool BpfRingbufBase::isEmpty(void) {
207 uint32_t prod_pos = mProducerPos->load(std::memory_order_relaxed);
208 uint64_t cons_pos = mConsumerPos->load(std::memory_order_relaxed);
209 return (cons_pos & 0xFFFFFFFF) == prod_pos;
210 }
211
wait(int timeout_ms)212 inline bool BpfRingbufBase::wait(int timeout_ms) {
213 // possible optimization: if (!isEmpty()) return true;
214 struct pollfd pfd = { // 1-element array
215 .fd = mRingFd.get(),
216 .events = POLLIN,
217 };
218 (void)poll(&pfd, 1, timeout_ms); // 'best effort' poll
219 return !isEmpty();
220 }
221
ConsumeAll(const std::function<void (const void *)> & callback)222 inline base::Result<int> BpfRingbufBase::ConsumeAll(
223 const std::function<void(const void*)>& callback) {
224 int64_t count = 0;
225 uint32_t prod_pos = mProducerPos->load(std::memory_order_acquire);
226 // Only userspace writes to mConsumerPos, so no need to use std::memory_order_acquire
227 uint64_t cons_pos = mConsumerPos->load(std::memory_order_relaxed);
228 while ((cons_pos & 0xFFFFFFFF) != prod_pos) {
229 // Find the start of the entry for this read (wrapping is done here).
230 void* start_ptr = pointerAddBytes<void*>(mDataPos, cons_pos & mPosMask);
231
232 // The entry has an 8 byte header containing the sample length.
233 // struct bpf_ringbuf_hdr {
234 // u32 len;
235 // u32 pg_off;
236 // };
237 uint32_t length = *reinterpret_cast<volatile uint32_t*>(start_ptr);
238
239 // If the sample isn't committed, we're caught up with the producer.
240 if (length & BPF_RINGBUF_BUSY_BIT) return count;
241
242 cons_pos += roundLength(length);
243
244 if ((length & BPF_RINGBUF_DISCARD_BIT) == 0) {
245 if (length != mValueSize) {
246 mConsumerPos->store(cons_pos, std::memory_order_release);
247 errno = EMSGSIZE;
248 return android::base::ErrnoError()
249 << "BPF ring buffer message has unexpected size (want "
250 << mValueSize << " bytes, got " << length << " bytes)";
251 }
252 callback(pointerAddBytes<const void*>(start_ptr, BPF_RINGBUF_HDR_SZ));
253 count++;
254 }
255
256 mConsumerPos->store(cons_pos, std::memory_order_release);
257 }
258
259 return count;
260 }
261
262 template <typename Value>
263 inline base::Result<std::unique_ptr<BpfRingbuf<Value>>>
Create(const char * path)264 BpfRingbuf<Value>::Create(const char* path) {
265 auto rb = std::unique_ptr<BpfRingbuf>(new BpfRingbuf);
266 if (auto status = rb->Init(path); !status.ok()) return status.error();
267 return rb;
268 }
269
270 template <typename Value>
ConsumeAll(const MessageCallback & callback)271 inline base::Result<int> BpfRingbuf<Value>::ConsumeAll(
272 const MessageCallback& callback) {
273 return BpfRingbufBase::ConsumeAll([&](const void* value) {
274 callback(*reinterpret_cast<const Value*>(value));
275 });
276 }
277
278 } // namespace bpf
279 } // namespace android
280