• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <android-base/result.h>
20 #include <android-base/unique_fd.h>
21 #include <linux/bpf.h>
22 #include <poll.h>
23 #include <sys/mman.h>
24 #include <utils/Log.h>
25 
26 #include "bpf/BpfUtils.h"
27 
28 #include <atomic>
29 
30 namespace android {
31 namespace bpf {
32 
33 // BpfRingbufBase contains the non-templated functionality of BPF ring buffers.
34 class BpfRingbufBase {
35  public:
~BpfRingbufBase()36   ~BpfRingbufBase() {
37     if (mConsumerPos) munmap(mConsumerPos, mConsumerSize);
38     if (mProducerPos) munmap(mProducerPos, mProducerSize);
39     mConsumerPos = nullptr;
40     mProducerPos = nullptr;
41   }
42 
43   bool isEmpty(void);
44 
45   // returns !isEmpty() for convenience
46   bool wait(int timeout_ms = -1);
47 
48  protected:
49   // Non-initializing constructor, used by Create.
BpfRingbufBase(size_t value_size)50   BpfRingbufBase(size_t value_size) : mValueSize(value_size) {}
51 
52   // Full construction that aborts on error (use Create/Init to handle errors).
BpfRingbufBase(const char * path,size_t value_size)53   BpfRingbufBase(const char* path, size_t value_size) : mValueSize(value_size) {
54     if (auto status = Init(path); !status.ok()) {
55       ALOGE("BpfRingbuf init failed: %s", status.error().message().c_str());
56       abort();
57     }
58   }
59 
60   // Delete copy constructor (class owns raw pointers).
61   BpfRingbufBase(const BpfRingbufBase&) = delete;
62 
63   // Initialize the base ringbuffer components. Must be called exactly once.
64   base::Result<void> Init(const char* path);
65 
66   // Consumes all messages from the ring buffer, passing them to the callback.
67   base::Result<int> ConsumeAll(
68       const std::function<void(const void*)>& callback);
69 
70   // Replicates c-style void* "byte-wise" pointer addition.
71   template <typename Ptr>
pointerAddBytes(void * base,ssize_t offset_bytes)72   static Ptr pointerAddBytes(void* base, ssize_t offset_bytes) {
73     return reinterpret_cast<Ptr>(reinterpret_cast<char*>(base) + offset_bytes);
74   }
75 
76   // Rounds len by clearing bitmask, adding header, and aligning to 8 bytes.
roundLength(uint32_t len)77   static uint32_t roundLength(uint32_t len) {
78     len &= ~(BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT);
79     len += BPF_RINGBUF_HDR_SZ;
80     return (len + 7) & ~7;
81   }
82 
83   const size_t mValueSize;
84 
85   size_t mConsumerSize;
86   size_t mProducerSize;
87   unsigned long mPosMask;
88   android::base::unique_fd mRingFd;
89 
90   void* mDataPos = nullptr;
91   // The kernel uses an "unsigned long" type for both consumer and producer position.
92   // Unsigned long is a 4 byte value on a 32-bit kernel, and an 8 byte value on a 64-bit kernel.
93   // To support 32-bit kernels, producer pos is capped at 4 bytes (despite it being 8 bytes on
94   // 64-bit kernels) and all comparisons of consumer and producer pos only compare the low-order 4
95   // bytes (an inequality comparison is performed to support overflow).
96   // This solution is bitness agnostic. The consumer only increments the 8 byte consumer pos, which,
97   // in a little-endian architecture, is safe since the entire page is mapped into memory and a
98   // 32-bit kernel will just ignore the high-order bits.
99   std::atomic_uint64_t* mConsumerPos = nullptr;
100   std::atomic_uint32_t* mProducerPos = nullptr;
101 
102   // In order to guarantee atomic access in a 32 bit userspace environment, atomic_uint64_t is used
103   // in addition to std::atomic<T>::is_always_lock_free that guarantees that read / write operations
104   // are indeed atomic.
105   // Since std::atomic does not support wrapping preallocated memory, an additional static assert on
106   // the size of the atomic and the underlying type is added to ensure a reinterpret_cast from type
107   // to its atomic version is safe (is_always_lock_free being true should provide additional
108   // confidence).
109   static_assert(std::atomic_uint64_t::is_always_lock_free);
110   static_assert(std::atomic_uint32_t::is_always_lock_free);
111   static_assert(sizeof(std::atomic_uint64_t) == sizeof(uint64_t));
112   static_assert(sizeof(std::atomic_uint32_t) == sizeof(uint32_t));
113 };
114 
115 // This is a class wrapper for eBPF ring buffers. An eBPF ring buffer is a
116 // special type of eBPF map used for sending messages from eBPF to userspace.
117 // The implementation relies on fast shared memory and atomics for the producer
118 // and consumer management. Ring buffers are a faster alternative to eBPF perf
119 // buffers.
120 //
121 // This class is thread compatible, but not thread safe.
122 //
123 // Note: A kernel eBPF ring buffer may be accessed by both kernel and userspace
124 // processes at the same time. However, the userspace consumers of a given ring
125 // buffer all share a single read pointer. There is no guarantee which readers
126 // will read which messages.
127 template <typename Value>
128 class BpfRingbuf : public BpfRingbufBase {
129  public:
130   using MessageCallback = std::function<void(const Value&)>;
131 
132   // Creates a ringbuffer wrapper from a pinned path. This initialization will
133   // abort on error. To handle errors, initialize with Create instead.
BpfRingbuf(const char * path)134   BpfRingbuf(const char* path) : BpfRingbufBase(path, sizeof(Value)) {}
135 
136   // Creates a ringbuffer wrapper from a pinned path. There are no guarantees
137   // that the ringbuf outputs messaged of type `Value`, only that they are the
138   // same size. Size is only checked in ConsumeAll.
139   static base::Result<std::unique_ptr<BpfRingbuf<Value>>> Create(
140       const char* path);
141 
142   // Consumes all messages from the ring buffer, passing them to the callback.
143   // Returns the number of messages consumed or a non-ok result on error. If the
144   // ring buffer has no pending messages an OK result with count 0 is returned.
145   base::Result<int> ConsumeAll(const MessageCallback& callback);
146 
147  private:
148   // Empty ctor for use by Create.
BpfRingbuf()149   BpfRingbuf() : BpfRingbufBase(sizeof(Value)) {}
150 };
151 
152 
Init(const char * path)153 inline base::Result<void> BpfRingbufBase::Init(const char* path) {
154   mRingFd.reset(mapRetrieveExclusiveRW(path));
155   if (!mRingFd.ok()) {
156     return android::base::ErrnoError()
157            << "failed to retrieve ringbuffer at " << path;
158   }
159 
160   int map_type = android::bpf::bpfGetFdMapType(mRingFd);
161   if (map_type != BPF_MAP_TYPE_RINGBUF) {
162     errno = EINVAL;
163     return android::base::ErrnoError()
164            << "bpf map has wrong type: want BPF_MAP_TYPE_RINGBUF ("
165            << BPF_MAP_TYPE_RINGBUF << ") got " << map_type;
166   }
167 
168   int max_entries = android::bpf::bpfGetFdMaxEntries(mRingFd);
169   if (max_entries < 0) {
170     return android::base::ErrnoError()
171            << "failed to read max_entries from ringbuf";
172   }
173   if (max_entries == 0) {
174     errno = EINVAL;
175     return android::base::ErrnoError() << "max_entries must be non-zero";
176   }
177 
178   mPosMask = max_entries - 1;
179   mConsumerSize = getpagesize();
180   mProducerSize = getpagesize() + 2 * max_entries;
181 
182   {
183     void* ptr = mmap(NULL, mConsumerSize, PROT_READ | PROT_WRITE, MAP_SHARED,
184                      mRingFd, 0);
185     if (ptr == MAP_FAILED) {
186       return android::base::ErrnoError()
187              << "failed to mmap ringbuf consumer pages";
188     }
189     mConsumerPos = reinterpret_cast<decltype(mConsumerPos)>(ptr);
190   }
191 
192   {
193     void* ptr = mmap(NULL, mProducerSize, PROT_READ, MAP_SHARED, mRingFd,
194                      mConsumerSize);
195     if (ptr == MAP_FAILED) {
196       return android::base::ErrnoError()
197              << "failed to mmap ringbuf producer page";
198     }
199     mProducerPos = reinterpret_cast<decltype(mProducerPos)>(ptr);
200   }
201 
202   mDataPos = pointerAddBytes<void*>(mProducerPos, getpagesize());
203   return {};
204 }
205 
isEmpty(void)206 inline bool BpfRingbufBase::isEmpty(void) {
207   uint32_t prod_pos = mProducerPos->load(std::memory_order_relaxed);
208   uint64_t cons_pos = mConsumerPos->load(std::memory_order_relaxed);
209   return (cons_pos & 0xFFFFFFFF) == prod_pos;
210 }
211 
wait(int timeout_ms)212 inline bool BpfRingbufBase::wait(int timeout_ms) {
213   // possible optimization: if (!isEmpty()) return true;
214   struct pollfd pfd = {  // 1-element array
215     .fd = mRingFd.get(),
216     .events = POLLIN,
217   };
218   (void)poll(&pfd, 1, timeout_ms);  // 'best effort' poll
219   return !isEmpty();
220 }
221 
ConsumeAll(const std::function<void (const void *)> & callback)222 inline base::Result<int> BpfRingbufBase::ConsumeAll(
223     const std::function<void(const void*)>& callback) {
224   int64_t count = 0;
225   uint32_t prod_pos = mProducerPos->load(std::memory_order_acquire);
226   // Only userspace writes to mConsumerPos, so no need to use std::memory_order_acquire
227   uint64_t cons_pos = mConsumerPos->load(std::memory_order_relaxed);
228   while ((cons_pos & 0xFFFFFFFF) != prod_pos) {
229     // Find the start of the entry for this read (wrapping is done here).
230     void* start_ptr = pointerAddBytes<void*>(mDataPos, cons_pos & mPosMask);
231 
232     // The entry has an 8 byte header containing the sample length.
233     // struct bpf_ringbuf_hdr {
234     //   u32 len;
235     //   u32 pg_off;
236     // };
237     uint32_t length = *reinterpret_cast<volatile uint32_t*>(start_ptr);
238 
239     // If the sample isn't committed, we're caught up with the producer.
240     if (length & BPF_RINGBUF_BUSY_BIT) return count;
241 
242     cons_pos += roundLength(length);
243 
244     if ((length & BPF_RINGBUF_DISCARD_BIT) == 0) {
245       if (length != mValueSize) {
246         mConsumerPos->store(cons_pos, std::memory_order_release);
247         errno = EMSGSIZE;
248         return android::base::ErrnoError()
249                << "BPF ring buffer message has unexpected size (want "
250                << mValueSize << " bytes, got " << length << " bytes)";
251       }
252       callback(pointerAddBytes<const void*>(start_ptr, BPF_RINGBUF_HDR_SZ));
253       count++;
254     }
255 
256     mConsumerPos->store(cons_pos, std::memory_order_release);
257   }
258 
259   return count;
260 }
261 
262 template <typename Value>
263 inline base::Result<std::unique_ptr<BpfRingbuf<Value>>>
Create(const char * path)264 BpfRingbuf<Value>::Create(const char* path) {
265   auto rb = std::unique_ptr<BpfRingbuf>(new BpfRingbuf);
266   if (auto status = rb->Init(path); !status.ok()) return status.error();
267   return rb;
268 }
269 
270 template <typename Value>
ConsumeAll(const MessageCallback & callback)271 inline base::Result<int> BpfRingbuf<Value>::ConsumeAll(
272     const MessageCallback& callback) {
273   return BpfRingbufBase::ConsumeAll([&](const void* value) {
274     callback(*reinterpret_cast<const Value*>(value));
275   });
276 }
277 
278 }  // namespace bpf
279 }  // namespace android
280