1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "OffloadUtils.h"
18
19 #include <arpa/inet.h>
20 #include <linux/if.h>
21 #include <linux/if_arp.h>
22 #include <linux/netlink.h>
23 #include <linux/pkt_cls.h>
24 #include <linux/pkt_sched.h>
25 #include <sys/ioctl.h>
26 #include <sys/socket.h>
27 #include <sys/types.h>
28 #include <unistd.h>
29
30 #define LOG_TAG "OffloadUtils"
31 #include <log/log.h>
32
33 #include "NetlinkCommands.h"
34 #include "android-base/unique_fd.h"
35
36 namespace android {
37 namespace net {
38
39 using std::max;
40
doSIOCGIF(const std::string & interface,int opt)41 static int doSIOCGIF(const std::string& interface, int opt) {
42 base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
43
44 if (ufd < 0) {
45 const int err = errno;
46 ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
47 return -err;
48 };
49
50 struct ifreq ifr = {};
51 // We use strncpy() instead of strlcpy() since kernel has to be able
52 // to handle non-zero terminated junk passed in by userspace anyway,
53 // and this way too long interface names (more than IFNAMSIZ-1 = 15
54 // characters plus terminating NULL) will not get truncated to 15
55 // characters and zero-terminated and thus potentially erroneously
56 // match a truncated interface if one were to exist.
57 strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
58
59 if (ioctl(ufd, opt, &ifr, sizeof(ifr))) return -errno;
60
61 if (opt == SIOCGIFHWADDR) return ifr.ifr_hwaddr.sa_family;
62 if (opt == SIOCGIFMTU) return ifr.ifr_mtu;
63 return -EINVAL;
64 }
65
hardwareAddressType(const std::string & interface)66 int hardwareAddressType(const std::string& interface) {
67 return doSIOCGIF(interface, SIOCGIFHWADDR);
68 }
69
deviceMTU(const std::string & interface)70 int deviceMTU(const std::string& interface) {
71 return doSIOCGIF(interface, SIOCGIFMTU);
72 }
73
isEthernet(const std::string & interface)74 base::Result<bool> isEthernet(const std::string& interface) {
75 int rv = hardwareAddressType(interface);
76 if (rv < 0) {
77 errno = -rv;
78 return ErrnoErrorf("Get hardware address type of interface {} failed", interface);
79 }
80
81 switch (rv) {
82 case ARPHRD_ETHER:
83 return true;
84 case ARPHRD_NONE:
85 case ARPHRD_RAWIP: // in Linux 4.14+ rmnet support was upstreamed and this is 519
86 case 530: // this is ARPHRD_RAWIP on some Android 4.9 kernels with rmnet
87 return false;
88 default:
89 errno = EAFNOSUPPORT; // Address family not supported
90 return ErrnoErrorf("Unknown hardware address type {} on interface {}", rv, interface);
91 }
92 }
93
94 // TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
95 // and //system/netd/server/SockDiag.cpp:checkError(fd)
sendAndProcessNetlinkResponse(const void * req,int len)96 static int sendAndProcessNetlinkResponse(const void* req, int len) {
97 base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
98 if (fd == -1) {
99 const int err = errno;
100 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
101 return -err;
102 }
103
104 static constexpr int on = 1;
105 int rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
106 if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
107
108 // this is needed to get sane strace netlink parsing, it allocates the pid
109 rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
110 if (rv) {
111 const int err = errno;
112 ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
113 return -err;
114 }
115
116 // we do not want to receive messages from anyone besides the kernel
117 rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
118 if (rv) {
119 const int err = errno;
120 ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
121 return -err;
122 }
123
124 rv = send(fd, req, len, 0);
125 if (rv == -1) return -errno;
126 if (rv != len) return -EMSGSIZE;
127
128 struct {
129 nlmsghdr h;
130 nlmsgerr e;
131 char buf[256];
132 } resp = {};
133
134 rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
135
136 if (rv == -1) {
137 const int err = errno;
138 ALOGE("recv() failed");
139 return -err;
140 }
141
142 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
143 ALOGE("recv() returned short packet: %d", rv);
144 return -EMSGSIZE;
145 }
146
147 if (resp.h.nlmsg_len != (unsigned)rv) {
148 ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
149 return -EBADMSG;
150 }
151
152 if (resp.h.nlmsg_type != NLMSG_ERROR) {
153 ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
154 return -EBADMSG;
155 }
156
157 return resp.e.error; // returns 0 on success
158 }
159
160 // ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
161 // REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
162 // DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
doTcQdiscClsact(int ifIndex,uint16_t nlMsgType,uint16_t nlMsgFlags)163 int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
164 // This is the name of the qdisc we are attaching.
165 // Some hoop jumping to make this compile time constant with known size,
166 // so that the structure declaration is well defined at compile time.
167 #define CLSACT "clsact"
168 // sizeof() includes the terminating NULL
169 static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
170
171 const struct {
172 nlmsghdr n;
173 tcmsg t;
174 struct {
175 nlattr attr;
176 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
177 } kind;
178 } req = {
179 .n =
180 {
181 .nlmsg_len = sizeof(req),
182 .nlmsg_type = nlMsgType,
183 .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
184 },
185 .t =
186 {
187 .tcm_family = AF_UNSPEC,
188 .tcm_ifindex = ifIndex,
189 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
190 .tcm_parent = TC_H_CLSACT,
191 },
192 .kind =
193 {
194 .attr =
195 {
196 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
197 .nla_type = TCA_KIND,
198 },
199 .str = CLSACT,
200 },
201 };
202 #undef CLSACT
203
204 return sendAndProcessNetlinkResponse(&req, sizeof(req));
205 }
206
// The priority of clat hook - must be after tethering.
// It is placed in the upper 16 bits of tcm_info by the tc filter add/del
// requests in this file.
constexpr uint16_t PRIO_CLAT{4};
209
210 // tc filter add dev .. in/egress prio 4 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
211 // direct-action
tcFilterAddDevBpf(int ifIndex,bool ingress,uint16_t proto,int bpfFd,bool ethernet)212 int tcFilterAddDevBpf(int ifIndex, bool ingress, uint16_t proto, int bpfFd, bool ethernet) {
213 // This is the name of the filter we're attaching (ie. this is the 'bpf'
214 // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
215 //
216 // We go through some hoops in order to make this compile time constants
217 // so that we can define the struct further down the function with the
218 // field for this sized correctly already during the build.
219 #define BPF "bpf"
220 // sizeof() includes the terminating NULL
221 static constexpr size_t ASCIIZ_LEN_BPF = sizeof(BPF);
222
223 // This is to replicate program name suffix used by 'tc' Linux cli
224 // when it attaches programs.
225 #define FSOBJ_SUFFIX ":[*fsobj]"
226
227 // This macro expands (from header files) to:
228 // prog_clatd_schedcls_ingress6_clat_rawip:[*fsobj]
229 // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
230 // (also compatible with anything that has 0 size L2 header)
231 static constexpr char name_clat_rx_rawip[] = CLAT_INGRESS6_PROG_RAWIP_NAME FSOBJ_SUFFIX;
232
233 // This macro expands (from header files) to:
234 // prog_clatd_schedcls_ingress6_clat_ether:[*fsobj]
235 // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
236 // (also compatible with anything that has standard ethernet header)
237 static constexpr char name_clat_rx_ether[] = CLAT_INGRESS6_PROG_ETHER_NAME FSOBJ_SUFFIX;
238
239 // This macro expands (from header files) to:
240 // prog_clatd_schedcls_egress4_clat_rawip:[*fsobj]
241 // and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
242 // (also compatible with anything that has 0 size L2 header)
243 static constexpr char name_clat_tx_rawip[] = CLAT_EGRESS4_PROG_RAWIP_NAME FSOBJ_SUFFIX;
244
245 // This macro expands (from header files) to:
246 // prog_clatd_schedcls_egress4_clat_ether:[*fsobj]
247 // and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
248 // (also compatible with anything that has standard ethernet header)
249 static constexpr char name_clat_tx_ether[] = CLAT_EGRESS4_PROG_ETHER_NAME FSOBJ_SUFFIX;
250
251 #undef FSOBJ_SUFFIX
252
253 // The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
254 // booleans. We need to compile time allocate enough space in the struct
255 // hence this macro magic to make sure we have enough space for either
256 // possibility. In practice some of these are actually the same size.
257 static constexpr size_t ASCIIZ_MAXLEN_NAME = max({
258 sizeof(name_clat_rx_rawip),
259 sizeof(name_clat_rx_ether),
260 sizeof(name_clat_tx_rawip),
261 sizeof(name_clat_tx_ether),
262 });
263
264 // These are not compile time constants: 'name' is used in strncpy below
265 const char* const name_clat_rx = ethernet ? name_clat_rx_ether : name_clat_rx_rawip;
266 const char* const name_clat_tx = ethernet ? name_clat_tx_ether : name_clat_tx_rawip;
267 const char* const name = ingress ? name_clat_rx : name_clat_tx;
268
269 struct {
270 nlmsghdr n;
271 tcmsg t;
272 struct {
273 nlattr attr;
274 char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
275 } kind;
276 struct {
277 nlattr attr;
278 struct {
279 nlattr attr;
280 __u32 u32;
281 } fd;
282 struct {
283 nlattr attr;
284 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
285 } name;
286 struct {
287 nlattr attr;
288 __u32 u32;
289 } flags;
290 } options;
291 } req = {
292 .n =
293 {
294 .nlmsg_len = sizeof(req),
295 .nlmsg_type = RTM_NEWTFILTER,
296 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
297 },
298 .t =
299 {
300 .tcm_family = AF_UNSPEC,
301 .tcm_ifindex = ifIndex,
302 .tcm_handle = TC_H_UNSPEC,
303 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
304 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
305 .tcm_info = static_cast<__u32>((PRIO_CLAT << 16) | htons(proto)),
306 },
307 .kind =
308 {
309 .attr =
310 {
311 .nla_len = sizeof(req.kind),
312 .nla_type = TCA_KIND,
313 },
314 .str = BPF,
315 },
316 .options =
317 {
318 .attr =
319 {
320 .nla_len = sizeof(req.options),
321 .nla_type = NLA_F_NESTED | TCA_OPTIONS,
322 },
323 .fd =
324 {
325 .attr =
326 {
327 .nla_len = sizeof(req.options.fd),
328 .nla_type = TCA_BPF_FD,
329 },
330 .u32 = static_cast<__u32>(bpfFd),
331 },
332 .name =
333 {
334 .attr =
335 {
336 .nla_len = sizeof(req.options.name),
337 .nla_type = TCA_BPF_NAME,
338 },
339 // Visible via 'tc filter show', but
340 // is overwritten by strncpy below
341 .str = "placeholder",
342 },
343 .flags =
344 {
345 .attr =
346 {
347 .nla_len = sizeof(req.options.flags),
348 .nla_type = TCA_BPF_FLAGS,
349 },
350 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
351 },
352 },
353 };
354 #undef BPF
355
356 strncpy(req.options.name.str, name, sizeof(req.options.name.str));
357
358 return sendAndProcessNetlinkResponse(&req, sizeof(req));
359 }
360
361 // tc filter del dev .. in/egress prio 4 protocol ..
tcFilterDelDev(int ifIndex,bool ingress,uint16_t proto)362 int tcFilterDelDev(int ifIndex, bool ingress, uint16_t proto) {
363 const struct {
364 nlmsghdr n;
365 tcmsg t;
366 } req = {
367 .n =
368 {
369 .nlmsg_len = sizeof(req),
370 .nlmsg_type = RTM_DELTFILTER,
371 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
372 },
373 .t =
374 {
375 .tcm_family = AF_UNSPEC,
376 .tcm_ifindex = ifIndex,
377 .tcm_handle = TC_H_UNSPEC,
378 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
379 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
380 .tcm_info = static_cast<__u32>((PRIO_CLAT << 16) | htons(proto)),
381 },
382 };
383
384 return sendAndProcessNetlinkResponse(&req, sizeof(req));
385 }
386
387 } // namespace net
388 } // namespace android
389