• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ClatUtils.h"
18 
19 #include <arpa/inet.h>
20 #include <errno.h>
21 #include <linux/if.h>
22 #include <linux/netlink.h>
23 #include <linux/pkt_cls.h>
24 #include <linux/pkt_sched.h>
25 #include <linux/rtnetlink.h>
26 #include <sys/ioctl.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <unistd.h>
30 
31 #define LOG_TAG "ClatUtils"
32 #include <log/log.h>
33 
34 #include "NetlinkCommands.h"
35 #include "android-base/unique_fd.h"
36 #include "bpf/BpfUtils.h"
37 #include "netdbpf/bpf_shared.h"
38 
39 namespace android {
40 namespace net {
41 
hardwareAddressType(const std::string & interface)42 int hardwareAddressType(const std::string& interface) {
43     base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
44 
45     if (ufd < 0) {
46         const int err = errno;
47         ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
48         return -err;
49     };
50 
51     struct ifreq ifr = {};
52     // We use strncpy() instead of strlcpy() since kernel has to be able
53     // to handle non-zero terminated junk passed in by userspace anyway,
54     // and this way too long interface names (more than IFNAMSIZ-1 = 15
55     // characters plus terminating NULL) will not get truncated to 15
56     // characters and zero-terminated and thus potentially erroneously
57     // match a truncated interface if one were to exist.
58     strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
59 
60     if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
61 
62     return ifr.ifr_hwaddr.sa_family;
63 }
64 
getClatIngressMapFd(void)65 int getClatIngressMapFd(void) {
66     const int fd = bpf::bpfFdGet(CLAT_INGRESS_MAP_PATH, 0);
67     return (fd == -1) ? -errno : fd;
68 }
69 
getClatIngressProgFd(bool with_ethernet_header)70 int getClatIngressProgFd(bool with_ethernet_header) {
71     const int fd = bpf::bpfFdGet(
72             with_ethernet_header ? CLAT_INGRESS_PROG_ETHER_PATH : CLAT_INGRESS_PROG_RAWIP_PATH, 0);
73     return (fd == -1) ? -errno : fd;
74 }
75 
76 // TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
openNetlinkSocket(void)77 int openNetlinkSocket(void) {
78     base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
79     if (fd == -1) {
80         const int err = errno;
81         ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
82         return -err;
83     }
84 
85     int rv;
86 
87     const int on = 1;
88     rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
89     if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
90 
91     // this is needed to get sane strace netlink parsing, it allocates the pid
92     rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
93     if (rv) {
94         const int err = errno;
95         ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
96         return -err;
97     }
98 
99     // we do not want to receive messages from anyone besides the kernel
100     rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
101     if (rv) {
102         const int err = errno;
103         ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
104         return -err;
105     }
106 
107     return fd.release();
108 }
109 
110 // TODO: merge with //system/netd/server/SockDiag.cpp:checkError(fd)
processNetlinkResponse(int fd)111 int processNetlinkResponse(int fd) {
112     struct {
113         nlmsghdr h;
114         nlmsgerr e;
115         char buf[256];
116     } resp = {};
117 
118     const int rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
119 
120     if (rv == -1) {
121         const int err = errno;
122         ALOGE("recv() failed");
123         return -err;
124     }
125 
126     if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
127         ALOGE("recv() returned short packet: %d", rv);
128         return -EMSGSIZE;
129     }
130 
131     if (resp.h.nlmsg_len != (unsigned)rv) {
132         ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
133         return -EBADMSG;
134     }
135 
136     if (resp.h.nlmsg_type != NLMSG_ERROR) {
137         ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
138         return -EBADMSG;
139     }
140 
141     return resp.e.error;  // returns 0 on success
142 }
143 
144 // ADD:     nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
145 // REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
146 // DEL:     nlMsgType=RTM_DELQDISC nlMsgFlags=0
doTcQdiscClsact(int fd,int ifIndex,__u16 nlMsgType,__u16 nlMsgFlags)147 int doTcQdiscClsact(int fd, int ifIndex, __u16 nlMsgType, __u16 nlMsgFlags) {
148     // This is the name of the qdisc we are attaching.
149     // Some hoop jumping to make this compile time constant with known size,
150     // so that the structure declaration is well defined at compile time.
151 #define CLSACT "clsact"
152     static const char clsact[] = CLSACT;
153     // sizeof() includes the terminating NULL
154 #define ASCIIZ_LEN_CLSACT sizeof(clsact)
155 
156     const struct {
157         nlmsghdr n;
158         tcmsg t;
159         struct {
160             nlattr attr;
161             char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
162         } kind;
163     } req = {
164             .n =
165                     {
166                             .nlmsg_len = sizeof(req),
167                             .nlmsg_type = nlMsgType,
168                             .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
169                     },
170             .t =
171                     {
172                             .tcm_family = AF_UNSPEC,
173                             .tcm_ifindex = ifIndex,
174                             .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
175                             .tcm_parent = TC_H_CLSACT,
176                     },
177             .kind =
178                     {
179                             .attr =
180                                     {
181                                             .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
182                                             .nla_type = TCA_KIND,
183                                     },
184                             .str = CLSACT,
185                     },
186     };
187 #undef ASCIIZ_LEN_CLSACT
188 #undef CLSACT
189 
190     const int rv = send(fd, &req, sizeof(req), 0);
191     if (rv == -1) return -errno;
192     if (rv != sizeof(req)) return -EMSGSIZE;
193 
194     return processNetlinkResponse(fd);
195 }
196 
tcQdiscAddDevClsact(int fd,int ifIndex)197 int tcQdiscAddDevClsact(int fd, int ifIndex) {
198     return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_EXCL | NLM_F_CREATE);
199 }
200 
tcQdiscReplaceDevClsact(int fd,int ifIndex)201 int tcQdiscReplaceDevClsact(int fd, int ifIndex) {
202     return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_REPLACE);
203 }
204 
tcQdiscDelDevClsact(int fd,int ifIndex)205 int tcQdiscDelDevClsact(int fd, int ifIndex) {
206     return doTcQdiscClsact(fd, ifIndex, RTM_DELQDISC, 0);
207 }
208 
209 // tc filter add dev .. ingress prio 1 protocol ipv6 bpf object-pinned /sys/fs/bpf/... direct-action
tcFilterAddDevBpf(int fd,int ifIndex,int bpfFd,bool ethernet)210 int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
211     // The priority doesn't matter until we actually start attaching multiple
212     // things to the same interface's ingress point.
213     const int prio = 1;
214 
215     // This is the name of the filter we're attaching (ie. this is the 'bpf'
216     // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
217     //
218     // We go through some hoops in order to make this compile time constants
219     // so that we can define the struct further down the function with the
220     // field for this sized correctly already during the build.
221 #define BPF "bpf"
222     const char bpf[] = BPF;
223     // sizeof() includes the terminating NULL
224 #define ASCIIZ_LEN_BPF sizeof(bpf)
225 
226     // This is to replicate program name suffix used by 'tc' Linux cli
227     // when it attaches programs.
228 #define FSOBJ_SUFFIX ":[*fsobj]"
229 
230     // This macro expands (from header files) to:
231     //   prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
232     // and is the name of the pinned ebpf program for ARPHRD_RAWIP interfaces.
233     // (also compatible with anything that has 0 size L2 header)
234 #define NAME_RAWIP CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
235     const char name_rawip[] = NAME_RAWIP;
236 
237     // This macro expands (from header files) to:
238     //   prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
239     // and is the name of the pinned ebpf program for ARPHRD_ETHER interfaces.
240     // (also compatible with anything that has standard ethernet header)
241 #define NAME_ETHER CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
242     const char name_ether[] = NAME_ETHER;
243 
244     // The actual name we'll use is determined at run time via 'ethernet'
245     // boolean.  We need to compile time allocate enough space in the struct
246     // hence this macro magic to make sure we have enough space for either
247     // possibility.  In practice both are actually the same size.
248 #define ASCIIZ_MAXLEN_NAME \
249     ((sizeof(name_rawip) > sizeof(name_ether)) ? sizeof(name_rawip) : sizeof(name_ether))
250 
251     // This is not a compile time constant and is used in strcpy below
252 #define NAME (ethernet ? NAME_ETHER : NAME_RAWIP)
253 
254     struct {
255         nlmsghdr n;
256         tcmsg t;
257         struct {
258             nlattr attr;
259             char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
260         } kind;
261         struct {
262             nlattr attr;
263             struct {
264                 nlattr attr;
265                 __u32 u32;
266             } fd;
267             struct {
268                 nlattr attr;
269                 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
270             } name;
271             struct {
272                 nlattr attr;
273                 __u32 u32;
274             } flags;
275         } options;
276     } req = {
277             .n =
278                     {
279                             .nlmsg_len = sizeof(req),
280                             .nlmsg_type = RTM_NEWTFILTER,
281                             .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
282                     },
283             .t =
284                     {
285                             .tcm_family = AF_UNSPEC,
286                             .tcm_ifindex = ifIndex,
287                             .tcm_handle = TC_H_UNSPEC,
288                             .tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS),
289                             .tcm_info = (prio << 16) | htons(ETH_P_IPV6),
290                     },
291             .kind =
292                     {
293                             .attr =
294                                     {
295                                             .nla_len = sizeof(req.kind),
296                                             .nla_type = TCA_KIND,
297                                     },
298                             .str = BPF,
299                     },
300             .options =
301                     {
302                             .attr =
303                                     {
304                                             .nla_len = sizeof(req.options),
305                                             .nla_type = TCA_OPTIONS,
306                                     },
307                             .fd =
308                                     {
309                                             .attr =
310                                                     {
311                                                             .nla_len = sizeof(req.options.fd),
312                                                             .nla_type = TCA_BPF_FD,
313                                                     },
314                                             .u32 = static_cast<__u32>(bpfFd),
315                                     },
316                             .name =
317                                     {
318                                             .attr =
319                                                     {
320                                                             .nla_len = sizeof(req.options.name),
321                                                             .nla_type = TCA_BPF_NAME,
322                                                     },
323                                             // Visible via 'tc filter show', but
324                                             // is overwritten by strcpy below
325                                             .str = "placeholder",
326                                     },
327                             .flags =
328                                     {
329                                             .attr =
330                                                     {
331                                                             .nla_len = sizeof(req.options.flags),
332                                                             .nla_type = TCA_BPF_FLAGS,
333                                                     },
334                                             .u32 = TCA_BPF_FLAG_ACT_DIRECT,
335                                     },
336                     },
337     };
338 
339     strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
340 
341 #undef NAME
342 #undef ASCIIZ_MAXLEN_NAME
343 #undef NAME_ETHER
344 #undef NAME_RAWIP
345 #undef NAME
346 #undef ASCIIZ_LEN_BPF
347 #undef BPF
348 
349     const int rv = send(fd, &req, sizeof(req), 0);
350     if (rv == -1) return -errno;
351     if (rv != sizeof(req)) return -EMSGSIZE;
352 
353     return processNetlinkResponse(fd);
354 }
355 
356 }  // namespace net
357 }  // namespace android
358