1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "ClatUtils.h"
18
19 #include <arpa/inet.h>
20 #include <errno.h>
21 #include <linux/if.h>
22 #include <linux/netlink.h>
23 #include <linux/pkt_cls.h>
24 #include <linux/pkt_sched.h>
25 #include <linux/rtnetlink.h>
26 #include <sys/ioctl.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <unistd.h>
30
31 #define LOG_TAG "ClatUtils"
32 #include <log/log.h>
33
34 #include "NetlinkCommands.h"
35 #include "android-base/unique_fd.h"
36 #include "bpf/BpfUtils.h"
37 #include "netdbpf/bpf_shared.h"
38
39 namespace android {
40 namespace net {
41
hardwareAddressType(const std::string & interface)42 int hardwareAddressType(const std::string& interface) {
43 base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
44
45 if (ufd < 0) {
46 const int err = errno;
47 ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
48 return -err;
49 };
50
51 struct ifreq ifr = {};
52 // We use strncpy() instead of strlcpy() since kernel has to be able
53 // to handle non-zero terminated junk passed in by userspace anyway,
54 // and this way too long interface names (more than IFNAMSIZ-1 = 15
55 // characters plus terminating NULL) will not get truncated to 15
56 // characters and zero-terminated and thus potentially erroneously
57 // match a truncated interface if one were to exist.
58 strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
59
60 if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
61
62 return ifr.ifr_hwaddr.sa_family;
63 }
64
getClatIngressMapFd(void)65 int getClatIngressMapFd(void) {
66 const int fd = bpf::bpfFdGet(CLAT_INGRESS_MAP_PATH, 0);
67 return (fd == -1) ? -errno : fd;
68 }
69
getClatIngressProgFd(bool with_ethernet_header)70 int getClatIngressProgFd(bool with_ethernet_header) {
71 const int fd = bpf::bpfFdGet(
72 with_ethernet_header ? CLAT_INGRESS_PROG_ETHER_PATH : CLAT_INGRESS_PROG_RAWIP_PATH, 0);
73 return (fd == -1) ? -errno : fd;
74 }
75
76 // TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
openNetlinkSocket(void)77 int openNetlinkSocket(void) {
78 base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
79 if (fd == -1) {
80 const int err = errno;
81 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
82 return -err;
83 }
84
85 int rv;
86
87 const int on = 1;
88 rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
89 if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
90
91 // this is needed to get sane strace netlink parsing, it allocates the pid
92 rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
93 if (rv) {
94 const int err = errno;
95 ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
96 return -err;
97 }
98
99 // we do not want to receive messages from anyone besides the kernel
100 rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
101 if (rv) {
102 const int err = errno;
103 ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
104 return -err;
105 }
106
107 return fd.release();
108 }
109
110 // TODO: merge with //system/netd/server/SockDiag.cpp:checkError(fd)
processNetlinkResponse(int fd)111 int processNetlinkResponse(int fd) {
112 struct {
113 nlmsghdr h;
114 nlmsgerr e;
115 char buf[256];
116 } resp = {};
117
118 const int rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
119
120 if (rv == -1) {
121 const int err = errno;
122 ALOGE("recv() failed");
123 return -err;
124 }
125
126 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
127 ALOGE("recv() returned short packet: %d", rv);
128 return -EMSGSIZE;
129 }
130
131 if (resp.h.nlmsg_len != (unsigned)rv) {
132 ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
133 return -EBADMSG;
134 }
135
136 if (resp.h.nlmsg_type != NLMSG_ERROR) {
137 ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
138 return -EBADMSG;
139 }
140
141 return resp.e.error; // returns 0 on success
142 }
143
144 // ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
145 // REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
146 // DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
doTcQdiscClsact(int fd,int ifIndex,__u16 nlMsgType,__u16 nlMsgFlags)147 int doTcQdiscClsact(int fd, int ifIndex, __u16 nlMsgType, __u16 nlMsgFlags) {
148 // This is the name of the qdisc we are attaching.
149 // Some hoop jumping to make this compile time constant with known size,
150 // so that the structure declaration is well defined at compile time.
151 #define CLSACT "clsact"
152 static const char clsact[] = CLSACT;
153 // sizeof() includes the terminating NULL
154 #define ASCIIZ_LEN_CLSACT sizeof(clsact)
155
156 const struct {
157 nlmsghdr n;
158 tcmsg t;
159 struct {
160 nlattr attr;
161 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
162 } kind;
163 } req = {
164 .n =
165 {
166 .nlmsg_len = sizeof(req),
167 .nlmsg_type = nlMsgType,
168 .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
169 },
170 .t =
171 {
172 .tcm_family = AF_UNSPEC,
173 .tcm_ifindex = ifIndex,
174 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
175 .tcm_parent = TC_H_CLSACT,
176 },
177 .kind =
178 {
179 .attr =
180 {
181 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
182 .nla_type = TCA_KIND,
183 },
184 .str = CLSACT,
185 },
186 };
187 #undef ASCIIZ_LEN_CLSACT
188 #undef CLSACT
189
190 const int rv = send(fd, &req, sizeof(req), 0);
191 if (rv == -1) return -errno;
192 if (rv != sizeof(req)) return -EMSGSIZE;
193
194 return processNetlinkResponse(fd);
195 }
196
tcQdiscAddDevClsact(int fd,int ifIndex)197 int tcQdiscAddDevClsact(int fd, int ifIndex) {
198 return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_EXCL | NLM_F_CREATE);
199 }
200
tcQdiscReplaceDevClsact(int fd,int ifIndex)201 int tcQdiscReplaceDevClsact(int fd, int ifIndex) {
202 return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_REPLACE);
203 }
204
tcQdiscDelDevClsact(int fd,int ifIndex)205 int tcQdiscDelDevClsact(int fd, int ifIndex) {
206 return doTcQdiscClsact(fd, ifIndex, RTM_DELQDISC, 0);
207 }
208
209 // tc filter add dev .. ingress prio 1 protocol ipv6 bpf object-pinned /sys/fs/bpf/... direct-action
tcFilterAddDevBpf(int fd,int ifIndex,int bpfFd,bool ethernet)210 int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
211 // The priority doesn't matter until we actually start attaching multiple
212 // things to the same interface's ingress point.
213 const int prio = 1;
214
215 // This is the name of the filter we're attaching (ie. this is the 'bpf'
216 // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
217 //
218 // We go through some hoops in order to make this compile time constants
219 // so that we can define the struct further down the function with the
220 // field for this sized correctly already during the build.
221 #define BPF "bpf"
222 const char bpf[] = BPF;
223 // sizeof() includes the terminating NULL
224 #define ASCIIZ_LEN_BPF sizeof(bpf)
225
226 // This is to replicate program name suffix used by 'tc' Linux cli
227 // when it attaches programs.
228 #define FSOBJ_SUFFIX ":[*fsobj]"
229
230 // This macro expands (from header files) to:
231 // prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
232 // and is the name of the pinned ebpf program for ARPHRD_RAWIP interfaces.
233 // (also compatible with anything that has 0 size L2 header)
234 #define NAME_RAWIP CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
235 const char name_rawip[] = NAME_RAWIP;
236
237 // This macro expands (from header files) to:
238 // prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
239 // and is the name of the pinned ebpf program for ARPHRD_ETHER interfaces.
240 // (also compatible with anything that has standard ethernet header)
241 #define NAME_ETHER CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
242 const char name_ether[] = NAME_ETHER;
243
244 // The actual name we'll use is determined at run time via 'ethernet'
245 // boolean. We need to compile time allocate enough space in the struct
246 // hence this macro magic to make sure we have enough space for either
247 // possibility. In practice both are actually the same size.
248 #define ASCIIZ_MAXLEN_NAME \
249 ((sizeof(name_rawip) > sizeof(name_ether)) ? sizeof(name_rawip) : sizeof(name_ether))
250
251 // This is not a compile time constant and is used in strcpy below
252 #define NAME (ethernet ? NAME_ETHER : NAME_RAWIP)
253
254 struct {
255 nlmsghdr n;
256 tcmsg t;
257 struct {
258 nlattr attr;
259 char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
260 } kind;
261 struct {
262 nlattr attr;
263 struct {
264 nlattr attr;
265 __u32 u32;
266 } fd;
267 struct {
268 nlattr attr;
269 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
270 } name;
271 struct {
272 nlattr attr;
273 __u32 u32;
274 } flags;
275 } options;
276 } req = {
277 .n =
278 {
279 .nlmsg_len = sizeof(req),
280 .nlmsg_type = RTM_NEWTFILTER,
281 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
282 },
283 .t =
284 {
285 .tcm_family = AF_UNSPEC,
286 .tcm_ifindex = ifIndex,
287 .tcm_handle = TC_H_UNSPEC,
288 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS),
289 .tcm_info = (prio << 16) | htons(ETH_P_IPV6),
290 },
291 .kind =
292 {
293 .attr =
294 {
295 .nla_len = sizeof(req.kind),
296 .nla_type = TCA_KIND,
297 },
298 .str = BPF,
299 },
300 .options =
301 {
302 .attr =
303 {
304 .nla_len = sizeof(req.options),
305 .nla_type = TCA_OPTIONS,
306 },
307 .fd =
308 {
309 .attr =
310 {
311 .nla_len = sizeof(req.options.fd),
312 .nla_type = TCA_BPF_FD,
313 },
314 .u32 = static_cast<__u32>(bpfFd),
315 },
316 .name =
317 {
318 .attr =
319 {
320 .nla_len = sizeof(req.options.name),
321 .nla_type = TCA_BPF_NAME,
322 },
323 // Visible via 'tc filter show', but
324 // is overwritten by strcpy below
325 .str = "placeholder",
326 },
327 .flags =
328 {
329 .attr =
330 {
331 .nla_len = sizeof(req.options.flags),
332 .nla_type = TCA_BPF_FLAGS,
333 },
334 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
335 },
336 },
337 };
338
339 strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
340
341 #undef NAME
342 #undef ASCIIZ_MAXLEN_NAME
343 #undef NAME_ETHER
344 #undef NAME_RAWIP
345 #undef NAME
346 #undef ASCIIZ_LEN_BPF
347 #undef BPF
348
349 const int rv = send(fd, &req, sizeof(req), 0);
350 if (rv == -1) return -errno;
351 if (rv != sizeof(req)) return -EMSGSIZE;
352
353 return processNetlinkResponse(fd);
354 }
355
356 } // namespace net
357 } // namespace android
358