• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Daniel Drown
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  * clatd.c - tun interface setup and main event loop
17  */
18 #include <arpa/inet.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <poll.h>
22 #include <signal.h>
23 #include <stdbool.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <sys/ioctl.h>
28 #include <sys/prctl.h>
29 #include <sys/stat.h>
30 #include <sys/types.h>
31 #include <time.h>
32 #include <unistd.h>
33 
34 #include <linux/filter.h>
35 #include <linux/if.h>
36 #include <linux/if_ether.h>
37 #include <linux/if_packet.h>
38 #include <linux/if_tun.h>
39 #include <linux/virtio_net.h>
40 #include <net/if.h>
41 #include <sys/uio.h>
42 
43 #include "clatd.h"
44 #include "checksum.h"
45 #include "config.h"
46 #include "dump.h"
47 #include "logging.h"
48 #include "translate.h"
49 
// Daemon-wide configuration; send_dad() reads native_ipv6_interface from it.
50 struct clat_config Global_Clatd_Config;
51 
// Tested by event_loop() on every iteration; a non-zero value ends the loop.
// NOTE(review): presumably set by a signal handler installed elsewhere - confirm.
52 volatile sig_atomic_t sigterm = 0;
// Cleared by the process_packet_* functions on ENETDOWN or a zero-length read;
// event_loop() stops once it is false.
53 bool running = true;
54 
55 // reads IPv6 packet from AF_PACKET socket, translates to IPv4, writes to tun
process_packet_6_to_4(struct tun_data * tunnel)56 void process_packet_6_to_4(struct tun_data *tunnel) {
57   // ethernet header is 14 bytes, plus 4 for a normal VLAN tag or 8 for Q-in-Q
58   // we don't really support vlans (or especially Q-in-Q)...
59   // but a few bytes of extra buffer space doesn't hurt...
60   struct {
61     struct virtio_net_hdr vnet;
62     uint8_t payload[22 + MAXMTU];
63     char pad; // +1 to make packet truncation obvious
64   } buf;
65   struct iovec iov = {
66     .iov_base = &buf,
67     .iov_len = sizeof(buf),
68   };
69   char cmsg_buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
70   struct msghdr msgh = {
71     .msg_iov = &iov,
72     .msg_iovlen = 1,
73     .msg_control = cmsg_buf,
74     .msg_controllen = sizeof(cmsg_buf),
75   };
76   ssize_t readlen = recvmsg(tunnel->read_fd6, &msgh, /*flags*/ 0);
77 
78   if (readlen < 0) {
79     if (errno != EAGAIN) {
80       logmsg(ANDROID_LOG_WARN, "%s: read error: %s", __func__, strerror(errno));
81     }
82     if (errno == ENETDOWN) running = false;
83     return;
84   } else if (readlen == 0) {
85     logmsg(ANDROID_LOG_WARN, "%s: packet socket removed?", __func__);
86     running = false;
87     return;
88   } else if (readlen >= sizeof(buf)) {
89     logmsg(ANDROID_LOG_WARN, "%s: read truncation - ignoring pkt", __func__);
90     return;
91   }
92 
93   bool ok = false;
94   __u32 tp_status = 0;
95   __u16 tp_net = 0;
96 
97   for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; cmsg = CMSG_NXTHDR(&msgh,cmsg)) {
98     if (cmsg->cmsg_level == SOL_PACKET && cmsg->cmsg_type == PACKET_AUXDATA) {
99       struct tpacket_auxdata *aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
100       ok = true;
101       tp_status = aux->tp_status;
102       tp_net = aux->tp_net;
103       break;
104     }
105   }
106 
107   if (!ok) {
108     // theoretically this should not happen...
109     static bool logged = false;
110     if (!logged) {
111       logmsg(ANDROID_LOG_ERROR, "%s: failed to fetch tpacket_auxdata cmsg", __func__);
112       logged = true;
113     }
114   }
115 
116   const int payload_offset = offsetof(typeof(buf), payload);
117   if (readlen < payload_offset + tp_net) {
118     logmsg(ANDROID_LOG_WARN, "%s: ignoring %zd byte pkt shorter than %d+%u L2 header",
119            __func__, readlen, payload_offset, tp_net);
120     return;
121   }
122 
123   const int pkt_len = readlen - payload_offset;
124 
125   // This will detect a skb->ip_summed == CHECKSUM_PARTIAL packet with non-final L4 checksum
126   if (tp_status & TP_STATUS_CSUMNOTREADY) {
127     static bool logged = false;
128     if (!logged) {
129       logmsg(ANDROID_LOG_WARN, "%s: L4 checksum calculation required", __func__);
130       logged = true;
131     }
132 
133     // These are non-negative by virtue of csum_start/offset being u16
134     const int cs_start = buf.vnet.csum_start;
135     const int cs_offset = cs_start + buf.vnet.csum_offset;
136     if (cs_start > pkt_len) {
137       logmsg(ANDROID_LOG_ERROR, "%s: out of range - checksum start %d > %d",
138              __func__, cs_start, pkt_len);
139     } else if (cs_offset + 1 >= pkt_len) {
140       logmsg(ANDROID_LOG_ERROR, "%s: out of range - checksum offset %d + 1 >= %d",
141              __func__, cs_offset, pkt_len);
142     } else {
143       uint16_t csum = ip_checksum(buf.payload + cs_start, pkt_len - cs_start);
144       if (!csum) csum = 0xFFFF;  // required fixup for UDP, TCP must live with it
145       buf.payload[cs_offset] = csum & 0xFF;
146       buf.payload[cs_offset + 1] = csum >> 8;
147     }
148   }
149 
150   translate_packet(tunnel->fd4, 0 /* to_ipv6 */, buf.payload + tp_net, pkt_len - tp_net);
151 }
152 
153 // reads TUN_PI + L3 IPv4 packet from tun, translates to IPv6, writes to AF_INET6/RAW socket
process_packet_4_to_6(struct tun_data * tunnel)154 void process_packet_4_to_6(struct tun_data *tunnel) {
155   struct {
156     struct tun_pi pi;
157     uint8_t payload[MAXMTU];
158     char pad; // +1 byte to make packet truncation obvious
159   } buf;
160   ssize_t readlen = read(tunnel->fd4, &buf, sizeof(buf));
161 
162   if (readlen < 0) {
163     if (errno != EAGAIN) {
164       logmsg(ANDROID_LOG_WARN, "%s: read error: %s", __func__, strerror(errno));
165     }
166     if (errno == ENETDOWN) running = false;  // not sure if this can happen
167     return;
168   } else if (readlen == 0) {
169     logmsg(ANDROID_LOG_WARN, "%s: tun interface removed", __func__);
170     running = false;
171     return;
172   } else if (readlen >= sizeof(buf)) {
173     logmsg(ANDROID_LOG_WARN, "%s: read truncation - ignoring pkt", __func__);
174     return;
175   }
176 
177   const int payload_offset = offsetof(typeof(buf), payload);
178 
179   if (readlen < payload_offset) {
180     logmsg(ANDROID_LOG_WARN, "%s: short read: got %ld bytes", __func__, readlen);
181     return;
182   }
183 
184   const int pkt_len = readlen - payload_offset;
185 
186   uint16_t proto = ntohs(buf.pi.proto);
187   if (proto != ETH_P_IP) {
188     logmsg(ANDROID_LOG_WARN, "%s: unknown packet type = 0x%x", __func__, proto);
189     return;
190   }
191 
192   if (buf.pi.flags != 0) {
193     logmsg(ANDROID_LOG_WARN, "%s: unexpected flags = %d", __func__, buf.pi.flags);
194   }
195 
196   translate_packet(tunnel->write_fd6, 1 /* to_ipv6 */, buf.payload, pkt_len);
197 }
198 
199 // IPv6 DAD packet format:
200 //   Ethernet header (if needed) will be added by the kernel:
201 //     u8[6] src_mac; u8[6] dst_mac '33:33:ff:XX:XX:XX'; be16 ethertype '0x86DD'
202 //   IPv6 header:
203 //     be32 0x60000000 - ipv6, tclass 0, flowlabel 0
204 //     be16 payload_length '32'; u8 nxt_hdr ICMPv6 '58'; u8 hop limit '255'
205 //     u128 src_ip6 '::'
206 //     u128 dst_ip6 'ff02::1:ffXX:XXXX'
207 //   ICMPv6 header:
208 //     u8 type '135'; u8 code '0'; u16 icmp6 checksum; u32 reserved '0'
209 //   ICMPv6 neighbour solicitation payload:
210 //     u128 tgt_ip6
211 //   ICMPv6 ND options:
212 //     u8 opt nr '14'; u8 length '1'; u8[6] nonce '6 random bytes'
send_dad(int fd,const struct in6_addr * tgt)213 void send_dad(int fd, const struct in6_addr* tgt) {
214   struct {
215     struct ip6_hdr ip6h;
216     struct nd_neighbor_solicit ns;
217     uint8_t ns_opt_nr;
218     uint8_t ns_opt_len;
219     uint8_t ns_opt_nonce[6];
220   } dad_pkt = {
221     .ip6h = {
222       .ip6_flow = htonl(6 << 28),  // v6, 0 tclass, 0 flowlabel
223       .ip6_plen = htons(sizeof(dad_pkt) - sizeof(struct ip6_hdr)),  // payload length, ie. 32
224       .ip6_nxt = IPPROTO_ICMPV6,  // 58
225       .ip6_hlim = 255,
226       .ip6_src = {},  // ::
227       .ip6_dst.s6_addr = {
228         0xFF, 0x02, 0, 0,
229         0, 0, 0, 0,
230         0, 0, 0, 1,
231         0xFF, tgt->s6_addr[13], tgt->s6_addr[14], tgt->s6_addr[15],
232       },  // ff02::1:ffXX:XXXX - multicast group address derived from bottom 24-bits of tgt
233     },
234     .ns = {
235       .nd_ns_type = ND_NEIGHBOR_SOLICIT,  // 135
236       .nd_ns_code = 0,
237       .nd_ns_cksum = 0,  // will be calculated later
238       .nd_ns_reserved = 0,
239       .nd_ns_target = *tgt,
240     },
241     .ns_opt_nr = 14,  // icmp6 option 'nonce' from RFC3971
242     .ns_opt_len = 1,  // in units of 8 bytes, including option nr and len
243     .ns_opt_nonce = {},  // opt_len *8 - sizeof u8(opt_nr) - sizeof u8(opt_len) = 6 ranodmized bytes
244   };
245   arc4random_buf(&dad_pkt.ns_opt_nonce, sizeof(dad_pkt.ns_opt_nonce));
246 
247   // 40 byte IPv6 header + 8 byte ICMPv6 header + 16 byte ipv6 target address + 8 byte nonce option
248   _Static_assert(sizeof(dad_pkt) == 40 + 8 + 16 + 8, "sizeof dad packet != 72");
249 
250   // IPv6 header checksum is standard negated 16-bit one's complement sum over the icmpv6 pseudo
251   // header (which includes payload length, nextheader, and src/dst ip) and the icmpv6 payload.
252   //
253   // Src/dst ip immediately prefix the icmpv6 header itself, so can be handled along
254   // with the payload.  We thus only need to manually account for payload len & next header.
255   //
256   // The magic '8' is simply the offset of the ip6_src field in the ipv6 header,
257   // ie. we're skipping over the ipv6 version, tclass, flowlabel, payload length, next header
258   // and hop limit fields, because they're not quite where we want them to be.
259   //
260   // ip6_plen is already in network order, while ip6_nxt is a single byte and thus needs htons().
261   uint32_t csum = dad_pkt.ip6h.ip6_plen + htons(dad_pkt.ip6h.ip6_nxt);
262   csum = ip_checksum_add(csum, &dad_pkt.ip6h.ip6_src, sizeof(dad_pkt) - 8);
263   dad_pkt.ns.nd_ns_cksum = ip_checksum_finish(csum);
264 
265   const struct sockaddr_in6 dst = {
266     .sin6_family = AF_INET6,
267     .sin6_addr = dad_pkt.ip6h.ip6_dst,
268     .sin6_scope_id = if_nametoindex(Global_Clatd_Config.native_ipv6_interface),
269   };
270 
271   sendto(fd, &dad_pkt, sizeof(dad_pkt), 0 /*flags*/, (const struct sockaddr *)&dst, sizeof(dst));
272 }
273 
274 /* function: event_loop
275  * reads packets from the tun network interface and passes them down the stack
276  *   tunnel - tun device data
277  */
event_loop(struct tun_data * tunnel)278 void event_loop(struct tun_data *tunnel) {
279   struct pollfd wait_fd[] = {
280     { tunnel->read_fd6, POLLIN, 0 },
281     { tunnel->fd4, POLLIN, 0 },
282   };
283 
284   while (running && !sigterm) {
285     if (poll(wait_fd, ARRAY_SIZE(wait_fd), -1) == -1) {
286       if (errno != EINTR) {
287         logmsg(ANDROID_LOG_WARN, "event_loop/poll returned an error: %s", strerror(errno));
288       }
289     } else {
290       // Call process_packet if the socket has data to be read, but also if an
291       // error is waiting. If we don't call read() after getting POLLERR, a
292       // subsequent poll() will return immediately with POLLERR again,
293       // causing this code to spin in a loop. Calling read() will clear the
294       // socket error flag instead.
295       if (wait_fd[0].revents) process_packet_6_to_4(tunnel);
296       if (wait_fd[1].revents) process_packet_4_to_6(tunnel);
297     }
298   }
299 }
300