/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include <linux/bpf.h>
18 #include <linux/if.h>
19 #include <linux/if_ether.h>
20 #include <linux/in.h>
21 #include <linux/in6.h>
22 #include <linux/ip.h>
23 #include <linux/ipv6.h>
24 #include <linux/pkt_cls.h>
25 #include <linux/swab.h>
26 #include <linux/tcp.h>
27 #include <linux/udp.h>
28 #include <stdbool.h>
29 #include <stdint.h>
30 
31 #include "bpf_helpers.h"
32 #include "netdbpf/bpf_shared.h"
33 
// bionic/libc/kernel/uapi/linux/udp.h declares the UDP header as:
//   struct __kernel_udphdr {
// because bionic/libc/kernel/tools/defaults.py renames it:
//   # We want to support both BSD and Linux member names in struct udphdr.
//   "udphdr": "__kernel_udphdr",
// so plain 'struct udphdr' does not work in this file. Alias it to the name that
// the bionic uapi header actually declares.
#define udphdr __kernel_udphdr

// From kernel:include/net/ip.h
#define IP_DF 0x4000  // Flag: "Don't Fragment"

// Android only supports little endian architectures, so converting between host and
// network byte order is always a byte swap. Constant arguments go through the
// ___constant_swab*() forms; everything else uses the compiler builtin.
#define htons(x) (__builtin_constant_p(x) ? ___constant_swab16(x) : __builtin_bswap16(x))
#define htonl(x) (__builtin_constant_p(x) ? ___constant_swab32(x) : __builtin_bswap32(x))
// A byte swap is its own inverse, so the network-to-host direction reuses the same macros.
#define ntohs(x) htons(x)
#define ntohl(x) htonl(x)
50 
51 DEFINE_BPF_MAP(clat_ingress_map, HASH, ClatIngressKey, ClatIngressValue, 16)
52 
nat64(struct __sk_buff * skb,bool is_ethernet)53 static inline __always_inline int nat64(struct __sk_buff* skb, bool is_ethernet) {
54     const int l2_header_size = is_ethernet ? sizeof(struct ethhdr) : 0;
55     void* data = (void*)(long)skb->data;
56     const void* data_end = (void*)(long)skb->data_end;
57     const struct ethhdr* const eth = is_ethernet ? data : NULL;  // used iff is_ethernet
58     const struct ipv6hdr* const ip6 = is_ethernet ? (void*)(eth + 1) : data;
59     const struct tcphdr* const tcp = (void*)(ip6 + 1);
60     const struct udphdr* const udp = (void*)(ip6 + 1);
61 
62     // Must be meta-ethernet IPv6 frame
63     if (skb->protocol != htons(ETH_P_IPV6)) return TC_ACT_OK;
64 
65     // Must have (ethernet and) ipv6 header
66     if (data + l2_header_size + sizeof(*ip6) > data_end) return TC_ACT_OK;
67 
68     // Ethertype - if present - must be IPv6
69     if (is_ethernet && (eth->h_proto != htons(ETH_P_IPV6))) return TC_ACT_OK;
70 
71     // IP version must be 6
72     if (ip6->version != 6) return TC_ACT_OK;
73 
74     // Maximum IPv6 payload length that can be translated to IPv4
75     if (ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr)) return TC_ACT_OK;
76 
77     switch (ip6->nexthdr) {
78         case IPPROTO_TCP:  // If TCP, must have 20 byte minimal TCP header
79             if (tcp + 1 > (struct tcphdr*)data_end) return TC_ACT_OK;
80             break;
81 
82         case IPPROTO_UDP:  // If UDP, must have 8 byte minimal UDP header
83             if (udp + 1 > (struct udphdr*)data_end) return TC_ACT_OK;
84             break;
85 
86         default:  // do not know how to handle anything else
87             return TC_ACT_OK;
88     }
89 
90     ClatIngressKey k = {
91             .iif = skb->ifindex,
92             .pfx96.in6_u.u6_addr32 =
93                     {
94                             ip6->saddr.in6_u.u6_addr32[0],
95                             ip6->saddr.in6_u.u6_addr32[1],
96                             ip6->saddr.in6_u.u6_addr32[2],
97                     },
98             .local6 = ip6->daddr,
99     };
100 
101     ClatIngressValue* v = bpf_clat_ingress_map_lookup_elem(&k);
102 
103     if (!v) return TC_ACT_OK;
104 
105     struct ethhdr eth2;  // used iff is_ethernet
106     if (is_ethernet) {
107         eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
108         eth2.h_proto = htons(ETH_P_IP);  // But replace the ethertype
109     }
110 
111     struct iphdr ip = {
112             .version = 4,                                                      // u4
113             .ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
114             .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
115             .tot_len = htons(ntohs(ip6->payload_len) + sizeof(struct iphdr)),  // u16
116             .id = 0,                                                           // u16
117             .frag_off = htons(IP_DF),                                          // u16
118             .ttl = ip6->hop_limit,                                             // u8
119             .protocol = ip6->nexthdr,                                          // u8
120             .check = 0,                                                        // u16
121             .saddr = ip6->saddr.in6_u.u6_addr32[3],                            // u32
122             .daddr = v->local4.s_addr,                                         // u32
123     };
124 
125     // Calculate the IPv4 one's complement checksum of the IPv4 header.
126     __u32 sum = 0;
127     for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i) {
128         sum += ((__u16*)&ip)[i];
129     }
130     // Note that sum is guaranteed to be non-zero by virtue of ip.version == 4
131     sum = (sum & 0xFFFF) + (sum >> 16);  // collapse u32 into range 1 .. 0x1FFFE
132     sum = (sum & 0xFFFF) + (sum >> 16);  // collapse any potential carry into u16
133     ip.check = (__u16)~sum;              // sum cannot be zero, so this is never 0xFFFF
134 
135     // Note that there is no L4 checksum update: we are relying on the checksum neutrality
136     // of the ipv6 address chosen by netd's ClatdController.
137 
138     // Packet mutations begin - point of no return.
139     if (bpf_skb_change_proto(skb, htons(ETH_P_IP), 0)) return TC_ACT_SHOT;
140 
141     // bpf_skb_change_proto() invalidates all pointers - reload them.
142     data = (void*)(long)skb->data;
143     data_end = (void*)(long)skb->data_end;
144 
145     // I cannot think of any valid way for this error condition to trigger, however I do
146     // believe the explicit check is required to keep the in kernel ebpf verifier happy.
147     if (data + l2_header_size + sizeof(struct iphdr) > data_end) return TC_ACT_SHOT;
148 
149     if (is_ethernet) {
150         struct ethhdr* new_eth = data;
151 
152         // Copy over the updated ethernet header
153         *new_eth = eth2;
154 
155         // Copy over the new ipv4 header.
156         *(struct iphdr*)(new_eth + 1) = ip;
157     } else {
158         // Copy over the new ipv4 header without an ethernet header.
159         *(struct iphdr*)data = ip;
160     }
161 
162     // Redirect, possibly back to same interface, so tcpdump sees packet twice.
163     if (v->oif) return bpf_redirect(v->oif, BPF_F_INGRESS);
164 
165     // Just let it through, tcpdump will not see IPv4 packet.
166     return TC_ACT_OK;
167 }
168 
169 SEC("schedcls/ingress/clat_ether")
sched_cls_ingress_clat_ether(struct __sk_buff * skb)170 int sched_cls_ingress_clat_ether(struct __sk_buff* skb) {
171     return nat64(skb, true);
172 }
173 
174 SEC("schedcls/ingress/clat_rawip")
sched_cls_ingress_clat_rawip(struct __sk_buff * skb)175 int sched_cls_ingress_clat_rawip(struct __sk_buff* skb) {
176     return nat64(skb, false);
177 }
178 
179 char _license[] SEC("license") = "Apache 2.0";
180