1 // Copyright (c) PLUMgrid, Inc.
2 // Licensed under the Apache License, Version 2.0 (the "License")
3 #include <bcc/proto.h>
4
5 #define _memcpy __builtin_memcpy
6
// Metadata passed between bpf programs. pem() fills it from skb->cb[0] and
// skb->cb[1], or zeroes it when skb->tc_index is 0.
typedef struct bpf_metadata {
    u32 prog_id;     // from skb->cb[0]; 0 makes pem() take its "from external" path
    u32 rx_port_id;  // from skb->cb[1]; used by pem() as the key into pem_port
} bpf_metadata_t;
12
// Forwarding target; value type of the pem_dest, br1_dest and br2_dest tables.
typedef struct bpf_dest {
    u32 prog_id;  // index passed to jump.call(); also copied into skb->cb[0]
    u32 port_id;  // copied into skb->cb[1] before the tail call
} bpf_dest_t;
17
// Use a u64 to represent an Ethernet address (it is assigned directly from
// ethernet->dst / ethernet->src in br_common()).
// The wrapper structure is kept to indicate the semantics; it is the key type
// of the br1/br2 MAC hash tables.
typedef struct eth_addr {
    u64 addr;
} eth_addr_t;
23
// Program table for tail calls (16 slots). pem() and br_common() invoke
// jump.call(skb, prog_id) with the prog_id taken from a bpf_dest_t entry.
// Nothing in this file populates the slots.
BPF_PROG_ARRAY(jump, 16);
26
// physical endpoint manager (pem) tables, which connect to both bridge 1 and bridge 2
// <port_id, bpf_dest>: target whose prog_id/port_id pem() writes into skb->cb[] before the tail call
BPF_ARRAY(pem_dest, bpf_dest_t, 256);
// <port_id, ifindex>: interface pem() clone-redirects to for the port found in skb->cb[1]
BPF_ARRAY(pem_port, u32, 256);
// <ifindex, port_id>: maps skb->ingress_ifindex to a port id
BPF_HASH(pem_ifindex, u32, u32, 256);
// <0, tx2vm_pkts>: single counter at index 0, incremented by pem() before each clone-redirect
BPF_ARRAY(pem_stats, u32, 1);
36
// bridge 1 (br1) tables, used by br_common() when which_br == 1
// <port_id, bpf_dest>: target whose prog_id/port_id is written into skb->cb[] before the tail call
BPF_ARRAY(br1_dest, bpf_dest_t, 256);
// <eth_addr, port_id>: destination MAC -> port id
BPF_HASH(br1_mac, eth_addr_t, u32, 256);
// <0, rtr_ifindex>: index 0 holds the router interface used for broadcast frames coming from pem
BPF_ARRAY(br1_rtr, u32, 1);
// <mac, ifindex>: capacity 1; br_common() stores the source MAC of an ARP reply here,
// mapped to the ifindex read from br1_rtr, and looks up non-broadcast destination MACs in it
BPF_HASH(br1_mac_ifindex, eth_addr_t, u32, 1);
46
// bridge 2 (br2) tables, used by br_common() when which_br != 1
// <port_id, bpf_dest>: target whose prog_id/port_id is written into skb->cb[] before the tail call
BPF_ARRAY(br2_dest, bpf_dest_t, 256);
// <eth_addr, port_id>: destination MAC -> port id
BPF_HASH(br2_mac, eth_addr_t, u32, 256);
// <0, rtr_ifindex>: index 0 holds the router interface used for broadcast frames coming from pem
BPF_ARRAY(br2_rtr, u32, 1);
// <mac, ifindex>: capacity 1; br_common() stores the source MAC of an ARP reply here,
// mapped to the ifindex read from br2_rtr, and looks up non-broadcast destination MACs in it
BPF_HASH(br2_mac_ifindex, eth_addr_t, u32, 1);
56
pem(struct __sk_buff * skb)57 int pem(struct __sk_buff *skb) {
58 bpf_metadata_t meta = {};
59 u32 ifindex;
60 u32 *tx_port_id_p;
61 u32 tx_port_id;
62 u32 rx_port;
63 u32 *ifindex_p;
64 bpf_dest_t *dest_p;
65
66 // pem does not look at packet data
67 if (skb->tc_index == 0) {
68 skb->tc_index = 1;
69 skb->cb[0] = skb->cb[1] = 0;
70 meta.prog_id = meta.rx_port_id = 0;
71 } else {
72 meta.prog_id = skb->cb[0];
73 asm volatile("" ::: "memory");
74 meta.rx_port_id = skb->cb[1];
75 }
76 if (!meta.prog_id) {
77 /* from external */
78 ifindex = skb->ingress_ifindex;
79 tx_port_id_p = pem_ifindex.lookup(&ifindex);
80 if (tx_port_id_p) {
81 tx_port_id = *tx_port_id_p;
82 dest_p = pem_dest.lookup(&tx_port_id);
83 if (dest_p) {
84 skb->cb[0] = dest_p->prog_id;
85 skb->cb[1] = dest_p->port_id;
86 jump.call(skb, dest_p->prog_id);
87 }
88 }
89 } else {
90 /* from internal */
91 rx_port = meta.rx_port_id;
92 ifindex_p = pem_port.lookup(&rx_port);
93 if (ifindex_p) {
94 #if 1
95 /* accumulate stats, may hurt performance slightly */
96 u32 index = 0;
97 u32 *value = pem_stats.lookup(&index);
98 if (value)
99 lock_xadd(value, 1);
100 #endif
101 bpf_clone_redirect(skb, *ifindex_p, 0);
102 }
103 }
104
105 return 1;
106 }
107
br_common(struct __sk_buff * skb,int which_br)108 static int br_common(struct __sk_buff *skb, int which_br) {
109 u8 *cursor = 0;
110 u16 proto;
111 u16 arpop;
112 eth_addr_t dmac;
113 u8 *mac_p;
114 u32 dip;
115 u32 *tx_port_id_p;
116 u32 tx_port_id;
117 bpf_dest_t *dest_p;
118 u32 index, *rtrif_p;
119
120 struct ethernet_t *ethernet = cursor_advance(cursor, sizeof(*ethernet));
121 /* handle ethernet packet header */
122 {
123 dmac.addr = ethernet->dst;
124 /* skb->tc_index may be preserved across router namespace if router simply rewrite packet
125 * and send it back.
126 */
127 if (skb->tc_index == 1) {
128 /* packet from pem, send to the router, set tc_index to 2 */
129 skb->tc_index = 2;
130 if (dmac.addr == 0xffffffffffffULL) {
131 index = 0;
132 if (which_br == 1)
133 rtrif_p = br1_rtr.lookup(&index);
134 else
135 rtrif_p = br2_rtr.lookup(&index);
136 if (rtrif_p)
137 bpf_clone_redirect(skb, *rtrif_p, 0);
138 } else {
139 /* the dmac address should match the router's */
140 if (which_br == 1)
141 rtrif_p = br1_mac_ifindex.lookup(&dmac);
142 else
143 rtrif_p = br2_mac_ifindex.lookup(&dmac);
144 if (rtrif_p)
145 bpf_clone_redirect(skb, *rtrif_p, 0);
146 }
147 return 1;
148 }
149
150 /* set the tc_index to 1 so pem knows it is from internal */
151 skb->tc_index = 1;
152 switch (ethernet->type) {
153 case ETH_P_IP: goto ip;
154 case ETH_P_ARP: goto arp;
155 case ETH_P_8021Q: goto dot1q;
156 default: goto EOP;
157 }
158 }
159
160 dot1q: {
161 struct dot1q_t *dot1q = cursor_advance(cursor, sizeof(*dot1q));
162 switch(dot1q->type) {
163 case ETH_P_IP: goto ip;
164 case ETH_P_ARP: goto arp;
165 default: goto EOP;
166 }
167 }
168
169 arp: {
170 struct arp_t *arp = cursor_advance(cursor, sizeof(*arp));
171 /* mac learning */
172 arpop = arp->oper;
173 if (arpop == 2) {
174 index = 0;
175 if (which_br == 1)
176 rtrif_p = br1_rtr.lookup(&index);
177 else
178 rtrif_p = br2_rtr.lookup(&index);
179 if (rtrif_p) {
180 __u32 ifindex = *rtrif_p;
181 eth_addr_t smac;
182
183 smac.addr = ethernet->src;
184 if (which_br == 1)
185 br1_mac_ifindex.update(&smac, &ifindex);
186 else
187 br2_mac_ifindex.update(&smac, &ifindex);
188 }
189 }
190 goto xmit;
191 }
192
193 ip: {
194 struct ip_t *ip = cursor_advance(cursor, sizeof(*ip));
195 goto xmit;
196 }
197
198 xmit:
199 if (which_br == 1)
200 tx_port_id_p = br1_mac.lookup(&dmac);
201 else
202 tx_port_id_p = br2_mac.lookup(&dmac);
203 if (tx_port_id_p) {
204 tx_port_id = *tx_port_id_p;
205 if (which_br == 1)
206 dest_p = br1_dest.lookup(&tx_port_id);
207 else
208 dest_p = br2_dest.lookup(&tx_port_id);
209 if (dest_p) {
210 skb->cb[0] = dest_p->prog_id;
211 skb->cb[1] = dest_p->port_id;
212 jump.call(skb, dest_p->prog_id);
213 }
214 }
215
216 EOP:
217 return 1;
218 }
219
// Entry point for bridge 1: runs the shared bridge logic with which_br = 1,
// which selects the br1_* tables.
int br1(struct __sk_buff *skb) {
    const int bridge_id = 1;

    return br_common(skb, bridge_id);
}
223
// Entry point for bridge 2: runs the shared bridge logic with which_br = 2,
// which selects the br2_* tables.
int br2(struct __sk_buff *skb) {
    const int bridge_id = 2;

    return br_common(skb, bridge_id);
}
227