1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/types.h>
3 #include <linux/ip.h>
4 #include <linux/netfilter.h>
5 #include <linux/netfilter_ipv6.h>
6 #include <linux/netfilter_bridge.h>
7 #include <linux/module.h>
8 #include <linux/skbuff.h>
9 #include <linux/icmp.h>
10 #include <linux/sysctl.h>
11 #include <net/route.h>
12 #include <net/ip.h>
13
14 #include <net/netfilter/nf_conntrack.h>
15 #include <net/netfilter/nf_conntrack_core.h>
16 #include <net/netfilter/nf_conntrack_helper.h>
17 #include <net/netfilter/nf_conntrack_bridge.h>
18
19 #include <linux/netfilter/nf_tables.h>
20 #include <net/netfilter/nf_tables.h>
21
22 #include "../br_private.h"
23
24 /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
25 * has been linearized or cloned.
26 */
static int nf_br_ip_fragment(struct net *net, struct sock *sk,
			     struct sk_buff *skb,
			     struct nf_bridge_frag_data *data,
			     int (*output)(struct net *, struct sock *sk,
					   const struct nf_bridge_frag_data *data,
					   struct sk_buff *))
{
	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
	unsigned int hlen, ll_rs, mtu;
	ktime_t tstamp = skb->tstamp;
	struct ip_frag_state state;
	struct iphdr *iph;
	int err = 0;

	/* for offloaded checksums cleanup checksum before fragmentation */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto blackhole;

	iph = ip_hdr(skb);

	/*
	 *	Setup starting values
	 */

	hlen = iph->ihl * 4;
	/* frag_max_size recorded the largest fragment seen on input; strip
	 * the IP header so it bounds the payload per output fragment.
	 */
	frag_max_size -= hlen;
	ll_rs = LL_RESERVED_SPACE(skb->dev);
	mtu = skb->dev->mtu;

	/* Fast path: the skb still carries its original frag list, so the
	 * incoming fragment geometry can be replayed without copying data.
	 */
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip_fraglist_iter iter;
		struct sk_buff *frag;

		/* First fragment too big or no room for the link-layer
		 * header: cannot transmit at all -> drop.
		 */
		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < ll_rs)
			goto blackhole;

		/* Cloned skb: frag list is shared, fall back to copying. */
		if (skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			if (frag->len > mtu ||
			    skb_headroom(frag) < hlen + ll_rs)
				goto blackhole;

			/* Shared fragment: cannot modify in place. */
			if (skb_shared(frag))
				goto slow_path;
		}

		ip_fraglist_init(skb, iph, hlen, &iter);

		for (;;) {
			if (iter.frag)
				ip_fraglist_prepare(skb, &iter);

			/* Every emitted fragment inherits the original
			 * transmit timestamp.
			 */
			skb->tstamp = tstamp;
			err = output(net, sk, data, skb);
			if (err || !iter.frag)
				break;

			skb = ip_fraglist_next(&iter);
		}

		if (!err)
			return 0;

		/* Output failed mid-stream: free the not-yet-sent tail. */
		kfree_skb_list(iter.frag);

		return err;
	}
slow_path:
	/* This is a linearized skbuff, the original geometry is lost for us.
	 * This may also be a clone skbuff, we could preserve the geometry for
	 * the copies but probably not worth the effort.
	 */
	ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);

	while (state.left > 0) {
		struct sk_buff *skb2;

		skb2 = ip_frag_next(skb, &state);
		if (IS_ERR(skb2)) {
			err = PTR_ERR(skb2);
			goto blackhole;
		}

		skb2->tstamp = tstamp;
		err = output(net, sk, data, skb2);
		if (err)
			goto blackhole;
	}
	consume_skb(skb);
	return err;

blackhole:
	/* Silently drop; report success so the caller does not retry. */
	kfree_skb(skb);
	return 0;
}
127
128 /* ip_defrag() expects IPCB() in place. */
/* Stash the bridge control block aside and zero the leading part of
 * skb->cb that the inet defragmentation code (IPCB/IP6CB) will use.
 */
static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
			   size_t inet_skb_parm_size)
{
	struct br_input_skb_cb *live = BR_INPUT_SKB_CB(skb);

	memcpy(cb, live, sizeof(*cb));
	memset(skb->cb, 0, inet_skb_parm_size);
}
135
/* Put the saved bridge control block back into skb->cb and record the
 * largest fragment size seen during reassembly.
 */
static void br_skb_cb_restore(struct sk_buff *skb,
			      const struct br_input_skb_cb *cb,
			      u16 fragsz)
{
	struct br_input_skb_cb *live = BR_INPUT_SKB_CB(skb);

	memcpy(live, cb, sizeof(*cb));
	live->frag_max_size = fragsz;
}
143
/* Reassemble an IPv4 fragment received on the bridge.
 *
 * Returns NF_ACCEPT when the skb is not a fragment, or once ip_defrag()
 * handed back a fully reassembled packet.  Returns NF_STOLEN otherwise:
 * ip_defrag() has taken ownership of the skb (queued for reassembly, or
 * presumably consumed on error).
 */
static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
	enum ip_conntrack_info ctinfo;
	struct br_input_skb_cb cb;
	const struct nf_conn *ct;
	int err;

	if (!ip_is_fragment(ip_hdr(skb)))
		return NF_ACCEPT;

	/* Reassemble per conntrack zone: take the zone from the attached
	 * conntrack template, if any.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct)
		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));

	/* ip_defrag() expects a zeroed IPCB(); preserve the bridge cb. */
	br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
	local_bh_disable();
	err = ip_defrag(state->net, skb,
			IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
	local_bh_enable();
	if (!err) {
		br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
		/* reassembled packet may exceed the outgoing device MTU */
		skb->ignore_df = 1;
		return NF_ACCEPT;
	}

	return NF_STOLEN;
}
173
/* Reassemble an IPv6 fragment received on the bridge.
 *
 * Returns NF_ACCEPT once nf_ct_frag6_gather() produced a complete packet
 * (or unconditionally when IPv6 defrag support is compiled out),
 * NF_STOLEN while the fragment has been queued for reassembly, and
 * NF_DROP on any other gather error.
 */
static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
	enum ip_conntrack_info ctinfo;
	struct br_input_skb_cb cb;
	const struct nf_conn *ct;
	int err;

	/* Reassemble per conntrack zone, taken from the template if any. */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct)
		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));

	/* IPv6 defrag uses IP6CB(); preserve the bridge cb around it. */
	br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));

	err = nf_ct_frag6_gather(state->net, skb,
				 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
	/* queued */
	if (err == -EINPROGRESS)
		return NF_STOLEN;

	br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
	return err == 0 ? NF_ACCEPT : NF_DROP;
#else
	return NF_ACCEPT;
#endif
}
202
nf_ct_br_ip_check(const struct sk_buff * skb)203 static int nf_ct_br_ip_check(const struct sk_buff *skb)
204 {
205 const struct iphdr *iph;
206 int nhoff, len;
207
208 nhoff = skb_network_offset(skb);
209 iph = ip_hdr(skb);
210 if (iph->ihl < 5 ||
211 iph->version != 4)
212 return -1;
213
214 len = ntohs(iph->tot_len);
215 if (skb->len < nhoff + len ||
216 len < (iph->ihl * 4))
217 return -1;
218
219 return 0;
220 }
221
nf_ct_br_ipv6_check(const struct sk_buff * skb)222 static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
223 {
224 const struct ipv6hdr *hdr;
225 int nhoff, len;
226
227 nhoff = skb_network_offset(skb);
228 hdr = ipv6_hdr(skb);
229 if (hdr->version != 6)
230 return -1;
231
232 len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
233 if (skb->len < len)
234 return -1;
235
236 return 0;
237 }
238
/* NF_BR_PRE_ROUTING hook: validate the network header, trim link-layer
 * padding, defragment if needed and run conntrack on bridged traffic.
 */
static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nf_hook_state bridge_state = *state;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u32 len;
	int ret;

	/* Already tracked (and not just a template), or explicitly marked
	 * untracked: nothing to do here.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if ((ct && !nf_ct_is_template(ct)) ||
	    ctinfo == IP_CT_UNTRACKED)
		return NF_ACCEPT;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return NF_ACCEPT;

		/* Trim ethernet padding beyond the IP total length. */
		len = ntohs(ip_hdr(skb)->tot_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ip_check(skb))
			return NF_ACCEPT;

		/* conntrack core operates per L3 family, not NFPROTO_BRIDGE */
		bridge_state.pf = NFPROTO_IPV4;
		ret = nf_ct_br_defrag4(skb, &bridge_state);
		break;
	case htons(ETH_P_IPV6):
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return NF_ACCEPT;

		/* Trim ethernet padding beyond the IPv6 payload length. */
		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ipv6_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV6;
		ret = nf_ct_br_defrag6(skb, &bridge_state);
		break;
	default:
		/* Non-IP traffic is never tracked. */
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		return NF_ACCEPT;
	}

	/* Fragment was stolen/queued or dropped by the defrag step. */
	if (ret != NF_ACCEPT)
		return ret;

	return nf_conntrack_in(skb, &bridge_state);
}
292
/* NF_BR_LOCAL_IN hook: detach the unconfirmed conntrack entry from frames
 * that are not solely destined to this host, so the inet hooks can track
 * (and confirm) them again without racing on clones.
 */
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
				    const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	/* Host-only delivery: leave the entry in place for confirmation. */
	if (skb->pkt_type == PACKET_HOST)
		return NF_ACCEPT;

	/* nf_conntrack_confirm() cannot handle concurrent clones,
	 * this happens for broad/multicast frames with e.g. macvlan on top
	 * of the bridge device.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
		return NF_ACCEPT;

	/* let inet prerouting call conntrack again */
	skb->_nfct = 0;
	nf_ct_put(ct);

	return NF_ACCEPT;
}
316
nf_ct_bridge_frag_save(struct sk_buff * skb,struct nf_bridge_frag_data * data)317 static void nf_ct_bridge_frag_save(struct sk_buff *skb,
318 struct nf_bridge_frag_data *data)
319 {
320 if (skb_vlan_tag_present(skb)) {
321 data->vlan_present = true;
322 data->vlan_tci = skb->vlan_tci;
323 data->vlan_proto = skb->vlan_proto;
324 } else {
325 data->vlan_present = false;
326 }
327 skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
328 }
329
/* Refragment a packet that was defragmented on ingress (marked by a
 * non-zero frag_max_size in the bridge cb), pushing each fragment out
 * through @output.
 *
 * Returns NF_ACCEPT when no refragmentation is needed, NF_STOLEN once the
 * fragmenter has taken ownership of the skb, or NF_DROP for a protocol
 * that can never have been defragmented here.
 */
static unsigned int
nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
		    int (*output)(struct net *, struct sock *sk,
				  const struct nf_bridge_frag_data *data,
				  struct sk_buff *))
{
	struct nf_bridge_frag_data data;

	if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
		return NF_ACCEPT;

	/* Preserve ethernet header/VLAN tag; restored per fragment. */
	nf_ct_bridge_frag_save(skb, &data);
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
		break;
	case htons(ETH_P_IPV6):
		nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
		break;
	default:
		/* Only IP/IPv6 get defragmented, so this is unreachable. */
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return NF_STOLEN;
}
356
357 /* Actually only slow path refragmentation needs this. */
nf_ct_bridge_frag_restore(struct sk_buff * skb,const struct nf_bridge_frag_data * data)358 static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
359 const struct nf_bridge_frag_data *data)
360 {
361 int err;
362
363 err = skb_cow_head(skb, ETH_HLEN);
364 if (err) {
365 kfree_skb(skb);
366 return -ENOMEM;
367 }
368 if (data->vlan_present)
369 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
370 else if (skb_vlan_tag_present(skb))
371 __vlan_hwaccel_clear_tag(skb);
372
373 skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
374 skb_reset_mac_header(skb);
375
376 return 0;
377 }
378
/* Per-fragment output callback: restore the ethernet header, then hand
 * the fragment to the bridge transmit path.
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int err = nf_ct_bridge_frag_restore(skb, data);

	/* On error the skb has already been freed by the restore step. */
	return err < 0 ? err : br_dev_queue_push_xmit(net, sk, skb);
}
391
/* Confirm the conntrack entry attached to @skb.  For trackable IP/IPv6
 * packets, compute the transport header offset so nf_confirm() can run
 * helpers/seqadj before confirmation; otherwise fall back to plain
 * nf_conntrack_confirm().
 */
static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	int protoff;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
		return nf_conntrack_confirm(skb);

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		protoff = skb_network_offset(skb) + ip_hdrlen(skb);
		break;
	case htons(ETH_P_IPV6): {
		unsigned char pnum = ipv6_hdr(skb)->nexthdr;
		__be16 frag_off;

		/* Walk past IPv6 extension headers to the transport header.
		 * A non-zero fragment offset means a non-first fragment with
		 * no transport header: just confirm without helper work.
		 */
		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
					   &frag_off);
		if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
			return nf_conntrack_confirm(skb);
	}
		break;
	default:
		return NF_ACCEPT;
	}
	return nf_confirm(skb, protoff, ct, ctinfo);
}
421
nf_ct_bridge_post(void * priv,struct sk_buff * skb,const struct nf_hook_state * state)422 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
423 const struct nf_hook_state *state)
424 {
425 int ret;
426
427 ret = nf_ct_bridge_confirm(skb);
428 if (ret != NF_ACCEPT)
429 return ret;
430
431 return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
432 }
433
/* Bridge-family conntrack hooks:
 * - PRE_ROUTING: header checks, defragmentation, nf_conntrack_in()
 * - LOCAL_IN: detach unconfirmed entries from non-host-only frames
 * - POST_ROUTING: confirm entries and refragment
 */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
	{
		.hook = nf_ct_bridge_pre,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_PRE_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK,
	},
	{
		.hook = nf_ct_bridge_in,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_LOCAL_IN,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		.hook = nf_ct_bridge_post,
		.pf = NFPROTO_BRIDGE,
		.hooknum = NF_BR_POST_ROUTING,
		.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};
454
/* Registration descriptor handed to the conntrack core. */
static struct nf_ct_bridge_info bridge_info = {
	.ops = nf_ct_bridge_hook_ops,
	.ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops),
	.me = THIS_MODULE,
};
460
/* Module init: register the bridge hooks with the conntrack core. */
static int __init nf_conntrack_l3proto_bridge_init(void)
{
	nf_ct_bridge_register(&bridge_info);

	return 0;
}
467
/* Module exit: unregister the bridge hooks from the conntrack core. */
static void __exit nf_conntrack_l3proto_bridge_fini(void)
{
	nf_ct_bridge_unregister(&bridge_info);
}
472
module_init(nf_conntrack_l3proto_bridge_init);
module_exit(nf_conntrack_l3proto_bridge_fini);

/* Autoload when conntrack support for AF_BRIDGE is requested. */
MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");
478