// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

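/* Deferred-cleanup context: one work item per device/address removal event.
 * iterate_cleanup_work() walks the conntrack table and hands this struct to
 * @iter, which matches entries against @ifindex and (optionally) @addr.
 */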
struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	union nf_inet_addr addr;
	int ifindex;
	int (*iter)(struct nf_conn *i, void *data);
};

#define MAX_MASQ_WORKER_COUNT	16

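/* masq_mutex serializes notifier (un)registration, masq_refcnt counts the
 * users currently sharing the notifiers, and masq_worker_count bounds the
 * number of in-flight cleanup work items (see nf_nat_masq_schedule()).
 */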
static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

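/* Choose a new source address for a connection leaving @out and hand the
 * modified range to the generic SNAT setup: inet_select_addr() picks the
 * address @out would use towards the next hop, while the proto (port)
 * limits from @range are kept as-is.
 */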
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto   = range->min_proto;
	newrange.max_proto   = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

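/* Workqueue callback: runs in process context, so it may call the sleeping
 * nf_ct_iterate_cleanup_net() to drop all conntrack entries matched by
 * w->iter, then releases the references taken in nf_nat_masq_schedule().
 */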
static void iterate_cleanup_work(struct work_struct *work)
{
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);

	put_net(w->net);
	kfree(w);
	atomic_dec(&masq_worker_count);
	module_put(THIS_MODULE);
}

/* Iterate conntrack table in the background and remove conntrack entries
 * that use the device/address being removed.
 *
 * In case too many work items have been queued already or memory allocation
 * fails, iteration is skipped; conntrack entries will time out eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
				 int ifindex,
				 int (*iter)(struct nf_conn *i, void *data),
				 gfp_t gfp_flags)
{
	struct masq_dev_work *w;

	if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
		return;

	net = maybe_get_net(net);
	if (!net)
		return;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kzalloc(sizeof(*w), gfp_flags);
	if (w) {
		/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
		atomic_inc(&masq_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = ifindex;
		w->net = net;
		w->iter = iter;
		if (addr)
			w->addr = *addr;
		schedule_work(&w->work);
		return;
	}

	module_put(THIS_MODULE);
err_module:
	put_net(net);
}

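/* Match conntrack entries that were masqueraded via the interface being
 * removed (w->ifindex); entries without a NAT extension never match.
 */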
static int device_cmp(struct nf_conn *i, void *arg)
{
	const struct nf_conn_nat *nat = nfct_nat(i);
	const struct masq_dev_work *w = arg;

	if (!nat)
		return 0;
	return nat->masq_index == w->ifindex;
}

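/* Netdevice notifier: on NETDEV_DOWN, schedule a background flush of all
 * conntrack entries that were masqueraded via that device.
 */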
static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed.  Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */
		nf_nat_masq_schedule(net, NULL, dev->ifindex,
				     device_cmp, GFP_KERNEL);
	}

	return NOTIFY_DONE;
}

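/* Match entries that were masqueraded via w->ifindex *and* whose reply
 * destination equals the address being removed (w->addr).
 */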
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct nf_conntrack_tuple *tuple;
	struct masq_dev_work *w = ptr;

	if (!device_cmp(ct, ptr))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}

static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	const struct in_ifaddr *ifa = ptr;
	const struct in_device *idev;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	idev = ifa->ifa_dev;
	if (idev->dead)
		return NOTIFY_DONE;

	memset(&addr, 0, sizeof(addr));

	addr.ip = ifa->ifa_address;

	dev = idev->dev;
	nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
			     inet_cmp, GFP_KERNEL);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
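/* When IPv6 is built as a module, ipv6_dev_get_saddr() cannot be called
 * directly; go through nf_ipv6_ops, which is only populated once the
 * ipv6 module has been loaded.
 */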
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

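/* IPv6 counterpart of nf_nat_masquerade_ipv4(): source address selection
 * is delegated to the stack's IPv6 saddr algorithm, the rest of the range
 * handling is identical.
 */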
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6	= src;
	newrange.max_addr.in6	= src;
	newrange.min_proto	= range->min_proto;
	newrange.max_proto	= range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

/* Atomic notifier; we can't call nf_ct_iterate_cleanup_net() here (it can
 * sleep), so defer the cleanup to the system workqueue.
 *
 * As we can have 'a lot' of inet_events (depending on the number of IPv6
 * addresses being deleted), we also need to limit the work item queue.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;

	memset(&addr, 0, sizeof(addr));

	addr.in6 = ifa->addr;

	nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
			     GFP_ATOMIC);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

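/* Refcounted registration so that several users (for example the iptables
 * MASQUERADE target and the nftables masq expression) can share one set of
 * notifiers. A typical caller pairs this with the unregister helper, e.g.
 * (hypothetical module init):
 *
 *	static int __init masq_user_init(void)
 *	{
 *		int ret = nf_nat_masquerade_inet_register_notifiers();
 *
 *		if (ret)
 *			return ret;
 *		...
 *		return 0;
 *	}
 */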
int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

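/* Drop one user reference; the notifiers are only torn down once the
 * last user is gone.
 */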
void nf_nat_masquerade_inet_unregister_notifiers(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifiers still have clients */
	if (--masq_refcnt > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);