1 /*
2 * xt_HMARK - Netfilter module to set mark by means of hashing
3 *
4 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
5 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 */
11
12 #include <linux/module.h>
13 #include <linux/skbuff.h>
14 #include <linux/icmp.h>
15
16 #include <linux/netfilter/x_tables.h>
17 #include <linux/netfilter/xt_HMARK.h>
18
19 #include <net/ip.h>
20 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
21 #include <net/netfilter/nf_conntrack.h>
22 #endif
23 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
24 #include <net/ipv6.h>
25 #include <linux/netfilter_ipv6/ip6_tables.h>
26 #endif
27
28 MODULE_LICENSE("GPL");
29 MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
30 MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
31 MODULE_ALIAS("ipt_HMARK");
32 MODULE_ALIAS("ip6t_HMARK");
33
34 struct hmark_tuple {
35 __be32 src;
36 __be32 dst;
37 union hmark_ports uports;
38 u8 proto;
39 };
40
/*
 * Fold a masked IPv6 address into one 32-bit word.
 *
 * Each of the four 32-bit words of @addr32 is AND-ed with the matching word
 * of @mask and the results are XOR-ed together.  Both arrays must hold four
 * __be32 words.
 */
static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
{
	__be32 folded = 0;
	int i;

	for (i = 0; i < 4; i++)
		folded ^= addr32[i] & mask[i];

	return folded;
}
48
49 static inline __be32
hmark_addr_mask(int l3num,const __be32 * addr32,const __be32 * mask)50 hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
51 {
52 switch (l3num) {
53 case AF_INET:
54 return *addr32 & *mask;
55 case AF_INET6:
56 return hmark_addr6_mask(addr32, mask);
57 }
58 return 0;
59 }
60
hmark_swap_ports(union hmark_ports * uports,const struct xt_hmark_info * info)61 static inline void hmark_swap_ports(union hmark_ports *uports,
62 const struct xt_hmark_info *info)
63 {
64 union hmark_ports hp;
65 u16 src, dst;
66
67 hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
68 src = ntohs(hp.b16.src);
69 dst = ntohs(hp.b16.dst);
70
71 if (dst > src)
72 uports->v32 = (dst << 16) | src;
73 else
74 uports->v32 = (src << 16) | dst;
75 }
76
77 static int
hmark_ct_set_htuple(const struct sk_buff * skb,struct hmark_tuple * t,const struct xt_hmark_info * info)78 hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
79 const struct xt_hmark_info *info)
80 {
81 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
82 enum ip_conntrack_info ctinfo;
83 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
84 struct nf_conntrack_tuple *otuple;
85 struct nf_conntrack_tuple *rtuple;
86
87 if (ct == NULL)
88 return -1;
89
90 otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
91 rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
92
93 t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
94 info->src_mask.ip6);
95 t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
96 info->dst_mask.ip6);
97
98 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
99 return 0;
100
101 t->proto = nf_ct_protonum(ct);
102 if (t->proto != IPPROTO_ICMP) {
103 t->uports.b16.src = otuple->src.u.all;
104 t->uports.b16.dst = rtuple->src.u.all;
105 hmark_swap_ports(&t->uports, info);
106 }
107
108 return 0;
109 #else
110 return -1;
111 #endif
112 }
113
114 /* This hash function is endian independent, to ensure consistent hashing if
115 * the cluster is composed of big and little endian systems. */
116 static inline u32
hmark_hash(struct hmark_tuple * t,const struct xt_hmark_info * info)117 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
118 {
119 u32 hash;
120 u32 src = ntohl(t->src);
121 u32 dst = ntohl(t->dst);
122
123 if (dst < src)
124 swap(src, dst);
125
126 hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
127 hash = hash ^ (t->proto & info->proto_mask);
128
129 return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
130 }
131
132 static void
hmark_set_tuple_ports(const struct sk_buff * skb,unsigned int nhoff,struct hmark_tuple * t,const struct xt_hmark_info * info)133 hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
134 struct hmark_tuple *t, const struct xt_hmark_info *info)
135 {
136 int protoff;
137
138 protoff = proto_ports_offset(t->proto);
139 if (protoff < 0)
140 return;
141
142 nhoff += protoff;
143 if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
144 return;
145
146 hmark_swap_ports(&t->uports, info);
147 }
148
149 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
get_inner6_hdr(const struct sk_buff * skb,int * offset)150 static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
151 {
152 struct icmp6hdr *icmp6h, _ih6;
153
154 icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
155 if (icmp6h == NULL)
156 return 0;
157
158 if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
159 *offset += sizeof(struct icmp6hdr);
160 return 1;
161 }
162 return 0;
163 }
164
165 static int
hmark_pkt_set_htuple_ipv6(const struct sk_buff * skb,struct hmark_tuple * t,const struct xt_hmark_info * info)166 hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
167 const struct xt_hmark_info *info)
168 {
169 struct ipv6hdr *ip6, _ip6;
170 int flag = IP6_FH_F_AUTH;
171 unsigned int nhoff = 0;
172 u16 fragoff = 0;
173 int nexthdr;
174
175 ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
176 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
177 if (nexthdr < 0)
178 return 0;
179 /* No need to check for icmp errors on fragments */
180 if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
181 goto noicmp;
182 /* Use inner header in case of ICMP errors */
183 if (get_inner6_hdr(skb, &nhoff)) {
184 ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
185 if (ip6 == NULL)
186 return -1;
187 /* If AH present, use SPI like in ESP. */
188 flag = IP6_FH_F_AUTH;
189 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
190 if (nexthdr < 0)
191 return -1;
192 }
193 noicmp:
194 t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
195 t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
196
197 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
198 return 0;
199
200 t->proto = nexthdr;
201 if (t->proto == IPPROTO_ICMPV6)
202 return 0;
203
204 if (flag & IP6_FH_F_FRAG)
205 return 0;
206
207 hmark_set_tuple_ports(skb, nhoff, t, info);
208 return 0;
209 }
210
211 static unsigned int
hmark_tg_v6(struct sk_buff * skb,const struct xt_action_param * par)212 hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
213 {
214 const struct xt_hmark_info *info = par->targinfo;
215 struct hmark_tuple t;
216
217 memset(&t, 0, sizeof(struct hmark_tuple));
218
219 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
220 if (hmark_ct_set_htuple(skb, &t, info) < 0)
221 return XT_CONTINUE;
222 } else {
223 if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
224 return XT_CONTINUE;
225 }
226
227 skb->mark = hmark_hash(&t, info);
228 return XT_CONTINUE;
229 }
230 #endif
231
get_inner_hdr(const struct sk_buff * skb,int iphsz,int * nhoff)232 static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
233 {
234 const struct icmphdr *icmph;
235 struct icmphdr _ih;
236
237 /* Not enough header? */
238 icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
239 if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
240 return 0;
241
242 /* Error message? */
243 if (icmph->type != ICMP_DEST_UNREACH &&
244 icmph->type != ICMP_SOURCE_QUENCH &&
245 icmph->type != ICMP_TIME_EXCEEDED &&
246 icmph->type != ICMP_PARAMETERPROB &&
247 icmph->type != ICMP_REDIRECT)
248 return 0;
249
250 *nhoff += iphsz + sizeof(_ih);
251 return 1;
252 }
253
254 static int
hmark_pkt_set_htuple_ipv4(const struct sk_buff * skb,struct hmark_tuple * t,const struct xt_hmark_info * info)255 hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
256 const struct xt_hmark_info *info)
257 {
258 struct iphdr *ip, _ip;
259 int nhoff = skb_network_offset(skb);
260
261 ip = (struct iphdr *) (skb->data + nhoff);
262 if (ip->protocol == IPPROTO_ICMP) {
263 /* Use inner header in case of ICMP errors */
264 if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
265 ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
266 if (ip == NULL)
267 return -1;
268 }
269 }
270
271 t->src = ip->saddr & info->src_mask.ip;
272 t->dst = ip->daddr & info->dst_mask.ip;
273
274 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
275 return 0;
276
277 t->proto = ip->protocol;
278
279 /* ICMP has no ports, skip */
280 if (t->proto == IPPROTO_ICMP)
281 return 0;
282
283 /* follow-up fragments don't contain ports, skip all fragments */
284 if (ip->frag_off & htons(IP_MF | IP_OFFSET))
285 return 0;
286
287 hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);
288
289 return 0;
290 }
291
292 static unsigned int
hmark_tg_v4(struct sk_buff * skb,const struct xt_action_param * par)293 hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
294 {
295 const struct xt_hmark_info *info = par->targinfo;
296 struct hmark_tuple t;
297
298 memset(&t, 0, sizeof(struct hmark_tuple));
299
300 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
301 if (hmark_ct_set_htuple(skb, &t, info) < 0)
302 return XT_CONTINUE;
303 } else {
304 if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
305 return XT_CONTINUE;
306 }
307
308 skb->mark = hmark_hash(&t, info);
309 return XT_CONTINUE;
310 }
311
hmark_tg_check(const struct xt_tgchk_param * par)312 static int hmark_tg_check(const struct xt_tgchk_param *par)
313 {
314 const struct xt_hmark_info *info = par->targinfo;
315
316 if (!info->hmodulus) {
317 pr_info("xt_HMARK: hash modulus can't be zero\n");
318 return -EINVAL;
319 }
320 if (info->proto_mask &&
321 (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) {
322 pr_info("xt_HMARK: proto mask must be zero with L3 mode\n");
323 return -EINVAL;
324 }
325 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
326 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
327 XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) {
328 pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n");
329 return -EINVAL;
330 }
331 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
332 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
333 XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
334 pr_info("xt_HMARK: spi-set and port-set can't be combined\n");
335 return -EINVAL;
336 }
337 return 0;
338 }
339
340 static struct xt_target hmark_tg_reg[] __read_mostly = {
341 {
342 .name = "HMARK",
343 .family = NFPROTO_IPV4,
344 .target = hmark_tg_v4,
345 .targetsize = sizeof(struct xt_hmark_info),
346 .checkentry = hmark_tg_check,
347 .me = THIS_MODULE,
348 },
349 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
350 {
351 .name = "HMARK",
352 .family = NFPROTO_IPV6,
353 .target = hmark_tg_v6,
354 .targetsize = sizeof(struct xt_hmark_info),
355 .checkentry = hmark_tg_check,
356 .me = THIS_MODULE,
357 },
358 #endif
359 };
360
hmark_tg_init(void)361 static int __init hmark_tg_init(void)
362 {
363 return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
364 }
365
hmark_tg_exit(void)366 static void __exit hmark_tg_exit(void)
367 {
368 xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
369 }
370
/* Hook the registration/unregistration routines into module load/unload. */
module_init(hmark_tg_init);
module_exit(hmark_tg_exit);
373