• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Seagate, Inc.
31  */
32 #define DEBUG_SUBSYSTEM S_LNET
33 
34 #include <linux/if.h>
35 #include <linux/in.h>
36 #include <linux/net.h>
37 #include <linux/file.h>
38 #include <linux/pagemap.h>
39 /* For sys_open & sys_close */
40 #include <linux/syscalls.h>
41 #include <net/sock.h>
42 
43 #include "../../include/linux/libcfs/libcfs.h"
44 #include "../../include/linux/lnet/lib-lnet.h"
45 
46 static int
kernel_sock_unlocked_ioctl(struct file * filp,int cmd,unsigned long arg)47 kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
48 {
49 	mm_segment_t oldfs = get_fs();
50 	int err;
51 
52 	set_fs(KERNEL_DS);
53 	err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
54 	set_fs(oldfs);
55 
56 	return err;
57 }
58 
59 static int
lnet_sock_ioctl(int cmd,unsigned long arg)60 lnet_sock_ioctl(int cmd, unsigned long arg)
61 {
62 	struct file *sock_filp;
63 	struct socket *sock;
64 	int rc;
65 
66 	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
67 	if (rc) {
68 		CERROR("Can't create socket: %d\n", rc);
69 		return rc;
70 	}
71 
72 	sock_filp = sock_alloc_file(sock, 0, NULL);
73 	if (IS_ERR(sock_filp)) {
74 		sock_release(sock);
75 		rc = PTR_ERR(sock_filp);
76 		goto out;
77 	}
78 
79 	rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
80 
81 	fput(sock_filp);
82 out:
83 	return rc;
84 }
85 
86 int
lnet_ipif_query(char * name,int * up,__u32 * ip,__u32 * mask)87 lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
88 {
89 	struct ifreq ifr;
90 	int nob;
91 	int rc;
92 	__u32 val;
93 
94 	nob = strnlen(name, IFNAMSIZ);
95 	if (nob == IFNAMSIZ) {
96 		CERROR("Interface name %s too long\n", name);
97 		return -EINVAL;
98 	}
99 
100 	CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);
101 
102 	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
103 		return -E2BIG;
104 	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
105 
106 	rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
107 	if (rc) {
108 		CERROR("Can't get flags for interface %s\n", name);
109 		return rc;
110 	}
111 
112 	if (!(ifr.ifr_flags & IFF_UP)) {
113 		CDEBUG(D_NET, "Interface %s down\n", name);
114 		*up = 0;
115 		*ip = *mask = 0;
116 		return 0;
117 	}
118 	*up = 1;
119 
120 	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
121 		return -E2BIG;
122 	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
123 
124 	ifr.ifr_addr.sa_family = AF_INET;
125 	rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
126 	if (rc) {
127 		CERROR("Can't get IP address for interface %s\n", name);
128 		return rc;
129 	}
130 
131 	val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
132 	*ip = ntohl(val);
133 
134 	if (strlen(name) > sizeof(ifr.ifr_name) - 1)
135 		return -E2BIG;
136 	strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
137 
138 	ifr.ifr_addr.sa_family = AF_INET;
139 	rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
140 	if (rc) {
141 		CERROR("Can't get netmask for interface %s\n", name);
142 		return rc;
143 	}
144 
145 	val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
146 	*mask = ntohl(val);
147 
148 	return 0;
149 }
150 EXPORT_SYMBOL(lnet_ipif_query);
151 
152 int
lnet_ipif_enumerate(char *** namesp)153 lnet_ipif_enumerate(char ***namesp)
154 {
155 	/* Allocate and fill in 'names', returning # interfaces/error */
156 	char **names;
157 	int toobig;
158 	int nalloc;
159 	int nfound;
160 	struct ifreq *ifr;
161 	struct ifconf ifc;
162 	int rc;
163 	int nob;
164 	int i;
165 
166 	nalloc = 16;	/* first guess at max interfaces */
167 	toobig = 0;
168 	for (;;) {
169 		if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
170 			toobig = 1;
171 			nalloc = PAGE_SIZE / sizeof(*ifr);
172 			CWARN("Too many interfaces: only enumerating first %d\n",
173 			      nalloc);
174 		}
175 
176 		LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
177 		if (!ifr) {
178 			CERROR("ENOMEM enumerating up to %d interfaces\n",
179 			       nalloc);
180 			rc = -ENOMEM;
181 			goto out0;
182 		}
183 
184 		ifc.ifc_buf = (char *)ifr;
185 		ifc.ifc_len = nalloc * sizeof(*ifr);
186 
187 		rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
188 		if (rc < 0) {
189 			CERROR("Error %d enumerating interfaces\n", rc);
190 			goto out1;
191 		}
192 
193 		LASSERT(!rc);
194 
195 		nfound = ifc.ifc_len / sizeof(*ifr);
196 		LASSERT(nfound <= nalloc);
197 
198 		if (nfound < nalloc || toobig)
199 			break;
200 
201 		LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
202 		nalloc *= 2;
203 	}
204 
205 	if (!nfound)
206 		goto out1;
207 
208 	LIBCFS_ALLOC(names, nfound * sizeof(*names));
209 	if (!names) {
210 		rc = -ENOMEM;
211 		goto out1;
212 	}
213 
214 	for (i = 0; i < nfound; i++) {
215 		nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
216 		if (nob == IFNAMSIZ) {
217 			/* no space for terminating NULL */
218 			CERROR("interface name %.*s too long (%d max)\n",
219 			       nob, ifr[i].ifr_name, IFNAMSIZ);
220 			rc = -ENAMETOOLONG;
221 			goto out2;
222 		}
223 
224 		LIBCFS_ALLOC(names[i], IFNAMSIZ);
225 		if (!names[i]) {
226 			rc = -ENOMEM;
227 			goto out2;
228 		}
229 
230 		memcpy(names[i], ifr[i].ifr_name, nob);
231 		names[i][nob] = 0;
232 	}
233 
234 	*namesp = names;
235 	rc = nfound;
236 
237 out2:
238 	if (rc < 0)
239 		lnet_ipif_free_enumeration(names, nfound);
240 out1:
241 	LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
242 out0:
243 	return rc;
244 }
245 EXPORT_SYMBOL(lnet_ipif_enumerate);
246 
247 void
lnet_ipif_free_enumeration(char ** names,int n)248 lnet_ipif_free_enumeration(char **names, int n)
249 {
250 	int i;
251 
252 	LASSERT(n > 0);
253 
254 	for (i = 0; i < n && names[i]; i++)
255 		LIBCFS_FREE(names[i], IFNAMSIZ);
256 
257 	LIBCFS_FREE(names, n * sizeof(*names));
258 }
259 EXPORT_SYMBOL(lnet_ipif_free_enumeration);
260 
261 int
lnet_sock_write(struct socket * sock,void * buffer,int nob,int timeout)262 lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
263 {
264 	int rc;
265 	long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
266 	unsigned long then;
267 	struct timeval tv;
268 	struct kvec  iov = { .iov_base = buffer, .iov_len  = nob };
269 	struct msghdr msg = {NULL,};
270 
271 	LASSERT(nob > 0);
272 	/*
273 	 * Caller may pass a zero timeout if she thinks the socket buffer is
274 	 * empty enough to take the whole message immediately
275 	 */
276 	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
277 	for (;;) {
278 		msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
279 		if (timeout) {
280 			/* Set send timeout to remaining time */
281 			jiffies_to_timeval(jiffies_left, &tv);
282 			rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
283 					       (char *)&tv, sizeof(tv));
284 			if (rc) {
285 				CERROR("Can't set socket send timeout %ld.%06d: %d\n",
286 				       (long)tv.tv_sec, (int)tv.tv_usec, rc);
287 				return rc;
288 			}
289 		}
290 
291 		then = jiffies;
292 		rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
293 		jiffies_left -= jiffies - then;
294 
295 		if (rc < 0)
296 			return rc;
297 
298 		if (!rc) {
299 			CERROR("Unexpected zero rc\n");
300 			return -ECONNABORTED;
301 		}
302 
303 		if (!msg_data_left(&msg))
304 			break;
305 
306 		if (jiffies_left <= 0)
307 			return -EAGAIN;
308 	}
309 	return 0;
310 }
311 EXPORT_SYMBOL(lnet_sock_write);
312 
313 int
lnet_sock_read(struct socket * sock,void * buffer,int nob,int timeout)314 lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
315 {
316 	int rc;
317 	long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
318 	unsigned long then;
319 	struct timeval tv;
320 
321 	LASSERT(nob > 0);
322 	LASSERT(jiffies_left > 0);
323 
324 	for (;;) {
325 		struct kvec  iov = {
326 			.iov_base = buffer,
327 			.iov_len  = nob
328 		};
329 		struct msghdr msg = {
330 			.msg_flags = 0
331 		};
332 
333 		/* Set receive timeout to remaining time */
334 		jiffies_to_timeval(jiffies_left, &tv);
335 		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
336 				       (char *)&tv, sizeof(tv));
337 		if (rc) {
338 			CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
339 			       (long)tv.tv_sec, (int)tv.tv_usec, rc);
340 			return rc;
341 		}
342 
343 		then = jiffies;
344 		rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
345 		jiffies_left -= jiffies - then;
346 
347 		if (rc < 0)
348 			return rc;
349 
350 		if (!rc)
351 			return -ECONNRESET;
352 
353 		buffer = ((char *)buffer) + rc;
354 		nob -= rc;
355 
356 		if (!nob)
357 			return 0;
358 
359 		if (jiffies_left <= 0)
360 			return -ETIMEDOUT;
361 	}
362 }
363 EXPORT_SYMBOL(lnet_sock_read);
364 
365 static int
lnet_sock_create(struct socket ** sockp,int * fatal,__u32 local_ip,int local_port)366 lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
367 		 int local_port)
368 {
369 	struct sockaddr_in locaddr;
370 	struct socket *sock;
371 	int rc;
372 	int option;
373 
374 	/* All errors are fatal except bind failure if the port is in use */
375 	*fatal = 1;
376 
377 	rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
378 	*sockp = sock;
379 	if (rc) {
380 		CERROR("Can't create socket: %d\n", rc);
381 		return rc;
382 	}
383 
384 	option = 1;
385 	rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
386 			       (char *)&option, sizeof(option));
387 	if (rc) {
388 		CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
389 		goto failed;
390 	}
391 
392 	if (local_ip || local_port) {
393 		memset(&locaddr, 0, sizeof(locaddr));
394 		locaddr.sin_family = AF_INET;
395 		locaddr.sin_port = htons(local_port);
396 		locaddr.sin_addr.s_addr = !local_ip ?
397 					  INADDR_ANY : htonl(local_ip);
398 
399 		rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
400 				 sizeof(locaddr));
401 		if (rc == -EADDRINUSE) {
402 			CDEBUG(D_NET, "Port %d already in use\n", local_port);
403 			*fatal = 0;
404 			goto failed;
405 		}
406 		if (rc) {
407 			CERROR("Error trying to bind to port %d: %d\n",
408 			       local_port, rc);
409 			goto failed;
410 		}
411 	}
412 	return 0;
413 
414 failed:
415 	sock_release(sock);
416 	return rc;
417 }
418 
419 int
lnet_sock_setbuf(struct socket * sock,int txbufsize,int rxbufsize)420 lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
421 {
422 	int option;
423 	int rc;
424 
425 	if (txbufsize) {
426 		option = txbufsize;
427 		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
428 				       (char *)&option, sizeof(option));
429 		if (rc) {
430 			CERROR("Can't set send buffer %d: %d\n",
431 			       option, rc);
432 			return rc;
433 		}
434 	}
435 
436 	if (rxbufsize) {
437 		option = rxbufsize;
438 		rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
439 				       (char *)&option, sizeof(option));
440 		if (rc) {
441 			CERROR("Can't set receive buffer %d: %d\n",
442 			       option, rc);
443 			return rc;
444 		}
445 	}
446 	return 0;
447 }
448 EXPORT_SYMBOL(lnet_sock_setbuf);
449 
450 int
lnet_sock_getaddr(struct socket * sock,bool remote,__u32 * ip,int * port)451 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
452 {
453 	struct sockaddr_in sin;
454 	int len = sizeof(sin);
455 	int rc;
456 
457 	if (remote)
458 		rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
459 	else
460 		rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
461 	if (rc) {
462 		CERROR("Error %d getting sock %s IP/port\n",
463 		       rc, remote ? "peer" : "local");
464 		return rc;
465 	}
466 
467 	if (ip)
468 		*ip = ntohl(sin.sin_addr.s_addr);
469 
470 	if (port)
471 		*port = ntohs(sin.sin_port);
472 
473 	return 0;
474 }
475 EXPORT_SYMBOL(lnet_sock_getaddr);
476 
477 int
lnet_sock_getbuf(struct socket * sock,int * txbufsize,int * rxbufsize)478 lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
479 {
480 	if (txbufsize)
481 		*txbufsize = sock->sk->sk_sndbuf;
482 
483 	if (rxbufsize)
484 		*rxbufsize = sock->sk->sk_rcvbuf;
485 
486 	return 0;
487 }
488 EXPORT_SYMBOL(lnet_sock_getbuf);
489 
490 int
lnet_sock_listen(struct socket ** sockp,__u32 local_ip,int local_port,int backlog)491 lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
492 		 int backlog)
493 {
494 	int fatal;
495 	int rc;
496 
497 	rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
498 	if (rc) {
499 		if (!fatal)
500 			CERROR("Can't create socket: port %d already in use\n",
501 			       local_port);
502 		return rc;
503 	}
504 
505 	rc = kernel_listen(*sockp, backlog);
506 	if (!rc)
507 		return 0;
508 
509 	CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
510 	sock_release(*sockp);
511 	return rc;
512 }
513 
514 int
lnet_sock_accept(struct socket ** newsockp,struct socket * sock)515 lnet_sock_accept(struct socket **newsockp, struct socket *sock)
516 {
517 	wait_queue_t wait;
518 	struct socket *newsock;
519 	int rc;
520 
521 	/*
522 	 * XXX this should add a ref to sock->ops->owner, if
523 	 * TCP could be a module
524 	 */
525 	rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
526 	if (rc) {
527 		CERROR("Can't allocate socket\n");
528 		return rc;
529 	}
530 
531 	newsock->ops = sock->ops;
532 
533 	rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
534 	if (rc == -EAGAIN) {
535 		/* Nothing ready, so wait for activity */
536 		init_waitqueue_entry(&wait, current);
537 		add_wait_queue(sk_sleep(sock->sk), &wait);
538 		set_current_state(TASK_INTERRUPTIBLE);
539 		schedule();
540 		remove_wait_queue(sk_sleep(sock->sk), &wait);
541 		rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
542 	}
543 
544 	if (rc)
545 		goto failed;
546 
547 	*newsockp = newsock;
548 	return 0;
549 
550 failed:
551 	sock_release(newsock);
552 	return rc;
553 }
554 
555 int
lnet_sock_connect(struct socket ** sockp,int * fatal,__u32 local_ip,int local_port,__u32 peer_ip,int peer_port)556 lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
557 		  int local_port, __u32 peer_ip, int peer_port)
558 {
559 	struct sockaddr_in srvaddr;
560 	int rc;
561 
562 	rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
563 	if (rc)
564 		return rc;
565 
566 	memset(&srvaddr, 0, sizeof(srvaddr));
567 	srvaddr.sin_family = AF_INET;
568 	srvaddr.sin_port = htons(peer_port);
569 	srvaddr.sin_addr.s_addr = htonl(peer_ip);
570 
571 	rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
572 			    sizeof(srvaddr), 0);
573 	if (!rc)
574 		return 0;
575 
576 	/*
577 	 * EADDRNOTAVAIL probably means we're already connected to the same
578 	 * peer/port on the same local port on a differently typed
579 	 * connection.  Let our caller retry with a different local
580 	 * port...
581 	 */
582 	*fatal = !(rc == -EADDRNOTAVAIL);
583 
584 	CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
585 		     "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
586 		     &local_ip, local_port, &peer_ip, peer_port);
587 
588 	sock_release(*sockp);
589 	return rc;
590 }
591