1 /*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
19 *
20 * GPL HEADER END
21 */
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Copyright (c) 2012, 2015, Intel Corporation.
27 */
28 /*
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Seagate, Inc.
31 */
32 #define DEBUG_SUBSYSTEM S_LNET
33
34 #include <linux/if.h>
35 #include <linux/in.h>
36 #include <linux/net.h>
37 #include <linux/file.h>
38 #include <linux/pagemap.h>
39 /* For sys_open & sys_close */
40 #include <linux/syscalls.h>
41 #include <net/sock.h>
42
43 #include "../../include/linux/libcfs/libcfs.h"
44 #include "../../include/linux/lnet/lib-lnet.h"
45
46 static int
kernel_sock_unlocked_ioctl(struct file * filp,int cmd,unsigned long arg)47 kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
48 {
49 mm_segment_t oldfs = get_fs();
50 int err;
51
52 set_fs(KERNEL_DS);
53 err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
54 set_fs(oldfs);
55
56 return err;
57 }
58
59 static int
lnet_sock_ioctl(int cmd,unsigned long arg)60 lnet_sock_ioctl(int cmd, unsigned long arg)
61 {
62 struct file *sock_filp;
63 struct socket *sock;
64 int rc;
65
66 rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
67 if (rc) {
68 CERROR("Can't create socket: %d\n", rc);
69 return rc;
70 }
71
72 sock_filp = sock_alloc_file(sock, 0, NULL);
73 if (IS_ERR(sock_filp)) {
74 sock_release(sock);
75 rc = PTR_ERR(sock_filp);
76 goto out;
77 }
78
79 rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
80
81 fput(sock_filp);
82 out:
83 return rc;
84 }
85
86 int
lnet_ipif_query(char * name,int * up,__u32 * ip,__u32 * mask)87 lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
88 {
89 struct ifreq ifr;
90 int nob;
91 int rc;
92 __u32 val;
93
94 nob = strnlen(name, IFNAMSIZ);
95 if (nob == IFNAMSIZ) {
96 CERROR("Interface name %s too long\n", name);
97 return -EINVAL;
98 }
99
100 CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);
101
102 if (strlen(name) > sizeof(ifr.ifr_name) - 1)
103 return -E2BIG;
104 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
105
106 rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
107 if (rc) {
108 CERROR("Can't get flags for interface %s\n", name);
109 return rc;
110 }
111
112 if (!(ifr.ifr_flags & IFF_UP)) {
113 CDEBUG(D_NET, "Interface %s down\n", name);
114 *up = 0;
115 *ip = *mask = 0;
116 return 0;
117 }
118 *up = 1;
119
120 if (strlen(name) > sizeof(ifr.ifr_name) - 1)
121 return -E2BIG;
122 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
123
124 ifr.ifr_addr.sa_family = AF_INET;
125 rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
126 if (rc) {
127 CERROR("Can't get IP address for interface %s\n", name);
128 return rc;
129 }
130
131 val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
132 *ip = ntohl(val);
133
134 if (strlen(name) > sizeof(ifr.ifr_name) - 1)
135 return -E2BIG;
136 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
137
138 ifr.ifr_addr.sa_family = AF_INET;
139 rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
140 if (rc) {
141 CERROR("Can't get netmask for interface %s\n", name);
142 return rc;
143 }
144
145 val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
146 *mask = ntohl(val);
147
148 return 0;
149 }
150 EXPORT_SYMBOL(lnet_ipif_query);
151
152 int
lnet_ipif_enumerate(char *** namesp)153 lnet_ipif_enumerate(char ***namesp)
154 {
155 /* Allocate and fill in 'names', returning # interfaces/error */
156 char **names;
157 int toobig;
158 int nalloc;
159 int nfound;
160 struct ifreq *ifr;
161 struct ifconf ifc;
162 int rc;
163 int nob;
164 int i;
165
166 nalloc = 16; /* first guess at max interfaces */
167 toobig = 0;
168 for (;;) {
169 if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
170 toobig = 1;
171 nalloc = PAGE_SIZE / sizeof(*ifr);
172 CWARN("Too many interfaces: only enumerating first %d\n",
173 nalloc);
174 }
175
176 LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
177 if (!ifr) {
178 CERROR("ENOMEM enumerating up to %d interfaces\n",
179 nalloc);
180 rc = -ENOMEM;
181 goto out0;
182 }
183
184 ifc.ifc_buf = (char *)ifr;
185 ifc.ifc_len = nalloc * sizeof(*ifr);
186
187 rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
188 if (rc < 0) {
189 CERROR("Error %d enumerating interfaces\n", rc);
190 goto out1;
191 }
192
193 LASSERT(!rc);
194
195 nfound = ifc.ifc_len / sizeof(*ifr);
196 LASSERT(nfound <= nalloc);
197
198 if (nfound < nalloc || toobig)
199 break;
200
201 LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
202 nalloc *= 2;
203 }
204
205 if (!nfound)
206 goto out1;
207
208 LIBCFS_ALLOC(names, nfound * sizeof(*names));
209 if (!names) {
210 rc = -ENOMEM;
211 goto out1;
212 }
213
214 for (i = 0; i < nfound; i++) {
215 nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
216 if (nob == IFNAMSIZ) {
217 /* no space for terminating NULL */
218 CERROR("interface name %.*s too long (%d max)\n",
219 nob, ifr[i].ifr_name, IFNAMSIZ);
220 rc = -ENAMETOOLONG;
221 goto out2;
222 }
223
224 LIBCFS_ALLOC(names[i], IFNAMSIZ);
225 if (!names[i]) {
226 rc = -ENOMEM;
227 goto out2;
228 }
229
230 memcpy(names[i], ifr[i].ifr_name, nob);
231 names[i][nob] = 0;
232 }
233
234 *namesp = names;
235 rc = nfound;
236
237 out2:
238 if (rc < 0)
239 lnet_ipif_free_enumeration(names, nfound);
240 out1:
241 LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
242 out0:
243 return rc;
244 }
245 EXPORT_SYMBOL(lnet_ipif_enumerate);
246
247 void
lnet_ipif_free_enumeration(char ** names,int n)248 lnet_ipif_free_enumeration(char **names, int n)
249 {
250 int i;
251
252 LASSERT(n > 0);
253
254 for (i = 0; i < n && names[i]; i++)
255 LIBCFS_FREE(names[i], IFNAMSIZ);
256
257 LIBCFS_FREE(names, n * sizeof(*names));
258 }
259 EXPORT_SYMBOL(lnet_ipif_free_enumeration);
260
261 int
lnet_sock_write(struct socket * sock,void * buffer,int nob,int timeout)262 lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
263 {
264 int rc;
265 long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
266 unsigned long then;
267 struct timeval tv;
268 struct kvec iov = { .iov_base = buffer, .iov_len = nob };
269 struct msghdr msg = {NULL,};
270
271 LASSERT(nob > 0);
272 /*
273 * Caller may pass a zero timeout if she thinks the socket buffer is
274 * empty enough to take the whole message immediately
275 */
276 iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, nob);
277 for (;;) {
278 msg.msg_flags = !timeout ? MSG_DONTWAIT : 0;
279 if (timeout) {
280 /* Set send timeout to remaining time */
281 jiffies_to_timeval(jiffies_left, &tv);
282 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
283 (char *)&tv, sizeof(tv));
284 if (rc) {
285 CERROR("Can't set socket send timeout %ld.%06d: %d\n",
286 (long)tv.tv_sec, (int)tv.tv_usec, rc);
287 return rc;
288 }
289 }
290
291 then = jiffies;
292 rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
293 jiffies_left -= jiffies - then;
294
295 if (rc < 0)
296 return rc;
297
298 if (!rc) {
299 CERROR("Unexpected zero rc\n");
300 return -ECONNABORTED;
301 }
302
303 if (!msg_data_left(&msg))
304 break;
305
306 if (jiffies_left <= 0)
307 return -EAGAIN;
308 }
309 return 0;
310 }
311 EXPORT_SYMBOL(lnet_sock_write);
312
313 int
lnet_sock_read(struct socket * sock,void * buffer,int nob,int timeout)314 lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
315 {
316 int rc;
317 long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
318 unsigned long then;
319 struct timeval tv;
320
321 LASSERT(nob > 0);
322 LASSERT(jiffies_left > 0);
323
324 for (;;) {
325 struct kvec iov = {
326 .iov_base = buffer,
327 .iov_len = nob
328 };
329 struct msghdr msg = {
330 .msg_flags = 0
331 };
332
333 /* Set receive timeout to remaining time */
334 jiffies_to_timeval(jiffies_left, &tv);
335 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
336 (char *)&tv, sizeof(tv));
337 if (rc) {
338 CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
339 (long)tv.tv_sec, (int)tv.tv_usec, rc);
340 return rc;
341 }
342
343 then = jiffies;
344 rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
345 jiffies_left -= jiffies - then;
346
347 if (rc < 0)
348 return rc;
349
350 if (!rc)
351 return -ECONNRESET;
352
353 buffer = ((char *)buffer) + rc;
354 nob -= rc;
355
356 if (!nob)
357 return 0;
358
359 if (jiffies_left <= 0)
360 return -ETIMEDOUT;
361 }
362 }
363 EXPORT_SYMBOL(lnet_sock_read);
364
365 static int
lnet_sock_create(struct socket ** sockp,int * fatal,__u32 local_ip,int local_port)366 lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
367 int local_port)
368 {
369 struct sockaddr_in locaddr;
370 struct socket *sock;
371 int rc;
372 int option;
373
374 /* All errors are fatal except bind failure if the port is in use */
375 *fatal = 1;
376
377 rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
378 *sockp = sock;
379 if (rc) {
380 CERROR("Can't create socket: %d\n", rc);
381 return rc;
382 }
383
384 option = 1;
385 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
386 (char *)&option, sizeof(option));
387 if (rc) {
388 CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
389 goto failed;
390 }
391
392 if (local_ip || local_port) {
393 memset(&locaddr, 0, sizeof(locaddr));
394 locaddr.sin_family = AF_INET;
395 locaddr.sin_port = htons(local_port);
396 locaddr.sin_addr.s_addr = !local_ip ?
397 INADDR_ANY : htonl(local_ip);
398
399 rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
400 sizeof(locaddr));
401 if (rc == -EADDRINUSE) {
402 CDEBUG(D_NET, "Port %d already in use\n", local_port);
403 *fatal = 0;
404 goto failed;
405 }
406 if (rc) {
407 CERROR("Error trying to bind to port %d: %d\n",
408 local_port, rc);
409 goto failed;
410 }
411 }
412 return 0;
413
414 failed:
415 sock_release(sock);
416 return rc;
417 }
418
419 int
lnet_sock_setbuf(struct socket * sock,int txbufsize,int rxbufsize)420 lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
421 {
422 int option;
423 int rc;
424
425 if (txbufsize) {
426 option = txbufsize;
427 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
428 (char *)&option, sizeof(option));
429 if (rc) {
430 CERROR("Can't set send buffer %d: %d\n",
431 option, rc);
432 return rc;
433 }
434 }
435
436 if (rxbufsize) {
437 option = rxbufsize;
438 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
439 (char *)&option, sizeof(option));
440 if (rc) {
441 CERROR("Can't set receive buffer %d: %d\n",
442 option, rc);
443 return rc;
444 }
445 }
446 return 0;
447 }
448 EXPORT_SYMBOL(lnet_sock_setbuf);
449
450 int
lnet_sock_getaddr(struct socket * sock,bool remote,__u32 * ip,int * port)451 lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
452 {
453 struct sockaddr_in sin;
454 int len = sizeof(sin);
455 int rc;
456
457 if (remote)
458 rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
459 else
460 rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
461 if (rc) {
462 CERROR("Error %d getting sock %s IP/port\n",
463 rc, remote ? "peer" : "local");
464 return rc;
465 }
466
467 if (ip)
468 *ip = ntohl(sin.sin_addr.s_addr);
469
470 if (port)
471 *port = ntohs(sin.sin_port);
472
473 return 0;
474 }
475 EXPORT_SYMBOL(lnet_sock_getaddr);
476
477 int
lnet_sock_getbuf(struct socket * sock,int * txbufsize,int * rxbufsize)478 lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
479 {
480 if (txbufsize)
481 *txbufsize = sock->sk->sk_sndbuf;
482
483 if (rxbufsize)
484 *rxbufsize = sock->sk->sk_rcvbuf;
485
486 return 0;
487 }
488 EXPORT_SYMBOL(lnet_sock_getbuf);
489
490 int
lnet_sock_listen(struct socket ** sockp,__u32 local_ip,int local_port,int backlog)491 lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
492 int backlog)
493 {
494 int fatal;
495 int rc;
496
497 rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
498 if (rc) {
499 if (!fatal)
500 CERROR("Can't create socket: port %d already in use\n",
501 local_port);
502 return rc;
503 }
504
505 rc = kernel_listen(*sockp, backlog);
506 if (!rc)
507 return 0;
508
509 CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
510 sock_release(*sockp);
511 return rc;
512 }
513
514 int
lnet_sock_accept(struct socket ** newsockp,struct socket * sock)515 lnet_sock_accept(struct socket **newsockp, struct socket *sock)
516 {
517 wait_queue_t wait;
518 struct socket *newsock;
519 int rc;
520
521 /*
522 * XXX this should add a ref to sock->ops->owner, if
523 * TCP could be a module
524 */
525 rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
526 if (rc) {
527 CERROR("Can't allocate socket\n");
528 return rc;
529 }
530
531 newsock->ops = sock->ops;
532
533 rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
534 if (rc == -EAGAIN) {
535 /* Nothing ready, so wait for activity */
536 init_waitqueue_entry(&wait, current);
537 add_wait_queue(sk_sleep(sock->sk), &wait);
538 set_current_state(TASK_INTERRUPTIBLE);
539 schedule();
540 remove_wait_queue(sk_sleep(sock->sk), &wait);
541 rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
542 }
543
544 if (rc)
545 goto failed;
546
547 *newsockp = newsock;
548 return 0;
549
550 failed:
551 sock_release(newsock);
552 return rc;
553 }
554
555 int
lnet_sock_connect(struct socket ** sockp,int * fatal,__u32 local_ip,int local_port,__u32 peer_ip,int peer_port)556 lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
557 int local_port, __u32 peer_ip, int peer_port)
558 {
559 struct sockaddr_in srvaddr;
560 int rc;
561
562 rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
563 if (rc)
564 return rc;
565
566 memset(&srvaddr, 0, sizeof(srvaddr));
567 srvaddr.sin_family = AF_INET;
568 srvaddr.sin_port = htons(peer_port);
569 srvaddr.sin_addr.s_addr = htonl(peer_ip);
570
571 rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
572 sizeof(srvaddr), 0);
573 if (!rc)
574 return 0;
575
576 /*
577 * EADDRNOTAVAIL probably means we're already connected to the same
578 * peer/port on the same local port on a differently typed
579 * connection. Let our caller retry with a different local
580 * port...
581 */
582 *fatal = !(rc == -EADDRNOTAVAIL);
583
584 CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
585 "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
586 &local_ip, local_port, &peer_ip, peer_port);
587
588 sock_release(*sockp);
589 return rc;
590 }
591