1 /*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2013 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel MIC User Space Tools.
19 */
20
21 #define _GNU_SOURCE
22
23 #include <stdlib.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <assert.h>
27 #include <unistd.h>
28 #include <stdbool.h>
29 #include <signal.h>
30 #include <poll.h>
31 #include <features.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/socket.h>
36 #include <linux/virtio_ring.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_console.h>
39 #include <linux/virtio_blk.h>
40 #include <linux/version.h>
41 #include "mpssd.h"
42 #include <linux/mic_ioctl.h>
43 #include <linux/mic_common.h>
44 #include <tools/endian.h>
45
46 static void *init_mic(void *arg);
47
48 static FILE *logfp;
49 static struct mic_info mic_list;
50
/* Number of elements in a true array (meaningless for a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Minimum of x and y after converting both to @type. Each operand is
 * evaluated exactly once (GNU statement expression).
 */
#define min_t(type, x, y) ({			\
	type __min1 = (x);			\
	type __min2 = (y);			\
	__min1 < __min2 ? __min1 : __min2; })

/*
 * align addr on a size boundary - adjust address up/down if needed.
 * size must be a power of two. Every macro argument is parenthesized so
 * that operands built from low-precedence operators (|, ?:, ...) are
 * still aligned as a whole.
 */
#define _ALIGN_DOWN(addr, size)	((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)	_ALIGN_DOWN((addr) + (size) - 1, size)

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)	_ALIGN_UP(addr, size)

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)	_ALIGN(addr, PAGE_SIZE)

/*
 * Force a single volatile access to x. __typeof__ is used instead of
 * typeof so the macro also works in strict ISO C modes.
 */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

#define GSO_ENABLED		1
#define MAX_GSO_SIZE		(64 * 1024)
#define ETH_H_LEN		14
/* Largest packet buffer: GSO payload plus header, rounded up to 64. */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Offset of the first vring inside the mapping set up by init_vr(). */
#define MIC_DEVICE_PAGE_END	0x1000

/* Fallback for headers that do not provide this flag. */
#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
79
80 static struct {
81 struct mic_device_desc dd;
82 struct mic_vqconfig vqconfig[2];
83 __u32 host_features, guest_acknowledgements;
84 struct virtio_console_config cons_config;
85 } virtcons_dev_page = {
86 .dd = {
87 .type = VIRTIO_ID_CONSOLE,
88 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89 .feature_len = sizeof(virtcons_dev_page.host_features),
90 .config_len = sizeof(virtcons_dev_page.cons_config),
91 },
92 .vqconfig[0] = {
93 .num = htole16(MIC_VRING_ENTRIES),
94 },
95 .vqconfig[1] = {
96 .num = htole16(MIC_VRING_ENTRIES),
97 },
98 };
99
100 static struct {
101 struct mic_device_desc dd;
102 struct mic_vqconfig vqconfig[2];
103 __u32 host_features, guest_acknowledgements;
104 struct virtio_net_config net_config;
105 } virtnet_dev_page = {
106 .dd = {
107 .type = VIRTIO_ID_NET,
108 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109 .feature_len = sizeof(virtnet_dev_page.host_features),
110 .config_len = sizeof(virtnet_dev_page.net_config),
111 },
112 .vqconfig[0] = {
113 .num = htole16(MIC_VRING_ENTRIES),
114 },
115 .vqconfig[1] = {
116 .num = htole16(MIC_VRING_ENTRIES),
117 },
118 #if GSO_ENABLED
119 .host_features = htole32(
120 1 << VIRTIO_NET_F_CSUM |
121 1 << VIRTIO_NET_F_GSO |
122 1 << VIRTIO_NET_F_GUEST_TSO4 |
123 1 << VIRTIO_NET_F_GUEST_TSO6 |
124 1 << VIRTIO_NET_F_GUEST_ECN),
125 #else
126 .host_features = 0,
127 #endif
128 };
129
130 static const char *mic_config_dir = "/etc/mpss";
131 static const char *virtblk_backend = "VIRTBLK_BACKEND";
132 static struct {
133 struct mic_device_desc dd;
134 struct mic_vqconfig vqconfig[1];
135 __u32 host_features, guest_acknowledgements;
136 struct virtio_blk_config blk_config;
137 } virtblk_dev_page = {
138 .dd = {
139 .type = VIRTIO_ID_BLOCK,
140 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141 .feature_len = sizeof(virtblk_dev_page.host_features),
142 .config_len = sizeof(virtblk_dev_page.blk_config),
143 },
144 .vqconfig[0] = {
145 .num = htole16(MIC_VRING_ENTRIES),
146 },
147 .host_features =
148 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
149 .blk_config = {
150 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
151 .capacity = htole64(0),
152 }
153 };
154
155 static char *myname;
156
157 static int
tap_configure(struct mic_info * mic,char * dev)158 tap_configure(struct mic_info *mic, char *dev)
159 {
160 pid_t pid;
161 char *ifargv[7];
162 char ipaddr[IFNAMSIZ];
163 int ret = 0;
164
165 pid = fork();
166 if (pid == 0) {
167 ifargv[0] = "ip";
168 ifargv[1] = "link";
169 ifargv[2] = "set";
170 ifargv[3] = dev;
171 ifargv[4] = "up";
172 ifargv[5] = NULL;
173 mpsslog("Configuring %s\n", dev);
174 ret = execvp("ip", ifargv);
175 if (ret < 0) {
176 mpsslog("%s execvp failed errno %s\n",
177 mic->name, strerror(errno));
178 return ret;
179 }
180 }
181 if (pid < 0) {
182 mpsslog("%s fork failed errno %s\n",
183 mic->name, strerror(errno));
184 return ret;
185 }
186
187 ret = waitpid(pid, NULL, 0);
188 if (ret < 0) {
189 mpsslog("%s waitpid failed errno %s\n",
190 mic->name, strerror(errno));
191 return ret;
192 }
193
194 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
195
196 pid = fork();
197 if (pid == 0) {
198 ifargv[0] = "ip";
199 ifargv[1] = "addr";
200 ifargv[2] = "add";
201 ifargv[3] = ipaddr;
202 ifargv[4] = "dev";
203 ifargv[5] = dev;
204 ifargv[6] = NULL;
205 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206 ret = execvp("ip", ifargv);
207 if (ret < 0) {
208 mpsslog("%s execvp failed errno %s\n",
209 mic->name, strerror(errno));
210 return ret;
211 }
212 }
213 if (pid < 0) {
214 mpsslog("%s fork failed errno %s\n",
215 mic->name, strerror(errno));
216 return ret;
217 }
218
219 ret = waitpid(pid, NULL, 0);
220 if (ret < 0) {
221 mpsslog("%s waitpid failed errno %s\n",
222 mic->name, strerror(errno));
223 return ret;
224 }
225 mpsslog("MIC name %s %s %d DONE!\n",
226 mic->name, __func__, __LINE__);
227 return 0;
228 }
229
tun_alloc(struct mic_info * mic,char * dev)230 static int tun_alloc(struct mic_info *mic, char *dev)
231 {
232 struct ifreq ifr;
233 int fd, err;
234 #if GSO_ENABLED
235 unsigned offload;
236 #endif
237 fd = open("/dev/net/tun", O_RDWR);
238 if (fd < 0) {
239 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
240 goto done;
241 }
242
243 memset(&ifr, 0, sizeof(ifr));
244
245 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
246 if (*dev)
247 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
248
249 err = ioctl(fd, TUNSETIFF, (void *)&ifr);
250 if (err < 0) {
251 mpsslog("%s %s %d TUNSETIFF failed %s\n",
252 mic->name, __func__, __LINE__, strerror(errno));
253 close(fd);
254 return err;
255 }
256 #if GSO_ENABLED
257 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
258
259 err = ioctl(fd, TUNSETOFFLOAD, offload);
260 if (err < 0) {
261 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
262 mic->name, __func__, __LINE__, strerror(errno));
263 close(fd);
264 return err;
265 }
266 #endif
267 strcpy(dev, ifr.ifr_name);
268 mpsslog("Created TAP %s\n", dev);
269 done:
270 return fd;
271 }
272
/* Slots of the pollfd array used by virtio_net(). */
#define NET_FD_VIRTIO_NET	0	/* virtio net device fd */
#define NET_FD_TUN		1	/* TAP device fd */
#define MAX_NET_FD		2	/* number of slots */
276
set_dp(struct mic_info * mic,int type,void * dp)277 static void set_dp(struct mic_info *mic, int type, void *dp)
278 {
279 switch (type) {
280 case VIRTIO_ID_CONSOLE:
281 mic->mic_console.console_dp = dp;
282 return;
283 case VIRTIO_ID_NET:
284 mic->mic_net.net_dp = dp;
285 return;
286 case VIRTIO_ID_BLOCK:
287 mic->mic_virtblk.block_dp = dp;
288 return;
289 }
290 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
291 assert(0);
292 }
293
get_dp(struct mic_info * mic,int type)294 static void *get_dp(struct mic_info *mic, int type)
295 {
296 switch (type) {
297 case VIRTIO_ID_CONSOLE:
298 return mic->mic_console.console_dp;
299 case VIRTIO_ID_NET:
300 return mic->mic_net.net_dp;
301 case VIRTIO_ID_BLOCK:
302 return mic->mic_virtblk.block_dp;
303 }
304 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
305 assert(0);
306 return NULL;
307 }
308
get_device_desc(struct mic_info * mic,int type)309 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
310 {
311 struct mic_device_desc *d;
312 int i;
313 void *dp = get_dp(mic, type);
314
315 for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
316 i += mic_total_desc_size(d)) {
317 d = dp + i;
318
319 /* End of list */
320 if (d->type == 0)
321 break;
322
323 if (d->type == -1)
324 continue;
325
326 mpsslog("%s %s d-> type %d d %p\n",
327 mic->name, __func__, d->type, d);
328
329 if (d->type == (__u8)type)
330 return d;
331 }
332 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
333 return NULL;
334 }
335
336 /* See comments in vhost.c for explanation of next_desc() */
next_desc(struct vring_desc * desc)337 static unsigned next_desc(struct vring_desc *desc)
338 {
339 unsigned int next;
340
341 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
342 return -1U;
343 next = le16toh(desc->next);
344 return next;
345 }
346
347 /* Sum up all the IOVEC length */
348 static ssize_t
sum_iovec_len(struct mic_copy_desc * copy)349 sum_iovec_len(struct mic_copy_desc *copy)
350 {
351 ssize_t sum = 0;
352 int i;
353
354 for (i = 0; i < copy->iovcnt; i++)
355 sum += copy->iov[i].iov_len;
356 return sum;
357 }
358
verify_out_len(struct mic_info * mic,struct mic_copy_desc * copy)359 static inline void verify_out_len(struct mic_info *mic,
360 struct mic_copy_desc *copy)
361 {
362 if (copy->out_len != sum_iovec_len(copy)) {
363 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
364 mic->name, __func__, __LINE__,
365 copy->out_len, sum_iovec_len(copy));
366 assert(copy->out_len == sum_iovec_len(copy));
367 }
368 }
369
370 /* Display an iovec */
371 static void
disp_iovec(struct mic_info * mic,struct mic_copy_desc * copy,const char * s,int line)372 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
373 const char *s, int line)
374 {
375 int i;
376
377 for (i = 0; i < copy->iovcnt; i++)
378 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
379 mic->name, s, line, i,
380 copy->iov[i].iov_base, copy->iov[i].iov_len);
381 }
382
read_avail_idx(struct mic_vring * vr)383 static inline __u16 read_avail_idx(struct mic_vring *vr)
384 {
385 return ACCESS_ONCE(vr->info->avail_idx);
386 }
387
txrx_prepare(int type,bool tx,struct mic_vring * vr,struct mic_copy_desc * copy,ssize_t len)388 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
389 struct mic_copy_desc *copy, ssize_t len)
390 {
391 copy->vr_idx = tx ? 0 : 1;
392 copy->update_used = true;
393 if (type == VIRTIO_ID_NET)
394 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
395 else
396 copy->iov[0].iov_len = len;
397 }
398
399 /* Central API which triggers the copies */
400 static int
mic_virtio_copy(struct mic_info * mic,int fd,struct mic_vring * vr,struct mic_copy_desc * copy)401 mic_virtio_copy(struct mic_info *mic, int fd,
402 struct mic_vring *vr, struct mic_copy_desc *copy)
403 {
404 int ret;
405
406 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
407 if (ret) {
408 mpsslog("%s %s %d errno %s ret %d\n",
409 mic->name, __func__, __LINE__,
410 strerror(errno), ret);
411 }
412 return ret;
413 }
414
_vring_size(unsigned int num,unsigned long align)415 static inline unsigned _vring_size(unsigned int num, unsigned long align)
416 {
417 return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
418 + align - 1) & ~(align - 1))
419 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
420 }
421
422 /*
423 * This initialization routine requires at least one
424 * vring i.e. vr0. vr1 is optional.
425 */
426 static void *
init_vr(struct mic_info * mic,int fd,int type,struct mic_vring * vr0,struct mic_vring * vr1,int num_vq)427 init_vr(struct mic_info *mic, int fd, int type,
428 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
429 {
430 int vr_size;
431 char *va;
432
433 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
434 MIC_VIRTIO_RING_ALIGN) +
435 sizeof(struct _mic_vring_info));
436 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
437 PROT_READ, MAP_SHARED, fd, 0);
438 if (MAP_FAILED == va) {
439 mpsslog("%s %s %d mmap failed errno %s\n",
440 mic->name, __func__, __LINE__,
441 strerror(errno));
442 goto done;
443 }
444 set_dp(mic, type, va);
445 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
446 vr0->info = vr0->va +
447 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
448 vring_init(&vr0->vr,
449 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
450 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
451 __func__, mic->name, vr0->va, vr0->info, vr_size,
452 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
453 mpsslog("magic 0x%x expected 0x%x\n",
454 le32toh(vr0->info->magic), MIC_MAGIC + type);
455 assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
456 if (vr1) {
457 vr1->va = (struct mic_vring *)
458 &va[MIC_DEVICE_PAGE_END + vr_size];
459 vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
460 MIC_VIRTIO_RING_ALIGN);
461 vring_init(&vr1->vr,
462 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
463 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
464 __func__, mic->name, vr1->va, vr1->info, vr_size,
465 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
466 mpsslog("magic 0x%x expected 0x%x\n",
467 le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
468 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
469 }
470 done:
471 return va;
472 }
473
474 static int
wait_for_card_driver(struct mic_info * mic,int fd,int type)475 wait_for_card_driver(struct mic_info *mic, int fd, int type)
476 {
477 struct pollfd pollfd;
478 int err;
479 struct mic_device_desc *desc = get_device_desc(mic, type);
480 __u8 prev_status;
481
482 if (!desc)
483 return -ENODEV;
484 prev_status = desc->status;
485 pollfd.fd = fd;
486 mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
487 mic->name, __func__, type, desc->status);
488
489 while (1) {
490 pollfd.events = POLLIN;
491 pollfd.revents = 0;
492 err = poll(&pollfd, 1, -1);
493 if (err < 0) {
494 mpsslog("%s %s poll failed %s\n",
495 mic->name, __func__, strerror(errno));
496 continue;
497 }
498
499 if (pollfd.revents) {
500 if (desc->status != prev_status) {
501 mpsslog("%s %s Waiting... desc-> type %d "
502 "status 0x%x\n",
503 mic->name, __func__, type,
504 desc->status);
505 prev_status = desc->status;
506 }
507 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
508 mpsslog("%s %s poll.revents %d\n",
509 mic->name, __func__, pollfd.revents);
510 mpsslog("%s %s desc-> type %d status 0x%x\n",
511 mic->name, __func__, type,
512 desc->status);
513 break;
514 }
515 }
516 }
517 return 0;
518 }
519
520 /* Spin till we have some descriptors */
521 static void
spin_for_descriptors(struct mic_info * mic,struct mic_vring * vr)522 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
523 {
524 __u16 avail_idx = read_avail_idx(vr);
525
526 while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
527 #ifdef DEBUG
528 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
529 mic->name, __func__,
530 le16toh(vr->vr.avail->idx), vr->info->avail_idx);
531 #endif
532 sched_yield();
533 }
534 }
535
536 static void *
virtio_net(void * arg)537 virtio_net(void *arg)
538 {
539 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
540 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
541 struct iovec vnet_iov[2][2] = {
542 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
543 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
544 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
545 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
546 };
547 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
548 struct mic_info *mic = (struct mic_info *)arg;
549 char if_name[IFNAMSIZ];
550 struct pollfd net_poll[MAX_NET_FD];
551 struct mic_vring tx_vr, rx_vr;
552 struct mic_copy_desc copy;
553 struct mic_device_desc *desc;
554 int err;
555
556 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
557 mic->mic_net.tap_fd = tun_alloc(mic, if_name);
558 if (mic->mic_net.tap_fd < 0)
559 goto done;
560
561 if (tap_configure(mic, if_name))
562 goto done;
563 mpsslog("MIC name %s id %d\n", mic->name, mic->id);
564
565 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
566 net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
567 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
568 net_poll[NET_FD_TUN].events = POLLIN;
569
570 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
571 VIRTIO_ID_NET, &tx_vr, &rx_vr,
572 virtnet_dev_page.dd.num_vq)) {
573 mpsslog("%s init_vr failed %s\n",
574 mic->name, strerror(errno));
575 goto done;
576 }
577
578 copy.iovcnt = 2;
579 desc = get_device_desc(mic, VIRTIO_ID_NET);
580
581 while (1) {
582 ssize_t len;
583
584 net_poll[NET_FD_VIRTIO_NET].revents = 0;
585 net_poll[NET_FD_TUN].revents = 0;
586
587 /* Start polling for data from tap and virtio net */
588 err = poll(net_poll, 2, -1);
589 if (err < 0) {
590 mpsslog("%s poll failed %s\n",
591 __func__, strerror(errno));
592 continue;
593 }
594 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
595 err = wait_for_card_driver(mic,
596 mic->mic_net.virtio_net_fd,
597 VIRTIO_ID_NET);
598 if (err) {
599 mpsslog("%s %s %d Exiting...\n",
600 mic->name, __func__, __LINE__);
601 break;
602 }
603 }
604 /*
605 * Check if there is data to be read from TUN and write to
606 * virtio net fd if there is.
607 */
608 if (net_poll[NET_FD_TUN].revents & POLLIN) {
609 copy.iov = iov0;
610 len = readv(net_poll[NET_FD_TUN].fd,
611 copy.iov, copy.iovcnt);
612 if (len > 0) {
613 struct virtio_net_hdr *hdr
614 = (struct virtio_net_hdr *)vnet_hdr[0];
615
616 /* Disable checksums on the card since we are on
617 a reliable PCIe link */
618 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
619 #ifdef DEBUG
620 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
621 __func__, __LINE__, hdr->flags);
622 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
623 copy.out_len, hdr->gso_type);
624 #endif
625 #ifdef DEBUG
626 disp_iovec(mic, copy, __func__, __LINE__);
627 mpsslog("%s %s %d read from tap 0x%lx\n",
628 mic->name, __func__, __LINE__,
629 len);
630 #endif
631 spin_for_descriptors(mic, &tx_vr);
632 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©,
633 len);
634
635 err = mic_virtio_copy(mic,
636 mic->mic_net.virtio_net_fd, &tx_vr,
637 ©);
638 if (err < 0) {
639 mpsslog("%s %s %d mic_virtio_copy %s\n",
640 mic->name, __func__, __LINE__,
641 strerror(errno));
642 }
643 if (!err)
644 verify_out_len(mic, ©);
645 #ifdef DEBUG
646 disp_iovec(mic, copy, __func__, __LINE__);
647 mpsslog("%s %s %d wrote to net 0x%lx\n",
648 mic->name, __func__, __LINE__,
649 sum_iovec_len(©));
650 #endif
651 /* Reinitialize IOV for next run */
652 iov0[1].iov_len = MAX_NET_PKT_SIZE;
653 } else if (len < 0) {
654 disp_iovec(mic, ©, __func__, __LINE__);
655 mpsslog("%s %s %d read failed %s ", mic->name,
656 __func__, __LINE__, strerror(errno));
657 mpsslog("cnt %d sum %zd\n",
658 copy.iovcnt, sum_iovec_len(©));
659 }
660 }
661
662 /*
663 * Check if there is data to be read from virtio net and
664 * write to TUN if there is.
665 */
666 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
667 while (rx_vr.info->avail_idx !=
668 le16toh(rx_vr.vr.avail->idx)) {
669 copy.iov = iov1;
670 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©,
671 MAX_NET_PKT_SIZE
672 + sizeof(struct virtio_net_hdr));
673
674 err = mic_virtio_copy(mic,
675 mic->mic_net.virtio_net_fd, &rx_vr,
676 ©);
677 if (!err) {
678 #ifdef DEBUG
679 struct virtio_net_hdr *hdr
680 = (struct virtio_net_hdr *)
681 vnet_hdr[1];
682
683 mpsslog("%s %s %d hdr->flags 0x%x, ",
684 mic->name, __func__, __LINE__,
685 hdr->flags);
686 mpsslog("out_len %d gso_type 0x%x\n",
687 copy.out_len,
688 hdr->gso_type);
689 #endif
690 /* Set the correct output iov_len */
691 iov1[1].iov_len = copy.out_len -
692 sizeof(struct virtio_net_hdr);
693 verify_out_len(mic, ©);
694 #ifdef DEBUG
695 disp_iovec(mic, copy, __func__,
696 __LINE__);
697 mpsslog("%s %s %d ",
698 mic->name, __func__, __LINE__);
699 mpsslog("read from net 0x%lx\n",
700 sum_iovec_len(copy));
701 #endif
702 len = writev(net_poll[NET_FD_TUN].fd,
703 copy.iov, copy.iovcnt);
704 if (len != sum_iovec_len(©)) {
705 mpsslog("Tun write failed %s ",
706 strerror(errno));
707 mpsslog("len 0x%zx ", len);
708 mpsslog("read_len 0x%zx\n",
709 sum_iovec_len(©));
710 } else {
711 #ifdef DEBUG
712 disp_iovec(mic, ©, __func__,
713 __LINE__);
714 mpsslog("%s %s %d ",
715 mic->name, __func__,
716 __LINE__);
717 mpsslog("wrote to tap 0x%lx\n",
718 len);
719 #endif
720 }
721 } else {
722 mpsslog("%s %s %d mic_virtio_copy %s\n",
723 mic->name, __func__, __LINE__,
724 strerror(errno));
725 break;
726 }
727 }
728 }
729 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
730 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
731 }
732 done:
733 pthread_exit(NULL);
734 }
735
/* virtio_console: slots of the pollfd array used by virtio_console() */
#define VIRTIO_CONSOLE_FD	0
#define MONITOR_FD		(VIRTIO_CONSOLE_FD + 1)
#define MAX_CONSOLE_FD		(MONITOR_FD + 1)  /* must be the last one + 1 */
/* Not referenced in this part of the file. */
#define MAX_BUFFER_SIZE		PAGE_SIZE
741
742 static void *
virtio_console(void * arg)743 virtio_console(void *arg)
744 {
745 static __u8 vcons_buf[2][PAGE_SIZE];
746 struct iovec vcons_iov[2] = {
747 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
748 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
749 };
750 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
751 struct mic_info *mic = (struct mic_info *)arg;
752 int err;
753 struct pollfd console_poll[MAX_CONSOLE_FD];
754 int pty_fd;
755 char *pts_name;
756 ssize_t len;
757 struct mic_vring tx_vr, rx_vr;
758 struct mic_copy_desc copy;
759 struct mic_device_desc *desc;
760
761 pty_fd = posix_openpt(O_RDWR);
762 if (pty_fd < 0) {
763 mpsslog("can't open a pseudoterminal master device: %s\n",
764 strerror(errno));
765 goto _return;
766 }
767 pts_name = ptsname(pty_fd);
768 if (pts_name == NULL) {
769 mpsslog("can't get pts name\n");
770 goto _close_pty;
771 }
772 printf("%s console message goes to %s\n", mic->name, pts_name);
773 mpsslog("%s console message goes to %s\n", mic->name, pts_name);
774 err = grantpt(pty_fd);
775 if (err < 0) {
776 mpsslog("can't grant access: %s %s\n",
777 pts_name, strerror(errno));
778 goto _close_pty;
779 }
780 err = unlockpt(pty_fd);
781 if (err < 0) {
782 mpsslog("can't unlock a pseudoterminal: %s %s\n",
783 pts_name, strerror(errno));
784 goto _close_pty;
785 }
786 console_poll[MONITOR_FD].fd = pty_fd;
787 console_poll[MONITOR_FD].events = POLLIN;
788
789 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
790 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
791
792 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
793 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
794 virtcons_dev_page.dd.num_vq)) {
795 mpsslog("%s init_vr failed %s\n",
796 mic->name, strerror(errno));
797 goto _close_pty;
798 }
799
800 copy.iovcnt = 1;
801 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
802
803 for (;;) {
804 console_poll[MONITOR_FD].revents = 0;
805 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
806 err = poll(console_poll, MAX_CONSOLE_FD, -1);
807 if (err < 0) {
808 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
809 strerror(errno));
810 continue;
811 }
812 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
813 err = wait_for_card_driver(mic,
814 mic->mic_console.virtio_console_fd,
815 VIRTIO_ID_CONSOLE);
816 if (err) {
817 mpsslog("%s %s %d Exiting...\n",
818 mic->name, __func__, __LINE__);
819 break;
820 }
821 }
822
823 if (console_poll[MONITOR_FD].revents & POLLIN) {
824 copy.iov = iov0;
825 len = readv(pty_fd, copy.iov, copy.iovcnt);
826 if (len > 0) {
827 #ifdef DEBUG
828 disp_iovec(mic, copy, __func__, __LINE__);
829 mpsslog("%s %s %d read from tap 0x%lx\n",
830 mic->name, __func__, __LINE__,
831 len);
832 #endif
833 spin_for_descriptors(mic, &tx_vr);
834 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
835 ©, len);
836
837 err = mic_virtio_copy(mic,
838 mic->mic_console.virtio_console_fd,
839 &tx_vr, ©);
840 if (err < 0) {
841 mpsslog("%s %s %d mic_virtio_copy %s\n",
842 mic->name, __func__, __LINE__,
843 strerror(errno));
844 }
845 if (!err)
846 verify_out_len(mic, ©);
847 #ifdef DEBUG
848 disp_iovec(mic, copy, __func__, __LINE__);
849 mpsslog("%s %s %d wrote to net 0x%lx\n",
850 mic->name, __func__, __LINE__,
851 sum_iovec_len(copy));
852 #endif
853 /* Reinitialize IOV for next run */
854 iov0->iov_len = PAGE_SIZE;
855 } else if (len < 0) {
856 disp_iovec(mic, ©, __func__, __LINE__);
857 mpsslog("%s %s %d read failed %s ",
858 mic->name, __func__, __LINE__,
859 strerror(errno));
860 mpsslog("cnt %d sum %zd\n",
861 copy.iovcnt, sum_iovec_len(©));
862 }
863 }
864
865 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
866 while (rx_vr.info->avail_idx !=
867 le16toh(rx_vr.vr.avail->idx)) {
868 copy.iov = iov1;
869 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
870 ©, PAGE_SIZE);
871
872 err = mic_virtio_copy(mic,
873 mic->mic_console.virtio_console_fd,
874 &rx_vr, ©);
875 if (!err) {
876 /* Set the correct output iov_len */
877 iov1->iov_len = copy.out_len;
878 verify_out_len(mic, ©);
879 #ifdef DEBUG
880 disp_iovec(mic, copy, __func__,
881 __LINE__);
882 mpsslog("%s %s %d ",
883 mic->name, __func__, __LINE__);
884 mpsslog("read from net 0x%lx\n",
885 sum_iovec_len(copy));
886 #endif
887 len = writev(pty_fd,
888 copy.iov, copy.iovcnt);
889 if (len != sum_iovec_len(©)) {
890 mpsslog("Tun write failed %s ",
891 strerror(errno));
892 mpsslog("len 0x%zx ", len);
893 mpsslog("read_len 0x%zx\n",
894 sum_iovec_len(©));
895 } else {
896 #ifdef DEBUG
897 disp_iovec(mic, copy, __func__,
898 __LINE__);
899 mpsslog("%s %s %d ",
900 mic->name, __func__,
901 __LINE__);
902 mpsslog("wrote to tap 0x%lx\n",
903 len);
904 #endif
905 }
906 } else {
907 mpsslog("%s %s %d mic_virtio_copy %s\n",
908 mic->name, __func__, __LINE__,
909 strerror(errno));
910 break;
911 }
912 }
913 }
914 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
915 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
916 }
917 _close_pty:
918 close(pty_fd);
919 _return:
920 pthread_exit(NULL);
921 }
922
923 static void
add_virtio_device(struct mic_info * mic,struct mic_device_desc * dd)924 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
925 {
926 char path[PATH_MAX];
927 int fd, err;
928
929 snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
930 fd = open(path, O_RDWR);
931 if (fd < 0) {
932 mpsslog("Could not open %s %s\n", path, strerror(errno));
933 return;
934 }
935
936 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
937 if (err < 0) {
938 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
939 close(fd);
940 return;
941 }
942 switch (dd->type) {
943 case VIRTIO_ID_NET:
944 mic->mic_net.virtio_net_fd = fd;
945 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
946 break;
947 case VIRTIO_ID_CONSOLE:
948 mic->mic_console.virtio_console_fd = fd;
949 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
950 break;
951 case VIRTIO_ID_BLOCK:
952 mic->mic_virtblk.virtio_block_fd = fd;
953 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
954 break;
955 }
956 }
957
958 static bool
set_backend_file(struct mic_info * mic)959 set_backend_file(struct mic_info *mic)
960 {
961 FILE *config;
962 char buff[PATH_MAX], *line, *evv, *p;
963
964 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
965 config = fopen(buff, "r");
966 if (config == NULL)
967 return false;
968 do { /* look for "virtblk_backend=XXXX" */
969 line = fgets(buff, PATH_MAX, config);
970 if (line == NULL)
971 break;
972 if (*line == '#')
973 continue;
974 p = strchr(line, '\n');
975 if (p)
976 *p = '\0';
977 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
978 fclose(config);
979 if (line == NULL)
980 return false;
981 evv = strchr(line, '=');
982 if (evv == NULL)
983 return false;
984 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
985 if (mic->mic_virtblk.backend_file == NULL) {
986 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
987 return false;
988 }
989 strcpy(mic->mic_virtblk.backend_file, evv + 1);
990 return true;
991 }
992
993 #define SECTOR_SIZE 512
994 static bool
set_backend_size(struct mic_info * mic)995 set_backend_size(struct mic_info *mic)
996 {
997 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
998 SEEK_END);
999 if (mic->mic_virtblk.backend_size < 0) {
1000 mpsslog("%s: can't seek: %s\n",
1001 mic->name, mic->mic_virtblk.backend_file);
1002 return false;
1003 }
1004 virtblk_dev_page.blk_config.capacity =
1005 mic->mic_virtblk.backend_size / SECTOR_SIZE;
1006 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
1007 virtblk_dev_page.blk_config.capacity++;
1008
1009 virtblk_dev_page.blk_config.capacity =
1010 htole64(virtblk_dev_page.blk_config.capacity);
1011
1012 return true;
1013 }
1014
1015 static bool
open_backend(struct mic_info * mic)1016 open_backend(struct mic_info *mic)
1017 {
1018 if (!set_backend_file(mic))
1019 goto _error_exit;
1020 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1021 if (mic->mic_virtblk.backend < 0) {
1022 mpsslog("%s: can't open: %s\n", mic->name,
1023 mic->mic_virtblk.backend_file);
1024 goto _error_free;
1025 }
1026 if (!set_backend_size(mic))
1027 goto _error_close;
1028 mic->mic_virtblk.backend_addr = mmap(NULL,
1029 mic->mic_virtblk.backend_size,
1030 PROT_READ|PROT_WRITE, MAP_SHARED,
1031 mic->mic_virtblk.backend, 0L);
1032 if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1033 mpsslog("%s: can't map: %s %s\n",
1034 mic->name, mic->mic_virtblk.backend_file,
1035 strerror(errno));
1036 goto _error_close;
1037 }
1038 return true;
1039
1040 _error_close:
1041 close(mic->mic_virtblk.backend);
1042 _error_free:
1043 free(mic->mic_virtblk.backend_file);
1044 _error_exit:
1045 return false;
1046 }
1047
1048 static void
close_backend(struct mic_info * mic)1049 close_backend(struct mic_info *mic)
1050 {
1051 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1052 close(mic->mic_virtblk.backend);
1053 free(mic->mic_virtblk.backend_file);
1054 }
1055
1056 static bool
start_virtblk(struct mic_info * mic,struct mic_vring * vring)1057 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1058 {
1059 if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1060 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1061 mic->name);
1062 return false;
1063 }
1064 add_virtio_device(mic, &virtblk_dev_page.dd);
1065 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1066 VIRTIO_ID_BLOCK, vring, NULL,
1067 virtblk_dev_page.dd.num_vq)) {
1068 mpsslog("%s init_vr failed %s\n",
1069 mic->name, strerror(errno));
1070 return false;
1071 }
1072 return true;
1073 }
1074
1075 static void
stop_virtblk(struct mic_info * mic)1076 stop_virtblk(struct mic_info *mic)
1077 {
1078 int vr_size, ret;
1079
1080 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1081 MIC_VIRTIO_RING_ALIGN) +
1082 sizeof(struct _mic_vring_info));
1083 ret = munmap(mic->mic_virtblk.block_dp,
1084 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1085 if (ret < 0)
1086 mpsslog("%s munmap errno %d\n", mic->name, errno);
1087 close(mic->mic_virtblk.virtio_block_fd);
1088 }
1089
1090 static __u8
header_error_check(struct vring_desc * desc)1091 header_error_check(struct vring_desc *desc)
1092 {
1093 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1094 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1095 __func__, __LINE__);
1096 return -EIO;
1097 }
1098 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1099 mpsslog("%s() %d: alone\n",
1100 __func__, __LINE__);
1101 return -EIO;
1102 }
1103 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1104 mpsslog("%s() %d: not read\n",
1105 __func__, __LINE__);
1106 return -EIO;
1107 }
1108 return 0;
1109 }
1110
1111 static int
read_header(int fd,struct virtio_blk_outhdr * hdr,__u32 desc_idx)1112 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1113 {
1114 struct iovec iovec;
1115 struct mic_copy_desc copy;
1116
1117 iovec.iov_len = sizeof(*hdr);
1118 iovec.iov_base = hdr;
1119 copy.iov = &iovec;
1120 copy.iovcnt = 1;
1121 copy.vr_idx = 0; /* only one vring on virtio_block */
1122 copy.update_used = false; /* do not update used index */
1123 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
1124 }
1125
1126 static int
transfer_blocks(int fd,struct iovec * iovec,__u32 iovcnt)1127 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1128 {
1129 struct mic_copy_desc copy;
1130
1131 copy.iov = iovec;
1132 copy.iovcnt = iovcnt;
1133 copy.vr_idx = 0; /* only one vring on virtio_block */
1134 copy.update_used = false; /* do not update used index */
1135 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
1136 }
1137
1138 static __u8
status_error_check(struct vring_desc * desc)1139 status_error_check(struct vring_desc *desc)
1140 {
1141 if (le32toh(desc->len) != sizeof(__u8)) {
1142 mpsslog("%s() %d: length is not sizeof(status)\n",
1143 __func__, __LINE__);
1144 return -EIO;
1145 }
1146 return 0;
1147 }
1148
1149 static int
write_status(int fd,__u8 * status)1150 write_status(int fd, __u8 *status)
1151 {
1152 struct iovec iovec;
1153 struct mic_copy_desc copy;
1154
1155 iovec.iov_base = status;
1156 iovec.iov_len = sizeof(*status);
1157 copy.iov = &iovec;
1158 copy.iovcnt = 1;
1159 copy.vr_idx = 0; /* only one vring on virtio_block */
1160 copy.update_used = true; /* Update used index */
1161 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
1162 }
1163
/*
 * Fallback definition, used only when the headers included above do not
 * already provide VIRTIO_BLK_T_GET_ID.
 */
#ifndef VIRTIO_BLK_T_GET_ID
#define VIRTIO_BLK_T_GET_ID    8
#endif
1167
/*
 * Thread body serving the virtio block device of one MIC card; @arg is
 * the card's struct mic_info.
 *
 * Outer loop: open the backend, start the block device and allocate an
 * iovec array of seg_max entries, then serve requests until the card's
 * mic_virtblk.signaled flag is set (change_virtblk_backend() sets it),
 * then tear everything down and start over. While no backend can be
 * opened the thread only sleeps until it is signaled.
 *
 * Per request: copy in the header, build one iovec entry per data
 * descriptor pointing into the mapped backend at hdr.sector, move the
 * data, then write the status byte, which also updates the used index.
 *
 * The outer loop has no exit, so the trailing pthread_exit() is not
 * reached.
 */
static void *
virtio_block(void *arg)
{
	struct mic_info *mic = (struct mic_info *)arg;
	int ret;
	struct pollfd block_poll;
	struct mic_vring vring;
	__u16 avail_idx;
	__u32 desc_idx;
	struct vring_desc *desc;
	struct iovec *iovec, *piov;
	__u8 status;
	__u32 buffer_desc_idx;
	struct virtio_blk_outhdr hdr;
	void *fos;

	for (;;) {  /* forever */
		if (!open_backend(mic)) { /* No virtblk */
			/* Idle until the next signal asks for a retry. */
			for (mic->mic_virtblk.signaled = 0;
				!mic->mic_virtblk.signaled;)
				sleep(1);
			continue;
		}

		/* backend file is specified. */
		if (!start_virtblk(mic, &vring))
			goto _close_backend;
		iovec = malloc(sizeof(*iovec) *
			le32toh(virtblk_dev_page.blk_config.seg_max));
		if (!iovec) {
			mpsslog("%s: can't alloc iovec: %s\n",
				mic->name, strerror(ENOMEM));
			goto _stop_virtblk;
		}

		block_poll.fd = mic->mic_virtblk.virtio_block_fd;
		block_poll.events = POLLIN;
		/* Serve requests until a signal requests a backend change. */
		for (mic->mic_virtblk.signaled = 0;
		     !mic->mic_virtblk.signaled;) {
			block_poll.revents = 0;
					/* timeout in 1 sec to see signaled */
			ret = poll(&block_poll, 1, 1000);
			if (ret < 0) {
				mpsslog("%s %d: poll failed: %s\n",
					__func__, __LINE__,
					strerror(errno));
				continue;
			}

			if (!(block_poll.revents & POLLIN)) {
#ifdef DEBUG
				mpsslog("%s %d: block_poll.revents=0x%x\n",
					__func__, __LINE__, block_poll.revents);
#endif
				continue;
			}

			/* POLLIN */
			/* Drain every request the card has made available. */
			while (vring.info->avail_idx !=
				le16toh(vring.vr.avail->idx)) {
				/* read header element */
				avail_idx =
					vring.info->avail_idx &
					(vring.vr.num - 1);
				desc_idx = le16toh(
					vring.vr.avail->ring[avail_idx]);
				desc = &vring.vr.desc[desc_idx];
#ifdef DEBUG
				mpsslog("%s() %d: avail_idx=%d ",
					__func__, __LINE__,
					vring.info->avail_idx);
				mpsslog("vring.vr.num=%d desc=%p\n",
					vring.vr.num, desc);
#endif
				/*
				 * NOTE(review): this result only produces log
				 * output; status is reset to 0 a few lines
				 * below before it is ever read.
				 */
				status = header_error_check(desc);
				ret = read_header(
					mic->mic_virtblk.virtio_block_fd,
					&hdr, desc_idx);
				if (ret < 0) {
					mpsslog("%s() %d %s: ret=%d %s\n",
						__func__, __LINE__,
						mic->name, ret,
						strerror(errno));
					break;
				}
				/* buffer element */
				piov = iovec;
				status = 0;
				/* Backend offset of the first sector to move. */
				fos = mic->mic_virtblk.backend_addr +
					(hdr.sector * SECTOR_SIZE);
				buffer_desc_idx = next_desc(desc);
				desc_idx = buffer_desc_idx;
				/*
				 * One iovec entry per descriptor that still has
				 * a successor; the loop ends with desc on the
				 * last descriptor of the chain.
				 *
				 * NOTE(review): piov is not bounded by the
				 * seg_max entries allocated above, hdr.sector
				 * is not checked against backend_size, and
				 * flags/len are read without the le16toh()/
				 * le32toh() used in header_error_check() -
				 * confirm these are acceptable.
				 */
				for (desc = &vring.vr.desc[buffer_desc_idx];
				     desc->flags & VRING_DESC_F_NEXT;
				     desc_idx = next_desc(desc),
					     desc = &vring.vr.desc[desc_idx]) {
					piov->iov_len = desc->len;
					piov->iov_base = fos;
					piov++;
					fos += desc->len;
				}
				/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
				if (hdr.type & ~(VIRTIO_BLK_T_OUT |
					VIRTIO_BLK_T_GET_ID)) {
					/*
					  VIRTIO_BLK_T_IN - does not do
					  anything. Probably for documenting.
					  VIRTIO_BLK_T_SCSI_CMD - for
					  virtio_scsi.
					  VIRTIO_BLK_T_FLUSH - turned off in
					  config space.
					  VIRTIO_BLK_T_BARRIER - defined but not
					  used in anywhere.
					*/
					mpsslog("%s() %d: type %x ",
						__func__, __LINE__,
						hdr.type);
					mpsslog("is not supported\n");
					status = -ENOTSUP;

				} else {
					ret = transfer_blocks(
					mic->mic_virtblk.virtio_block_fd,
						iovec,
						piov - iovec);
					/*
					 * NOTE(review): status is still 0 on
					 * this path, so this condition cannot
					 * be true and a transfer failure is
					 * never reported - presumably
					 * "status == 0" was meant; confirm.
					 */
					if (ret < 0 &&
					    status != 0)
						status = ret;
				}
				/* write status and update used pointer */
				/*
				 * NOTE(review): a non-zero status is replaced
				 * by the result of the descriptor check, so an
				 * earlier error is reported as 0 when the
				 * status descriptor is well formed - confirm
				 * whether "status == 0" was intended.
				 */
				if (status != 0)
					status = status_error_check(desc);
				/* The result of write_status() is not examined. */
				ret = write_status(
					mic->mic_virtblk.virtio_block_fd,
					&status);
#ifdef DEBUG
				mpsslog("%s() %d: write status=%d on desc=%p\n",
					__func__, __LINE__,
					status, desc);
#endif
			}
		}
		free(iovec);
_stop_virtblk:
		stop_virtblk(mic);
_close_backend:
		close_backend(mic);
	}  /* forever */

	pthread_exit(NULL);
}
1319
1320 static void
reset(struct mic_info * mic)1321 reset(struct mic_info *mic)
1322 {
1323 #define RESET_TIMEOUT 120
1324 int i = RESET_TIMEOUT;
1325 setsysfs(mic->name, "state", "reset");
1326 while (i) {
1327 char *state;
1328 state = readsysfs(mic->name, "state");
1329 if (!state)
1330 goto retry;
1331 mpsslog("%s: %s %d state %s\n",
1332 mic->name, __func__, __LINE__, state);
1333
1334 if (!strcmp(state, "ready")) {
1335 free(state);
1336 break;
1337 }
1338 free(state);
1339 retry:
1340 sleep(1);
1341 i--;
1342 }
1343 }
1344
1345 static int
get_mic_shutdown_status(struct mic_info * mic,char * shutdown_status)1346 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1347 {
1348 if (!strcmp(shutdown_status, "nop"))
1349 return MIC_NOP;
1350 if (!strcmp(shutdown_status, "crashed"))
1351 return MIC_CRASHED;
1352 if (!strcmp(shutdown_status, "halted"))
1353 return MIC_HALTED;
1354 if (!strcmp(shutdown_status, "poweroff"))
1355 return MIC_POWER_OFF;
1356 if (!strcmp(shutdown_status, "restart"))
1357 return MIC_RESTART;
1358 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1359 /* Invalid state */
1360 assert(0);
1361 };
1362
get_mic_state(struct mic_info * mic)1363 static int get_mic_state(struct mic_info *mic)
1364 {
1365 char *state = NULL;
1366 enum mic_states mic_state;
1367
1368 while (!state) {
1369 state = readsysfs(mic->name, "state");
1370 sleep(1);
1371 }
1372 mpsslog("%s: %s %d state %s\n",
1373 mic->name, __func__, __LINE__, state);
1374
1375 if (!strcmp(state, "ready")) {
1376 mic_state = MIC_READY;
1377 } else if (!strcmp(state, "booting")) {
1378 mic_state = MIC_BOOTING;
1379 } else if (!strcmp(state, "online")) {
1380 mic_state = MIC_ONLINE;
1381 } else if (!strcmp(state, "shutting_down")) {
1382 mic_state = MIC_SHUTTING_DOWN;
1383 } else if (!strcmp(state, "reset_failed")) {
1384 mic_state = MIC_RESET_FAILED;
1385 } else if (!strcmp(state, "resetting")) {
1386 mic_state = MIC_RESETTING;
1387 } else {
1388 mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1389 assert(0);
1390 }
1391
1392 free(state);
1393 return mic_state;
1394 };
1395
mic_handle_shutdown(struct mic_info * mic)1396 static void mic_handle_shutdown(struct mic_info *mic)
1397 {
1398 #define SHUTDOWN_TIMEOUT 60
1399 int i = SHUTDOWN_TIMEOUT;
1400 char *shutdown_status;
1401 while (i) {
1402 shutdown_status = readsysfs(mic->name, "shutdown_status");
1403 if (!shutdown_status) {
1404 sleep(1);
1405 continue;
1406 }
1407 mpsslog("%s: %s %d shutdown_status %s\n",
1408 mic->name, __func__, __LINE__, shutdown_status);
1409 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1410 case MIC_RESTART:
1411 mic->restart = 1;
1412 case MIC_HALTED:
1413 case MIC_POWER_OFF:
1414 case MIC_CRASHED:
1415 free(shutdown_status);
1416 goto reset;
1417 default:
1418 break;
1419 }
1420 free(shutdown_status);
1421 sleep(1);
1422 i--;
1423 }
1424 reset:
1425 if (!i)
1426 mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1427 mic->name, __func__, __LINE__, shutdown_status);
1428 reset(mic);
1429 }
1430
open_state_fd(struct mic_info * mic)1431 static int open_state_fd(struct mic_info *mic)
1432 {
1433 char pathname[PATH_MAX];
1434 int fd;
1435
1436 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1437 MICSYSFSDIR, mic->name, "state");
1438
1439 fd = open(pathname, O_RDONLY);
1440 if (fd < 0)
1441 mpsslog("%s: opening file %s failed %s\n",
1442 mic->name, pathname, strerror(errno));
1443 return fd;
1444 }
1445
block_till_state_change(int fd,struct mic_info * mic)1446 static int block_till_state_change(int fd, struct mic_info *mic)
1447 {
1448 struct pollfd ufds[1];
1449 char value[PAGE_SIZE];
1450 int ret;
1451
1452 ufds[0].fd = fd;
1453 ufds[0].events = POLLERR | POLLPRI;
1454 ret = poll(ufds, 1, -1);
1455 if (ret < 0) {
1456 mpsslog("%s: %s %d poll failed %s\n",
1457 mic->name, __func__, __LINE__, strerror(errno));
1458 return ret;
1459 }
1460
1461 ret = lseek(fd, 0, SEEK_SET);
1462 if (ret < 0) {
1463 mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1464 mic->name, __func__, __LINE__, strerror(errno));
1465 return ret;
1466 }
1467
1468 ret = read(fd, value, sizeof(value));
1469 if (ret < 0) {
1470 mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1471 mic->name, __func__, __LINE__, strerror(errno));
1472 return ret;
1473 }
1474
1475 return 0;
1476 }
1477
1478 static void *
mic_config(void * arg)1479 mic_config(void *arg)
1480 {
1481 struct mic_info *mic = (struct mic_info *)arg;
1482 int fd, ret, stat = 0;
1483
1484 fd = open_state_fd(mic);
1485 if (fd < 0) {
1486 mpsslog("%s: %s %d open state fd failed %s\n",
1487 mic->name, __func__, __LINE__, strerror(errno));
1488 goto exit;
1489 }
1490
1491 do {
1492 ret = block_till_state_change(fd, mic);
1493 if (ret < 0) {
1494 mpsslog("%s: %s %d block_till_state_change error %s\n",
1495 mic->name, __func__, __LINE__, strerror(errno));
1496 goto close_exit;
1497 }
1498
1499 switch (get_mic_state(mic)) {
1500 case MIC_SHUTTING_DOWN:
1501 mic_handle_shutdown(mic);
1502 break;
1503 case MIC_READY:
1504 case MIC_RESET_FAILED:
1505 ret = kill(mic->pid, SIGTERM);
1506 mpsslog("%s: %s %d kill pid %d ret %d\n",
1507 mic->name, __func__, __LINE__,
1508 mic->pid, ret);
1509 if (!ret) {
1510 ret = waitpid(mic->pid, &stat,
1511 WIFSIGNALED(stat));
1512 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1513 mic->name, __func__, __LINE__,
1514 ret, mic->pid);
1515 }
1516 if (mic->boot_on_resume) {
1517 setsysfs(mic->name, "state", "boot");
1518 mic->boot_on_resume = 0;
1519 }
1520 goto close_exit;
1521 default:
1522 break;
1523 }
1524 } while (1);
1525
1526 close_exit:
1527 close(fd);
1528 exit:
1529 init_mic(mic);
1530 pthread_exit(NULL);
1531 }
1532
1533 static void
set_cmdline(struct mic_info * mic)1534 set_cmdline(struct mic_info *mic)
1535 {
1536 char buffer[PATH_MAX];
1537 int len;
1538
1539 len = snprintf(buffer, PATH_MAX,
1540 "clocksource=tsc highres=off nohz=off ");
1541 len += snprintf(buffer + len, PATH_MAX - len,
1542 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1543 len += snprintf(buffer + len, PATH_MAX - len,
1544 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1545 mic->id + 1);
1546
1547 setsysfs(mic->name, "cmdline", buffer);
1548 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1549 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1550 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1551 }
1552
1553 static void
set_log_buf_info(struct mic_info * mic)1554 set_log_buf_info(struct mic_info *mic)
1555 {
1556 int fd;
1557 off_t len;
1558 char system_map[] = "/lib/firmware/mic/System.map";
1559 char *map, *temp, log_buf[17] = {'\0'};
1560
1561 fd = open(system_map, O_RDONLY);
1562 if (fd < 0) {
1563 mpsslog("%s: Opening System.map failed: %d\n",
1564 mic->name, errno);
1565 return;
1566 }
1567 len = lseek(fd, 0, SEEK_END);
1568 if (len < 0) {
1569 mpsslog("%s: Reading System.map size failed: %d\n",
1570 mic->name, errno);
1571 close(fd);
1572 return;
1573 }
1574 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1575 if (map == MAP_FAILED) {
1576 mpsslog("%s: mmap of System.map failed: %d\n",
1577 mic->name, errno);
1578 close(fd);
1579 return;
1580 }
1581 temp = strstr(map, "__log_buf");
1582 if (!temp) {
1583 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1584 munmap(map, len);
1585 close(fd);
1586 return;
1587 }
1588 strncpy(log_buf, temp - 19, 16);
1589 setsysfs(mic->name, "log_buf_addr", log_buf);
1590 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1591 temp = strstr(map, "log_buf_len");
1592 if (!temp) {
1593 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1594 munmap(map, len);
1595 close(fd);
1596 return;
1597 }
1598 strncpy(log_buf, temp - 19, 16);
1599 setsysfs(mic->name, "log_buf_len", log_buf);
1600 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1601 munmap(map, len);
1602 close(fd);
1603 }
1604
1605 static void
change_virtblk_backend(int x,siginfo_t * siginfo,void * p)1606 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1607 {
1608 struct mic_info *mic;
1609
1610 for (mic = mic_list.next; mic != NULL; mic = mic->next)
1611 mic->mic_virtblk.signaled = 1/* true */;
1612 }
1613
/*
 * Prepare the boot parameters of one card: first the log buffer
 * information, then the kernel command line.
 */
static void
set_mic_boot_params(struct mic_info *mic)
{
	set_log_buf_info(mic);	/* sysfs log_buf_addr / log_buf_len */
	set_cmdline(mic);	/* sysfs cmdline */
}
1620
1621 static void *
init_mic(void * arg)1622 init_mic(void *arg)
1623 {
1624 struct mic_info *mic = (struct mic_info *)arg;
1625 struct sigaction ignore = {
1626 .sa_flags = 0,
1627 .sa_handler = SIG_IGN
1628 };
1629 struct sigaction act = {
1630 .sa_flags = SA_SIGINFO,
1631 .sa_sigaction = change_virtblk_backend,
1632 };
1633 char buffer[PATH_MAX];
1634 int err, fd;
1635
1636 /*
1637 * Currently, one virtio block device is supported for each MIC card
1638 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1639 * The signal informs the virtio block backend about a change in the
1640 * configuration file which specifies the virtio backend file name on
1641 * the host. Virtio block backend then re-reads the configuration file
1642 * and switches to the new block device. This signalling mechanism may
1643 * not be required once multiple virtio block devices are supported by
1644 * the MIC daemon.
1645 */
1646 sigaction(SIGUSR1, &ignore, NULL);
1647 retry:
1648 fd = open_state_fd(mic);
1649 if (fd < 0) {
1650 mpsslog("%s: %s %d open state fd failed %s\n",
1651 mic->name, __func__, __LINE__, strerror(errno));
1652 sleep(2);
1653 goto retry;
1654 }
1655
1656 if (mic->restart) {
1657 snprintf(buffer, PATH_MAX, "boot");
1658 setsysfs(mic->name, "state", buffer);
1659 mpsslog("%s restarting mic %d\n",
1660 mic->name, mic->restart);
1661 mic->restart = 0;
1662 }
1663
1664 while (1) {
1665 while (block_till_state_change(fd, mic)) {
1666 mpsslog("%s: %s %d block_till_state_change error %s\n",
1667 mic->name, __func__, __LINE__, strerror(errno));
1668 sleep(2);
1669 continue;
1670 }
1671
1672 if (get_mic_state(mic) == MIC_BOOTING)
1673 break;
1674 }
1675
1676 mic->pid = fork();
1677 switch (mic->pid) {
1678 case 0:
1679 add_virtio_device(mic, &virtcons_dev_page.dd);
1680 add_virtio_device(mic, &virtnet_dev_page.dd);
1681 err = pthread_create(&mic->mic_console.console_thread, NULL,
1682 virtio_console, mic);
1683 if (err)
1684 mpsslog("%s virtcons pthread_create failed %s\n",
1685 mic->name, strerror(err));
1686 err = pthread_create(&mic->mic_net.net_thread, NULL,
1687 virtio_net, mic);
1688 if (err)
1689 mpsslog("%s virtnet pthread_create failed %s\n",
1690 mic->name, strerror(err));
1691 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1692 virtio_block, mic);
1693 if (err)
1694 mpsslog("%s virtblk pthread_create failed %s\n",
1695 mic->name, strerror(err));
1696 sigemptyset(&act.sa_mask);
1697 err = sigaction(SIGUSR1, &act, NULL);
1698 if (err)
1699 mpsslog("%s sigaction SIGUSR1 failed %s\n",
1700 mic->name, strerror(errno));
1701 while (1)
1702 sleep(60);
1703 case -1:
1704 mpsslog("fork failed MIC name %s id %d errno %d\n",
1705 mic->name, mic->id, errno);
1706 break;
1707 default:
1708 err = pthread_create(&mic->config_thread, NULL,
1709 mic_config, mic);
1710 if (err)
1711 mpsslog("%s mic_config pthread_create failed %s\n",
1712 mic->name, strerror(err));
1713 }
1714
1715 return NULL;
1716 }
1717
1718 static void
start_daemon(void)1719 start_daemon(void)
1720 {
1721 struct mic_info *mic;
1722 int err;
1723
1724 for (mic = mic_list.next; mic; mic = mic->next) {
1725 set_mic_boot_params(mic);
1726 err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1727 if (err)
1728 mpsslog("%s init_mic pthread_create failed %s\n",
1729 mic->name, strerror(err));
1730 }
1731
1732 while (1)
1733 sleep(60);
1734 }
1735
1736 static int
init_mic_list(void)1737 init_mic_list(void)
1738 {
1739 struct mic_info *mic = &mic_list;
1740 struct dirent *file;
1741 DIR *dp;
1742 int cnt = 0;
1743
1744 dp = opendir(MICSYSFSDIR);
1745 if (!dp)
1746 return 0;
1747
1748 while ((file = readdir(dp)) != NULL) {
1749 if (!strncmp(file->d_name, "mic", 3)) {
1750 mic->next = calloc(1, sizeof(struct mic_info));
1751 if (mic->next) {
1752 mic = mic->next;
1753 mic->id = atoi(&file->d_name[3]);
1754 mic->name = malloc(strlen(file->d_name) + 16);
1755 if (mic->name)
1756 strcpy(mic->name, file->d_name);
1757 mpsslog("MIC name %s id %d\n", mic->name,
1758 mic->id);
1759 cnt++;
1760 }
1761 }
1762 }
1763
1764 closedir(dp);
1765 return cnt;
1766 }
1767
1768 void
mpsslog(char * format,...)1769 mpsslog(char *format, ...)
1770 {
1771 va_list args;
1772 char buffer[4096];
1773 char ts[52], *ts1;
1774 time_t t;
1775
1776 if (logfp == NULL)
1777 return;
1778
1779 va_start(args, format);
1780 vsprintf(buffer, format, args);
1781 va_end(args);
1782
1783 time(&t);
1784 ts1 = ctime_r(&t, ts);
1785 ts1[strlen(ts1) - 1] = '\0';
1786 fprintf(logfp, "%s: %s", ts1, buffer);
1787
1788 fflush(logfp);
1789 }
1790
1791 int
main(int argc,char * argv[])1792 main(int argc, char *argv[])
1793 {
1794 int cnt;
1795 pid_t pid;
1796
1797 myname = argv[0];
1798
1799 logfp = fopen(LOGFILE_NAME, "a+");
1800 if (!logfp) {
1801 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1802 exit(1);
1803 }
1804 pid = fork();
1805 switch (pid) {
1806 case 0:
1807 break;
1808 case -1:
1809 exit(2);
1810 default:
1811 exit(0);
1812 }
1813
1814 mpsslog("MIC Daemon start\n");
1815
1816 cnt = init_mic_list();
1817 if (cnt == 0) {
1818 mpsslog("MIC module not loaded\n");
1819 exit(3);
1820 }
1821 mpsslog("MIC found %d devices\n", cnt);
1822
1823 start_daemon();
1824
1825 exit(0);
1826 }
1827