1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Intel MIC Platform Software Stack (MPSS)
4 *
5 * Copyright(c) 2013 Intel Corporation.
6 *
7 * Intel MIC User Space Tools.
8 */
9
10 #define _GNU_SOURCE
11
12 #include <stdlib.h>
13 #include <fcntl.h>
14 #include <getopt.h>
15 #include <assert.h>
16 #include <unistd.h>
17 #include <stdbool.h>
18 #include <signal.h>
19 #include <poll.h>
20 #include <features.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <sys/mman.h>
24 #include <sys/socket.h>
25 #include <linux/virtio_ring.h>
26 #include <linux/virtio_net.h>
27 #include <linux/virtio_console.h>
28 #include <linux/virtio_blk.h>
29 #include <linux/version.h>
30 #include "mpssd.h"
31 #include <linux/mic_ioctl.h>
32 #include <linux/mic_common.h>
33 #include <tools/endian.h>
34
35 static void *init_mic(void *arg);
36
37 static FILE *logfp;
38 static struct mic_info mic_list;
39
/* Number of elements in a true array (meaningless when given a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/* Minimum of x and y after converting both to @type; each is evaluated once. */
#define min_t(type, x, y) ({ \
		type __min1 = (x); \
		type __min2 = (y); \
		__min1 < __min2 ? __min1 : __min2; })

/*
 * align addr on a size boundary - adjust address up/down if needed.
 * The mask arithmetic only works when size is a power of two.
 * Arguments are fully parenthesized so that expressions such as
 * "1 << n" can be passed safely.
 */
#define _ALIGN_DOWN(addr, size)	((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)	_ALIGN_DOWN((addr) + (size) - 1, (size))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)	_ALIGN_UP((addr), (size))

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)	_ALIGN((addr), PAGE_SIZE)

/* Read x exactly once through a volatile-qualified lvalue. */
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

#define GSO_ENABLED		1
#define MAX_GSO_SIZE		(64 * 1024)
#define ETH_H_LEN		14
/* Largest packet (GSO payload plus ethernet header), rounded up to 64 bytes */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Byte offset at which the vrings start inside the mapping made by init_vr() */
#define MIC_DEVICE_PAGE_END	0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
68
69 static struct {
70 struct mic_device_desc dd;
71 struct mic_vqconfig vqconfig[2];
72 __u32 host_features, guest_acknowledgements;
73 struct virtio_console_config cons_config;
74 } virtcons_dev_page = {
75 .dd = {
76 .type = VIRTIO_ID_CONSOLE,
77 .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
78 .feature_len = sizeof(virtcons_dev_page.host_features),
79 .config_len = sizeof(virtcons_dev_page.cons_config),
80 },
81 .vqconfig[0] = {
82 .num = htole16(MIC_VRING_ENTRIES),
83 },
84 .vqconfig[1] = {
85 .num = htole16(MIC_VRING_ENTRIES),
86 },
87 };
88
89 static struct {
90 struct mic_device_desc dd;
91 struct mic_vqconfig vqconfig[2];
92 __u32 host_features, guest_acknowledgements;
93 struct virtio_net_config net_config;
94 } virtnet_dev_page = {
95 .dd = {
96 .type = VIRTIO_ID_NET,
97 .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
98 .feature_len = sizeof(virtnet_dev_page.host_features),
99 .config_len = sizeof(virtnet_dev_page.net_config),
100 },
101 .vqconfig[0] = {
102 .num = htole16(MIC_VRING_ENTRIES),
103 },
104 .vqconfig[1] = {
105 .num = htole16(MIC_VRING_ENTRIES),
106 },
107 #if GSO_ENABLED
108 .host_features = htole32(
109 1 << VIRTIO_NET_F_CSUM |
110 1 << VIRTIO_NET_F_GSO |
111 1 << VIRTIO_NET_F_GUEST_TSO4 |
112 1 << VIRTIO_NET_F_GUEST_TSO6 |
113 1 << VIRTIO_NET_F_GUEST_ECN),
114 #else
115 .host_features = 0,
116 #endif
117 };
118
119 static const char *mic_config_dir = "/etc/mpss";
120 static const char *virtblk_backend = "VIRTBLK_BACKEND";
121 static struct {
122 struct mic_device_desc dd;
123 struct mic_vqconfig vqconfig[1];
124 __u32 host_features, guest_acknowledgements;
125 struct virtio_blk_config blk_config;
126 } virtblk_dev_page = {
127 .dd = {
128 .type = VIRTIO_ID_BLOCK,
129 .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
130 .feature_len = sizeof(virtblk_dev_page.host_features),
131 .config_len = sizeof(virtblk_dev_page.blk_config),
132 },
133 .vqconfig[0] = {
134 .num = htole16(MIC_VRING_ENTRIES),
135 },
136 .host_features =
137 htole32(1<<VIRTIO_BLK_F_SEG_MAX),
138 .blk_config = {
139 .seg_max = htole32(MIC_VRING_ENTRIES - 2),
140 .capacity = htole64(0),
141 }
142 };
143
144 static char *myname;
145
146 static int
tap_configure(struct mic_info * mic,char * dev)147 tap_configure(struct mic_info *mic, char *dev)
148 {
149 pid_t pid;
150 char *ifargv[7];
151 char ipaddr[IFNAMSIZ];
152 int ret = 0;
153
154 pid = fork();
155 if (pid == 0) {
156 ifargv[0] = "ip";
157 ifargv[1] = "link";
158 ifargv[2] = "set";
159 ifargv[3] = dev;
160 ifargv[4] = "up";
161 ifargv[5] = NULL;
162 mpsslog("Configuring %s\n", dev);
163 ret = execvp("ip", ifargv);
164 if (ret < 0) {
165 mpsslog("%s execvp failed errno %s\n",
166 mic->name, strerror(errno));
167 return ret;
168 }
169 }
170 if (pid < 0) {
171 mpsslog("%s fork failed errno %s\n",
172 mic->name, strerror(errno));
173 return ret;
174 }
175
176 ret = waitpid(pid, NULL, 0);
177 if (ret < 0) {
178 mpsslog("%s waitpid failed errno %s\n",
179 mic->name, strerror(errno));
180 return ret;
181 }
182
183 snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
184
185 pid = fork();
186 if (pid == 0) {
187 ifargv[0] = "ip";
188 ifargv[1] = "addr";
189 ifargv[2] = "add";
190 ifargv[3] = ipaddr;
191 ifargv[4] = "dev";
192 ifargv[5] = dev;
193 ifargv[6] = NULL;
194 mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
195 ret = execvp("ip", ifargv);
196 if (ret < 0) {
197 mpsslog("%s execvp failed errno %s\n",
198 mic->name, strerror(errno));
199 return ret;
200 }
201 }
202 if (pid < 0) {
203 mpsslog("%s fork failed errno %s\n",
204 mic->name, strerror(errno));
205 return ret;
206 }
207
208 ret = waitpid(pid, NULL, 0);
209 if (ret < 0) {
210 mpsslog("%s waitpid failed errno %s\n",
211 mic->name, strerror(errno));
212 return ret;
213 }
214 mpsslog("MIC name %s %s %d DONE!\n",
215 mic->name, __func__, __LINE__);
216 return 0;
217 }
218
tun_alloc(struct mic_info * mic,char * dev)219 static int tun_alloc(struct mic_info *mic, char *dev)
220 {
221 struct ifreq ifr;
222 int fd, err;
223 #if GSO_ENABLED
224 unsigned offload;
225 #endif
226 fd = open("/dev/net/tun", O_RDWR);
227 if (fd < 0) {
228 mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
229 goto done;
230 }
231
232 memset(&ifr, 0, sizeof(ifr));
233
234 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
235 if (*dev)
236 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
237
238 err = ioctl(fd, TUNSETIFF, (void *)&ifr);
239 if (err < 0) {
240 mpsslog("%s %s %d TUNSETIFF failed %s\n",
241 mic->name, __func__, __LINE__, strerror(errno));
242 close(fd);
243 return err;
244 }
245 #if GSO_ENABLED
246 offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
247
248 err = ioctl(fd, TUNSETOFFLOAD, offload);
249 if (err < 0) {
250 mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
251 mic->name, __func__, __LINE__, strerror(errno));
252 close(fd);
253 return err;
254 }
255 #endif
256 strcpy(dev, ifr.ifr_name);
257 mpsslog("Created TAP %s\n", dev);
258 done:
259 return fd;
260 }
261
/* Slots of the pollfd array used by virtio_net() */
#define NET_FD_VIRTIO_NET	0	/* virtio net device descriptor */
#define NET_FD_TUN		1	/* TAP descriptor */
#define MAX_NET_FD		2	/* number of slots */
265
set_dp(struct mic_info * mic,int type,void * dp)266 static void set_dp(struct mic_info *mic, int type, void *dp)
267 {
268 switch (type) {
269 case VIRTIO_ID_CONSOLE:
270 mic->mic_console.console_dp = dp;
271 return;
272 case VIRTIO_ID_NET:
273 mic->mic_net.net_dp = dp;
274 return;
275 case VIRTIO_ID_BLOCK:
276 mic->mic_virtblk.block_dp = dp;
277 return;
278 }
279 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
280 assert(0);
281 }
282
get_dp(struct mic_info * mic,int type)283 static void *get_dp(struct mic_info *mic, int type)
284 {
285 switch (type) {
286 case VIRTIO_ID_CONSOLE:
287 return mic->mic_console.console_dp;
288 case VIRTIO_ID_NET:
289 return mic->mic_net.net_dp;
290 case VIRTIO_ID_BLOCK:
291 return mic->mic_virtblk.block_dp;
292 }
293 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
294 assert(0);
295 return NULL;
296 }
297
get_device_desc(struct mic_info * mic,int type)298 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
299 {
300 struct mic_device_desc *d;
301 int i;
302 void *dp = get_dp(mic, type);
303
304 for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
305 i += mic_total_desc_size(d)) {
306 d = dp + i;
307
308 /* End of list */
309 if (d->type == 0)
310 break;
311
312 if (d->type == -1)
313 continue;
314
315 mpsslog("%s %s d-> type %d d %p\n",
316 mic->name, __func__, d->type, d);
317
318 if (d->type == (__u8)type)
319 return d;
320 }
321 mpsslog("%s %s %d not found\n", mic->name, __func__, type);
322 return NULL;
323 }
324
325 /* See comments in vhost.c for explanation of next_desc() */
next_desc(struct vring_desc * desc)326 static unsigned next_desc(struct vring_desc *desc)
327 {
328 unsigned int next;
329
330 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
331 return -1U;
332 next = le16toh(desc->next);
333 return next;
334 }
335
336 /* Sum up all the IOVEC length */
337 static ssize_t
sum_iovec_len(struct mic_copy_desc * copy)338 sum_iovec_len(struct mic_copy_desc *copy)
339 {
340 ssize_t sum = 0;
341 unsigned int i;
342
343 for (i = 0; i < copy->iovcnt; i++)
344 sum += copy->iov[i].iov_len;
345 return sum;
346 }
347
verify_out_len(struct mic_info * mic,struct mic_copy_desc * copy)348 static inline void verify_out_len(struct mic_info *mic,
349 struct mic_copy_desc *copy)
350 {
351 if (copy->out_len != sum_iovec_len(copy)) {
352 mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
353 mic->name, __func__, __LINE__,
354 copy->out_len, sum_iovec_len(copy));
355 assert(copy->out_len == sum_iovec_len(copy));
356 }
357 }
358
359 /* Display an iovec */
360 static void
disp_iovec(struct mic_info * mic,struct mic_copy_desc * copy,const char * s,int line)361 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
362 const char *s, int line)
363 {
364 unsigned int i;
365
366 for (i = 0; i < copy->iovcnt; i++)
367 mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
368 mic->name, s, line, i,
369 copy->iov[i].iov_base, copy->iov[i].iov_len);
370 }
371
read_avail_idx(struct mic_vring * vr)372 static inline __u16 read_avail_idx(struct mic_vring *vr)
373 {
374 return READ_ONCE(vr->info->avail_idx);
375 }
376
txrx_prepare(int type,bool tx,struct mic_vring * vr,struct mic_copy_desc * copy,ssize_t len)377 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
378 struct mic_copy_desc *copy, ssize_t len)
379 {
380 copy->vr_idx = tx ? 0 : 1;
381 copy->update_used = true;
382 if (type == VIRTIO_ID_NET)
383 copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
384 else
385 copy->iov[0].iov_len = len;
386 }
387
388 /* Central API which triggers the copies */
389 static int
mic_virtio_copy(struct mic_info * mic,int fd,struct mic_vring * vr,struct mic_copy_desc * copy)390 mic_virtio_copy(struct mic_info *mic, int fd,
391 struct mic_vring *vr, struct mic_copy_desc *copy)
392 {
393 int ret;
394
395 ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
396 if (ret) {
397 mpsslog("%s %s %d errno %s ret %d\n",
398 mic->name, __func__, __LINE__,
399 strerror(errno), ret);
400 }
401 return ret;
402 }
403
_vring_size(unsigned int num,unsigned long align)404 static inline unsigned _vring_size(unsigned int num, unsigned long align)
405 {
406 return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
407 + align - 1) & ~(align - 1))
408 + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
409 }
410
411 /*
412 * This initialization routine requires at least one
413 * vring i.e. vr0. vr1 is optional.
414 */
415 static void *
init_vr(struct mic_info * mic,int fd,int type,struct mic_vring * vr0,struct mic_vring * vr1,int num_vq)416 init_vr(struct mic_info *mic, int fd, int type,
417 struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
418 {
419 int vr_size;
420 char *va;
421
422 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
423 MIC_VIRTIO_RING_ALIGN) +
424 sizeof(struct _mic_vring_info));
425 va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
426 PROT_READ, MAP_SHARED, fd, 0);
427 if (MAP_FAILED == va) {
428 mpsslog("%s %s %d mmap failed errno %s\n",
429 mic->name, __func__, __LINE__,
430 strerror(errno));
431 goto done;
432 }
433 set_dp(mic, type, va);
434 vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
435 vr0->info = vr0->va +
436 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
437 vring_init(&vr0->vr,
438 MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
439 mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
440 __func__, mic->name, vr0->va, vr0->info, vr_size,
441 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
442 mpsslog("magic 0x%x expected 0x%x\n",
443 le32toh(vr0->info->magic), MIC_MAGIC + type);
444 assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
445 if (vr1) {
446 vr1->va = (struct mic_vring *)
447 &va[MIC_DEVICE_PAGE_END + vr_size];
448 vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
449 MIC_VIRTIO_RING_ALIGN);
450 vring_init(&vr1->vr,
451 MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
452 mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
453 __func__, mic->name, vr1->va, vr1->info, vr_size,
454 _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
455 mpsslog("magic 0x%x expected 0x%x\n",
456 le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
457 assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
458 }
459 done:
460 return va;
461 }
462
463 static int
wait_for_card_driver(struct mic_info * mic,int fd,int type)464 wait_for_card_driver(struct mic_info *mic, int fd, int type)
465 {
466 struct pollfd pollfd;
467 int err;
468 struct mic_device_desc *desc = get_device_desc(mic, type);
469 __u8 prev_status;
470
471 if (!desc)
472 return -ENODEV;
473 prev_status = desc->status;
474 pollfd.fd = fd;
475 mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
476 mic->name, __func__, type, desc->status);
477
478 while (1) {
479 pollfd.events = POLLIN;
480 pollfd.revents = 0;
481 err = poll(&pollfd, 1, -1);
482 if (err < 0) {
483 mpsslog("%s %s poll failed %s\n",
484 mic->name, __func__, strerror(errno));
485 continue;
486 }
487
488 if (pollfd.revents) {
489 if (desc->status != prev_status) {
490 mpsslog("%s %s Waiting... desc-> type %d "
491 "status 0x%x\n",
492 mic->name, __func__, type,
493 desc->status);
494 prev_status = desc->status;
495 }
496 if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
497 mpsslog("%s %s poll.revents %d\n",
498 mic->name, __func__, pollfd.revents);
499 mpsslog("%s %s desc-> type %d status 0x%x\n",
500 mic->name, __func__, type,
501 desc->status);
502 break;
503 }
504 }
505 }
506 return 0;
507 }
508
509 /* Spin till we have some descriptors */
510 static void
spin_for_descriptors(struct mic_info * mic,struct mic_vring * vr)511 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
512 {
513 __u16 avail_idx = read_avail_idx(vr);
514
515 while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) {
516 #ifdef DEBUG
517 mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
518 mic->name, __func__,
519 le16toh(vr->vr.avail->idx), vr->info->avail_idx);
520 #endif
521 sched_yield();
522 }
523 }
524
525 static void *
virtio_net(void * arg)526 virtio_net(void *arg)
527 {
528 static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
529 static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
530 struct iovec vnet_iov[2][2] = {
531 { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
532 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
533 { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
534 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
535 };
536 struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
537 struct mic_info *mic = (struct mic_info *)arg;
538 char if_name[IFNAMSIZ];
539 struct pollfd net_poll[MAX_NET_FD];
540 struct mic_vring tx_vr, rx_vr;
541 struct mic_copy_desc copy;
542 struct mic_device_desc *desc;
543 int err;
544
545 snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
546 mic->mic_net.tap_fd = tun_alloc(mic, if_name);
547 if (mic->mic_net.tap_fd < 0)
548 goto done;
549
550 if (tap_configure(mic, if_name))
551 goto done;
552 mpsslog("MIC name %s id %d\n", mic->name, mic->id);
553
554 net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
555 net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
556 net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
557 net_poll[NET_FD_TUN].events = POLLIN;
558
559 if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
560 VIRTIO_ID_NET, &tx_vr, &rx_vr,
561 virtnet_dev_page.dd.num_vq)) {
562 mpsslog("%s init_vr failed %s\n",
563 mic->name, strerror(errno));
564 goto done;
565 }
566
567 copy.iovcnt = 2;
568 desc = get_device_desc(mic, VIRTIO_ID_NET);
569
570 while (1) {
571 ssize_t len;
572
573 net_poll[NET_FD_VIRTIO_NET].revents = 0;
574 net_poll[NET_FD_TUN].revents = 0;
575
576 /* Start polling for data from tap and virtio net */
577 err = poll(net_poll, 2, -1);
578 if (err < 0) {
579 mpsslog("%s poll failed %s\n",
580 __func__, strerror(errno));
581 continue;
582 }
583 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
584 err = wait_for_card_driver(mic,
585 mic->mic_net.virtio_net_fd,
586 VIRTIO_ID_NET);
587 if (err) {
588 mpsslog("%s %s %d Exiting...\n",
589 mic->name, __func__, __LINE__);
590 break;
591 }
592 }
593 /*
594 * Check if there is data to be read from TUN and write to
595 * virtio net fd if there is.
596 */
597 if (net_poll[NET_FD_TUN].revents & POLLIN) {
598 copy.iov = iov0;
599 len = readv(net_poll[NET_FD_TUN].fd,
600 copy.iov, copy.iovcnt);
601 if (len > 0) {
602 struct virtio_net_hdr *hdr
603 = (struct virtio_net_hdr *)vnet_hdr[0];
604
605 /* Disable checksums on the card since we are on
606 a reliable PCIe link */
607 hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
608 #ifdef DEBUG
609 mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
610 __func__, __LINE__, hdr->flags);
611 mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
612 copy.out_len, hdr->gso_type);
613 #endif
614 #ifdef DEBUG
615 disp_iovec(mic, copy, __func__, __LINE__);
616 mpsslog("%s %s %d read from tap 0x%lx\n",
617 mic->name, __func__, __LINE__,
618 len);
619 #endif
620 spin_for_descriptors(mic, &tx_vr);
621 txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, ©,
622 len);
623
624 err = mic_virtio_copy(mic,
625 mic->mic_net.virtio_net_fd, &tx_vr,
626 ©);
627 if (err < 0) {
628 mpsslog("%s %s %d mic_virtio_copy %s\n",
629 mic->name, __func__, __LINE__,
630 strerror(errno));
631 }
632 if (!err)
633 verify_out_len(mic, ©);
634 #ifdef DEBUG
635 disp_iovec(mic, copy, __func__, __LINE__);
636 mpsslog("%s %s %d wrote to net 0x%lx\n",
637 mic->name, __func__, __LINE__,
638 sum_iovec_len(©));
639 #endif
640 /* Reinitialize IOV for next run */
641 iov0[1].iov_len = MAX_NET_PKT_SIZE;
642 } else if (len < 0) {
643 disp_iovec(mic, ©, __func__, __LINE__);
644 mpsslog("%s %s %d read failed %s ", mic->name,
645 __func__, __LINE__, strerror(errno));
646 mpsslog("cnt %d sum %zd\n",
647 copy.iovcnt, sum_iovec_len(©));
648 }
649 }
650
651 /*
652 * Check if there is data to be read from virtio net and
653 * write to TUN if there is.
654 */
655 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
656 while (rx_vr.info->avail_idx !=
657 le16toh(rx_vr.vr.avail->idx)) {
658 copy.iov = iov1;
659 txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, ©,
660 MAX_NET_PKT_SIZE
661 + sizeof(struct virtio_net_hdr));
662
663 err = mic_virtio_copy(mic,
664 mic->mic_net.virtio_net_fd, &rx_vr,
665 ©);
666 if (!err) {
667 #ifdef DEBUG
668 struct virtio_net_hdr *hdr
669 = (struct virtio_net_hdr *)
670 vnet_hdr[1];
671
672 mpsslog("%s %s %d hdr->flags 0x%x, ",
673 mic->name, __func__, __LINE__,
674 hdr->flags);
675 mpsslog("out_len %d gso_type 0x%x\n",
676 copy.out_len,
677 hdr->gso_type);
678 #endif
679 /* Set the correct output iov_len */
680 iov1[1].iov_len = copy.out_len -
681 sizeof(struct virtio_net_hdr);
682 verify_out_len(mic, ©);
683 #ifdef DEBUG
684 disp_iovec(mic, copy, __func__,
685 __LINE__);
686 mpsslog("%s %s %d ",
687 mic->name, __func__, __LINE__);
688 mpsslog("read from net 0x%lx\n",
689 sum_iovec_len(copy));
690 #endif
691 len = writev(net_poll[NET_FD_TUN].fd,
692 copy.iov, copy.iovcnt);
693 if (len != sum_iovec_len(©)) {
694 mpsslog("Tun write failed %s ",
695 strerror(errno));
696 mpsslog("len 0x%zx ", len);
697 mpsslog("read_len 0x%zx\n",
698 sum_iovec_len(©));
699 } else {
700 #ifdef DEBUG
701 disp_iovec(mic, ©, __func__,
702 __LINE__);
703 mpsslog("%s %s %d ",
704 mic->name, __func__,
705 __LINE__);
706 mpsslog("wrote to tap 0x%lx\n",
707 len);
708 #endif
709 }
710 } else {
711 mpsslog("%s %s %d mic_virtio_copy %s\n",
712 mic->name, __func__, __LINE__,
713 strerror(errno));
714 break;
715 }
716 }
717 }
718 if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
719 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
720 }
721 done:
722 pthread_exit(NULL);
723 }
724
/* virtio_console: slots of the pollfd array used by virtio_console() */
#define VIRTIO_CONSOLE_FD	0			/* virtio console device */
#define MONITOR_FD		(VIRTIO_CONSOLE_FD + 1)	/* pseudoterminal master */
#define MAX_CONSOLE_FD		(MONITOR_FD + 1)	/* must be the last one + 1 */
#define MAX_BUFFER_SIZE		PAGE_SIZE
730
731 static void *
virtio_console(void * arg)732 virtio_console(void *arg)
733 {
734 static __u8 vcons_buf[2][PAGE_SIZE];
735 struct iovec vcons_iov[2] = {
736 { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
737 { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
738 };
739 struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
740 struct mic_info *mic = (struct mic_info *)arg;
741 int err;
742 struct pollfd console_poll[MAX_CONSOLE_FD];
743 int pty_fd;
744 char *pts_name;
745 ssize_t len;
746 struct mic_vring tx_vr, rx_vr;
747 struct mic_copy_desc copy;
748 struct mic_device_desc *desc;
749
750 pty_fd = posix_openpt(O_RDWR);
751 if (pty_fd < 0) {
752 mpsslog("can't open a pseudoterminal master device: %s\n",
753 strerror(errno));
754 goto _return;
755 }
756 pts_name = ptsname(pty_fd);
757 if (pts_name == NULL) {
758 mpsslog("can't get pts name\n");
759 goto _close_pty;
760 }
761 printf("%s console message goes to %s\n", mic->name, pts_name);
762 mpsslog("%s console message goes to %s\n", mic->name, pts_name);
763 err = grantpt(pty_fd);
764 if (err < 0) {
765 mpsslog("can't grant access: %s %s\n",
766 pts_name, strerror(errno));
767 goto _close_pty;
768 }
769 err = unlockpt(pty_fd);
770 if (err < 0) {
771 mpsslog("can't unlock a pseudoterminal: %s %s\n",
772 pts_name, strerror(errno));
773 goto _close_pty;
774 }
775 console_poll[MONITOR_FD].fd = pty_fd;
776 console_poll[MONITOR_FD].events = POLLIN;
777
778 console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
779 console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
780
781 if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
782 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
783 virtcons_dev_page.dd.num_vq)) {
784 mpsslog("%s init_vr failed %s\n",
785 mic->name, strerror(errno));
786 goto _close_pty;
787 }
788
789 copy.iovcnt = 1;
790 desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
791
792 for (;;) {
793 console_poll[MONITOR_FD].revents = 0;
794 console_poll[VIRTIO_CONSOLE_FD].revents = 0;
795 err = poll(console_poll, MAX_CONSOLE_FD, -1);
796 if (err < 0) {
797 mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
798 strerror(errno));
799 continue;
800 }
801 if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
802 err = wait_for_card_driver(mic,
803 mic->mic_console.virtio_console_fd,
804 VIRTIO_ID_CONSOLE);
805 if (err) {
806 mpsslog("%s %s %d Exiting...\n",
807 mic->name, __func__, __LINE__);
808 break;
809 }
810 }
811
812 if (console_poll[MONITOR_FD].revents & POLLIN) {
813 copy.iov = iov0;
814 len = readv(pty_fd, copy.iov, copy.iovcnt);
815 if (len > 0) {
816 #ifdef DEBUG
817 disp_iovec(mic, copy, __func__, __LINE__);
818 mpsslog("%s %s %d read from tap 0x%lx\n",
819 mic->name, __func__, __LINE__,
820 len);
821 #endif
822 spin_for_descriptors(mic, &tx_vr);
823 txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
824 ©, len);
825
826 err = mic_virtio_copy(mic,
827 mic->mic_console.virtio_console_fd,
828 &tx_vr, ©);
829 if (err < 0) {
830 mpsslog("%s %s %d mic_virtio_copy %s\n",
831 mic->name, __func__, __LINE__,
832 strerror(errno));
833 }
834 if (!err)
835 verify_out_len(mic, ©);
836 #ifdef DEBUG
837 disp_iovec(mic, copy, __func__, __LINE__);
838 mpsslog("%s %s %d wrote to net 0x%lx\n",
839 mic->name, __func__, __LINE__,
840 sum_iovec_len(copy));
841 #endif
842 /* Reinitialize IOV for next run */
843 iov0->iov_len = PAGE_SIZE;
844 } else if (len < 0) {
845 disp_iovec(mic, ©, __func__, __LINE__);
846 mpsslog("%s %s %d read failed %s ",
847 mic->name, __func__, __LINE__,
848 strerror(errno));
849 mpsslog("cnt %d sum %zd\n",
850 copy.iovcnt, sum_iovec_len(©));
851 }
852 }
853
854 if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
855 while (rx_vr.info->avail_idx !=
856 le16toh(rx_vr.vr.avail->idx)) {
857 copy.iov = iov1;
858 txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
859 ©, PAGE_SIZE);
860
861 err = mic_virtio_copy(mic,
862 mic->mic_console.virtio_console_fd,
863 &rx_vr, ©);
864 if (!err) {
865 /* Set the correct output iov_len */
866 iov1->iov_len = copy.out_len;
867 verify_out_len(mic, ©);
868 #ifdef DEBUG
869 disp_iovec(mic, copy, __func__,
870 __LINE__);
871 mpsslog("%s %s %d ",
872 mic->name, __func__, __LINE__);
873 mpsslog("read from net 0x%lx\n",
874 sum_iovec_len(copy));
875 #endif
876 len = writev(pty_fd,
877 copy.iov, copy.iovcnt);
878 if (len != sum_iovec_len(©)) {
879 mpsslog("Tun write failed %s ",
880 strerror(errno));
881 mpsslog("len 0x%zx ", len);
882 mpsslog("read_len 0x%zx\n",
883 sum_iovec_len(©));
884 } else {
885 #ifdef DEBUG
886 disp_iovec(mic, copy, __func__,
887 __LINE__);
888 mpsslog("%s %s %d ",
889 mic->name, __func__,
890 __LINE__);
891 mpsslog("wrote to tap 0x%lx\n",
892 len);
893 #endif
894 }
895 } else {
896 mpsslog("%s %s %d mic_virtio_copy %s\n",
897 mic->name, __func__, __LINE__,
898 strerror(errno));
899 break;
900 }
901 }
902 }
903 if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
904 mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
905 }
906 _close_pty:
907 close(pty_fd);
908 _return:
909 pthread_exit(NULL);
910 }
911
912 static void
add_virtio_device(struct mic_info * mic,struct mic_device_desc * dd)913 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
914 {
915 char path[PATH_MAX];
916 int fd, err;
917
918 snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
919 fd = open(path, O_RDWR);
920 if (fd < 0) {
921 mpsslog("Could not open %s %s\n", path, strerror(errno));
922 return;
923 }
924
925 err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
926 if (err < 0) {
927 mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
928 close(fd);
929 return;
930 }
931 switch (dd->type) {
932 case VIRTIO_ID_NET:
933 mic->mic_net.virtio_net_fd = fd;
934 mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
935 break;
936 case VIRTIO_ID_CONSOLE:
937 mic->mic_console.virtio_console_fd = fd;
938 mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
939 break;
940 case VIRTIO_ID_BLOCK:
941 mic->mic_virtblk.virtio_block_fd = fd;
942 mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
943 break;
944 }
945 }
946
947 static bool
set_backend_file(struct mic_info * mic)948 set_backend_file(struct mic_info *mic)
949 {
950 FILE *config;
951 char buff[PATH_MAX], *line, *evv, *p;
952
953 snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
954 config = fopen(buff, "r");
955 if (config == NULL)
956 return false;
957 do { /* look for "virtblk_backend=XXXX" */
958 line = fgets(buff, PATH_MAX, config);
959 if (line == NULL)
960 break;
961 if (*line == '#')
962 continue;
963 p = strchr(line, '\n');
964 if (p)
965 *p = '\0';
966 } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
967 fclose(config);
968 if (line == NULL)
969 return false;
970 evv = strchr(line, '=');
971 if (evv == NULL)
972 return false;
973 mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
974 if (mic->mic_virtblk.backend_file == NULL) {
975 mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
976 return false;
977 }
978 strcpy(mic->mic_virtblk.backend_file, evv + 1);
979 return true;
980 }
981
982 #define SECTOR_SIZE 512
983 static bool
set_backend_size(struct mic_info * mic)984 set_backend_size(struct mic_info *mic)
985 {
986 mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
987 SEEK_END);
988 if (mic->mic_virtblk.backend_size < 0) {
989 mpsslog("%s: can't seek: %s\n",
990 mic->name, mic->mic_virtblk.backend_file);
991 return false;
992 }
993 virtblk_dev_page.blk_config.capacity =
994 mic->mic_virtblk.backend_size / SECTOR_SIZE;
995 if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
996 virtblk_dev_page.blk_config.capacity++;
997
998 virtblk_dev_page.blk_config.capacity =
999 htole64(virtblk_dev_page.blk_config.capacity);
1000
1001 return true;
1002 }
1003
1004 static bool
open_backend(struct mic_info * mic)1005 open_backend(struct mic_info *mic)
1006 {
1007 if (!set_backend_file(mic))
1008 goto _error_exit;
1009 mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1010 if (mic->mic_virtblk.backend < 0) {
1011 mpsslog("%s: can't open: %s\n", mic->name,
1012 mic->mic_virtblk.backend_file);
1013 goto _error_free;
1014 }
1015 if (!set_backend_size(mic))
1016 goto _error_close;
1017 mic->mic_virtblk.backend_addr = mmap(NULL,
1018 mic->mic_virtblk.backend_size,
1019 PROT_READ|PROT_WRITE, MAP_SHARED,
1020 mic->mic_virtblk.backend, 0L);
1021 if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1022 mpsslog("%s: can't map: %s %s\n",
1023 mic->name, mic->mic_virtblk.backend_file,
1024 strerror(errno));
1025 goto _error_close;
1026 }
1027 return true;
1028
1029 _error_close:
1030 close(mic->mic_virtblk.backend);
1031 _error_free:
1032 free(mic->mic_virtblk.backend_file);
1033 _error_exit:
1034 return false;
1035 }
1036
1037 static void
close_backend(struct mic_info * mic)1038 close_backend(struct mic_info *mic)
1039 {
1040 munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1041 close(mic->mic_virtblk.backend);
1042 free(mic->mic_virtblk.backend_file);
1043 }
1044
1045 static bool
start_virtblk(struct mic_info * mic,struct mic_vring * vring)1046 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1047 {
1048 if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1049 mpsslog("%s: blk_config is not 8 byte aligned.\n",
1050 mic->name);
1051 return false;
1052 }
1053 add_virtio_device(mic, &virtblk_dev_page.dd);
1054 if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1055 VIRTIO_ID_BLOCK, vring, NULL,
1056 virtblk_dev_page.dd.num_vq)) {
1057 mpsslog("%s init_vr failed %s\n",
1058 mic->name, strerror(errno));
1059 return false;
1060 }
1061 return true;
1062 }
1063
1064 static void
stop_virtblk(struct mic_info * mic)1065 stop_virtblk(struct mic_info *mic)
1066 {
1067 int vr_size, ret;
1068
1069 vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1070 MIC_VIRTIO_RING_ALIGN) +
1071 sizeof(struct _mic_vring_info));
1072 ret = munmap(mic->mic_virtblk.block_dp,
1073 MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1074 if (ret < 0)
1075 mpsslog("%s munmap errno %d\n", mic->name, errno);
1076 close(mic->mic_virtblk.virtio_block_fd);
1077 }
1078
1079 static __u8
header_error_check(struct vring_desc * desc)1080 header_error_check(struct vring_desc *desc)
1081 {
1082 if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1083 mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1084 __func__, __LINE__);
1085 return -EIO;
1086 }
1087 if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1088 mpsslog("%s() %d: alone\n",
1089 __func__, __LINE__);
1090 return -EIO;
1091 }
1092 if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1093 mpsslog("%s() %d: not read\n",
1094 __func__, __LINE__);
1095 return -EIO;
1096 }
1097 return 0;
1098 }
1099
1100 static int
read_header(int fd,struct virtio_blk_outhdr * hdr,__u32 desc_idx)1101 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1102 {
1103 struct iovec iovec;
1104 struct mic_copy_desc copy;
1105
1106 iovec.iov_len = sizeof(*hdr);
1107 iovec.iov_base = hdr;
1108 copy.iov = &iovec;
1109 copy.iovcnt = 1;
1110 copy.vr_idx = 0; /* only one vring on virtio_block */
1111 copy.update_used = false; /* do not update used index */
1112 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
1113 }
1114
1115 static int
transfer_blocks(int fd,struct iovec * iovec,__u32 iovcnt)1116 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1117 {
1118 struct mic_copy_desc copy;
1119
1120 copy.iov = iovec;
1121 copy.iovcnt = iovcnt;
1122 copy.vr_idx = 0; /* only one vring on virtio_block */
1123 copy.update_used = false; /* do not update used index */
1124 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
1125 }
1126
/*
 * status_error_check - verify that @desc is exactly one status byte long.
 * @desc: descriptor expected to carry the request status
 *
 * Returns 0 when the length matches sizeof(__u8); otherwise logs and returns
 * -EIO (truncated to 8 bits by the __u8 return type).
 */
static __u8
status_error_check(struct vring_desc *desc)
{
	if (le32toh(desc->len) == sizeof(__u8))
		return 0;

	mpsslog("%s() %d: length is not sizeof(status)\n",
		__func__, __LINE__);
	return -EIO;
}
1137
1138 static int
write_status(int fd,__u8 * status)1139 write_status(int fd, __u8 *status)
1140 {
1141 struct iovec iovec;
1142 struct mic_copy_desc copy;
1143
1144 iovec.iov_base = status;
1145 iovec.iov_len = sizeof(*status);
1146 copy.iov = &iovec;
1147 copy.iovcnt = 1;
1148 copy.vr_idx = 0; /* only one vring on virtio_block */
1149 copy.update_used = true; /* Update used index */
1150 return ioctl(fd, MIC_VIRTIO_COPY_DESC, ©);
1151 }
1152
/*
 * Fallback definition, used only when the included headers do not already
 * provide VIRTIO_BLK_T_GET_ID.
 */
#ifndef VIRTIO_BLK_T_GET_ID
#define VIRTIO_BLK_T_GET_ID    8
#endif
1156
1157 static void *
virtio_block(void * arg)1158 virtio_block(void *arg)
1159 {
1160 struct mic_info *mic = (struct mic_info *)arg;
1161 int ret;
1162 struct pollfd block_poll;
1163 struct mic_vring vring;
1164 __u16 avail_idx;
1165 __u32 desc_idx;
1166 struct vring_desc *desc;
1167 struct iovec *iovec, *piov;
1168 __u8 status;
1169 __u32 buffer_desc_idx;
1170 struct virtio_blk_outhdr hdr;
1171 void *fos;
1172
1173 for (;;) { /* forever */
1174 if (!open_backend(mic)) { /* No virtblk */
1175 for (mic->mic_virtblk.signaled = 0;
1176 !mic->mic_virtblk.signaled;)
1177 sleep(1);
1178 continue;
1179 }
1180
1181 /* backend file is specified. */
1182 if (!start_virtblk(mic, &vring))
1183 goto _close_backend;
1184 iovec = malloc(sizeof(*iovec) *
1185 le32toh(virtblk_dev_page.blk_config.seg_max));
1186 if (!iovec) {
1187 mpsslog("%s: can't alloc iovec: %s\n",
1188 mic->name, strerror(ENOMEM));
1189 goto _stop_virtblk;
1190 }
1191
1192 block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1193 block_poll.events = POLLIN;
1194 for (mic->mic_virtblk.signaled = 0;
1195 !mic->mic_virtblk.signaled;) {
1196 block_poll.revents = 0;
1197 /* timeout in 1 sec to see signaled */
1198 ret = poll(&block_poll, 1, 1000);
1199 if (ret < 0) {
1200 mpsslog("%s %d: poll failed: %s\n",
1201 __func__, __LINE__,
1202 strerror(errno));
1203 continue;
1204 }
1205
1206 if (!(block_poll.revents & POLLIN)) {
1207 #ifdef DEBUG
1208 mpsslog("%s %d: block_poll.revents=0x%x\n",
1209 __func__, __LINE__, block_poll.revents);
1210 #endif
1211 continue;
1212 }
1213
1214 /* POLLIN */
1215 while (vring.info->avail_idx !=
1216 le16toh(vring.vr.avail->idx)) {
1217 /* read header element */
1218 avail_idx =
1219 vring.info->avail_idx &
1220 (vring.vr.num - 1);
1221 desc_idx = le16toh(
1222 vring.vr.avail->ring[avail_idx]);
1223 desc = &vring.vr.desc[desc_idx];
1224 #ifdef DEBUG
1225 mpsslog("%s() %d: avail_idx=%d ",
1226 __func__, __LINE__,
1227 vring.info->avail_idx);
1228 mpsslog("vring.vr.num=%d desc=%p\n",
1229 vring.vr.num, desc);
1230 #endif
1231 status = header_error_check(desc);
1232 ret = read_header(
1233 mic->mic_virtblk.virtio_block_fd,
1234 &hdr, desc_idx);
1235 if (ret < 0) {
1236 mpsslog("%s() %d %s: ret=%d %s\n",
1237 __func__, __LINE__,
1238 mic->name, ret,
1239 strerror(errno));
1240 break;
1241 }
1242 /* buffer element */
1243 piov = iovec;
1244 status = 0;
1245 fos = mic->mic_virtblk.backend_addr +
1246 (hdr.sector * SECTOR_SIZE);
1247 buffer_desc_idx = next_desc(desc);
1248 desc_idx = buffer_desc_idx;
1249 for (desc = &vring.vr.desc[buffer_desc_idx];
1250 desc->flags & VRING_DESC_F_NEXT;
1251 desc_idx = next_desc(desc),
1252 desc = &vring.vr.desc[desc_idx]) {
1253 piov->iov_len = desc->len;
1254 piov->iov_base = fos;
1255 piov++;
1256 fos += desc->len;
1257 }
1258 /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1259 if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1260 VIRTIO_BLK_T_GET_ID)) {
1261 /*
1262 VIRTIO_BLK_T_IN - does not do
1263 anything. Probably for documenting.
1264 VIRTIO_BLK_T_SCSI_CMD - for
1265 virtio_scsi.
1266 VIRTIO_BLK_T_FLUSH - turned off in
1267 config space.
1268 VIRTIO_BLK_T_BARRIER - defined but not
1269 used in anywhere.
1270 */
1271 mpsslog("%s() %d: type %x ",
1272 __func__, __LINE__,
1273 hdr.type);
1274 mpsslog("is not supported\n");
1275 status = -ENOTSUP;
1276
1277 } else {
1278 ret = transfer_blocks(
1279 mic->mic_virtblk.virtio_block_fd,
1280 iovec,
1281 piov - iovec);
1282 if (ret < 0 &&
1283 status != 0)
1284 status = ret;
1285 }
1286 /* write status and update used pointer */
1287 if (status != 0)
1288 status = status_error_check(desc);
1289 ret = write_status(
1290 mic->mic_virtblk.virtio_block_fd,
1291 &status);
1292 #ifdef DEBUG
1293 mpsslog("%s() %d: write status=%d on desc=%p\n",
1294 __func__, __LINE__,
1295 status, desc);
1296 #endif
1297 }
1298 }
1299 free(iovec);
1300 _stop_virtblk:
1301 stop_virtblk(mic);
1302 _close_backend:
1303 close_backend(mic);
1304 } /* forever */
1305
1306 pthread_exit(NULL);
1307 }
1308
1309 static void
reset(struct mic_info * mic)1310 reset(struct mic_info *mic)
1311 {
1312 #define RESET_TIMEOUT 120
1313 int i = RESET_TIMEOUT;
1314 setsysfs(mic->name, "state", "reset");
1315 while (i) {
1316 char *state;
1317 state = readsysfs(mic->name, "state");
1318 if (!state)
1319 goto retry;
1320 mpsslog("%s: %s %d state %s\n",
1321 mic->name, __func__, __LINE__, state);
1322
1323 if (!strcmp(state, "ready")) {
1324 free(state);
1325 break;
1326 }
1327 free(state);
1328 retry:
1329 sleep(1);
1330 i--;
1331 }
1332 }
1333
1334 static int
get_mic_shutdown_status(struct mic_info * mic,char * shutdown_status)1335 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1336 {
1337 if (!strcmp(shutdown_status, "nop"))
1338 return MIC_NOP;
1339 if (!strcmp(shutdown_status, "crashed"))
1340 return MIC_CRASHED;
1341 if (!strcmp(shutdown_status, "halted"))
1342 return MIC_HALTED;
1343 if (!strcmp(shutdown_status, "poweroff"))
1344 return MIC_POWER_OFF;
1345 if (!strcmp(shutdown_status, "restart"))
1346 return MIC_RESTART;
1347 mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1348 /* Invalid state */
1349 assert(0);
1350 };
1351
get_mic_state(struct mic_info * mic)1352 static int get_mic_state(struct mic_info *mic)
1353 {
1354 char *state = NULL;
1355 enum mic_states mic_state;
1356
1357 while (!state) {
1358 state = readsysfs(mic->name, "state");
1359 sleep(1);
1360 }
1361 mpsslog("%s: %s %d state %s\n",
1362 mic->name, __func__, __LINE__, state);
1363
1364 if (!strcmp(state, "ready")) {
1365 mic_state = MIC_READY;
1366 } else if (!strcmp(state, "booting")) {
1367 mic_state = MIC_BOOTING;
1368 } else if (!strcmp(state, "online")) {
1369 mic_state = MIC_ONLINE;
1370 } else if (!strcmp(state, "shutting_down")) {
1371 mic_state = MIC_SHUTTING_DOWN;
1372 } else if (!strcmp(state, "reset_failed")) {
1373 mic_state = MIC_RESET_FAILED;
1374 } else if (!strcmp(state, "resetting")) {
1375 mic_state = MIC_RESETTING;
1376 } else {
1377 mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1378 assert(0);
1379 }
1380
1381 free(state);
1382 return mic_state;
1383 };
1384
mic_handle_shutdown(struct mic_info * mic)1385 static void mic_handle_shutdown(struct mic_info *mic)
1386 {
1387 #define SHUTDOWN_TIMEOUT 60
1388 int i = SHUTDOWN_TIMEOUT;
1389 char *shutdown_status;
1390 while (i) {
1391 shutdown_status = readsysfs(mic->name, "shutdown_status");
1392 if (!shutdown_status) {
1393 sleep(1);
1394 continue;
1395 }
1396 mpsslog("%s: %s %d shutdown_status %s\n",
1397 mic->name, __func__, __LINE__, shutdown_status);
1398 switch (get_mic_shutdown_status(mic, shutdown_status)) {
1399 case MIC_RESTART:
1400 mic->restart = 1;
1401 case MIC_HALTED:
1402 case MIC_POWER_OFF:
1403 case MIC_CRASHED:
1404 free(shutdown_status);
1405 goto reset;
1406 default:
1407 break;
1408 }
1409 free(shutdown_status);
1410 sleep(1);
1411 i--;
1412 }
1413 reset:
1414 if (!i)
1415 mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1416 mic->name, __func__, __LINE__, shutdown_status);
1417 reset(mic);
1418 }
1419
open_state_fd(struct mic_info * mic)1420 static int open_state_fd(struct mic_info *mic)
1421 {
1422 char pathname[PATH_MAX];
1423 int fd;
1424
1425 snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1426 MICSYSFSDIR, mic->name, "state");
1427
1428 fd = open(pathname, O_RDONLY);
1429 if (fd < 0)
1430 mpsslog("%s: opening file %s failed %s\n",
1431 mic->name, pathname, strerror(errno));
1432 return fd;
1433 }
1434
block_till_state_change(int fd,struct mic_info * mic)1435 static int block_till_state_change(int fd, struct mic_info *mic)
1436 {
1437 struct pollfd ufds[1];
1438 char value[PAGE_SIZE];
1439 int ret;
1440
1441 ufds[0].fd = fd;
1442 ufds[0].events = POLLERR | POLLPRI;
1443 ret = poll(ufds, 1, -1);
1444 if (ret < 0) {
1445 mpsslog("%s: %s %d poll failed %s\n",
1446 mic->name, __func__, __LINE__, strerror(errno));
1447 return ret;
1448 }
1449
1450 ret = lseek(fd, 0, SEEK_SET);
1451 if (ret < 0) {
1452 mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1453 mic->name, __func__, __LINE__, strerror(errno));
1454 return ret;
1455 }
1456
1457 ret = read(fd, value, sizeof(value));
1458 if (ret < 0) {
1459 mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1460 mic->name, __func__, __LINE__, strerror(errno));
1461 return ret;
1462 }
1463
1464 return 0;
1465 }
1466
1467 static void *
mic_config(void * arg)1468 mic_config(void *arg)
1469 {
1470 struct mic_info *mic = (struct mic_info *)arg;
1471 int fd, ret, stat = 0;
1472
1473 fd = open_state_fd(mic);
1474 if (fd < 0) {
1475 mpsslog("%s: %s %d open state fd failed %s\n",
1476 mic->name, __func__, __LINE__, strerror(errno));
1477 goto exit;
1478 }
1479
1480 do {
1481 ret = block_till_state_change(fd, mic);
1482 if (ret < 0) {
1483 mpsslog("%s: %s %d block_till_state_change error %s\n",
1484 mic->name, __func__, __LINE__, strerror(errno));
1485 goto close_exit;
1486 }
1487
1488 switch (get_mic_state(mic)) {
1489 case MIC_SHUTTING_DOWN:
1490 mic_handle_shutdown(mic);
1491 break;
1492 case MIC_READY:
1493 case MIC_RESET_FAILED:
1494 ret = kill(mic->pid, SIGTERM);
1495 mpsslog("%s: %s %d kill pid %d ret %d\n",
1496 mic->name, __func__, __LINE__,
1497 mic->pid, ret);
1498 if (!ret) {
1499 ret = waitpid(mic->pid, &stat,
1500 WIFSIGNALED(stat));
1501 mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1502 mic->name, __func__, __LINE__,
1503 ret, mic->pid);
1504 }
1505 if (mic->boot_on_resume) {
1506 setsysfs(mic->name, "state", "boot");
1507 mic->boot_on_resume = 0;
1508 }
1509 goto close_exit;
1510 default:
1511 break;
1512 }
1513 } while (1);
1514
1515 close_exit:
1516 close(fd);
1517 exit:
1518 init_mic(mic);
1519 pthread_exit(NULL);
1520 }
1521
1522 static void
set_cmdline(struct mic_info * mic)1523 set_cmdline(struct mic_info *mic)
1524 {
1525 char buffer[PATH_MAX];
1526 int len;
1527
1528 len = snprintf(buffer, PATH_MAX,
1529 "clocksource=tsc highres=off nohz=off ");
1530 len += snprintf(buffer + len, PATH_MAX - len,
1531 "cpufreq_on;corec6_off;pc3_off;pc6_off ");
1532 len += snprintf(buffer + len, PATH_MAX - len,
1533 "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1534 mic->id + 1);
1535
1536 setsysfs(mic->name, "cmdline", buffer);
1537 mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1538 snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1539 mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1540 }
1541
1542 static void
set_log_buf_info(struct mic_info * mic)1543 set_log_buf_info(struct mic_info *mic)
1544 {
1545 int fd;
1546 off_t len;
1547 char system_map[] = "/lib/firmware/mic/System.map";
1548 char *map, *temp, log_buf[17] = {'\0'};
1549
1550 fd = open(system_map, O_RDONLY);
1551 if (fd < 0) {
1552 mpsslog("%s: Opening System.map failed: %d\n",
1553 mic->name, errno);
1554 return;
1555 }
1556 len = lseek(fd, 0, SEEK_END);
1557 if (len < 0) {
1558 mpsslog("%s: Reading System.map size failed: %d\n",
1559 mic->name, errno);
1560 close(fd);
1561 return;
1562 }
1563 map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1564 if (map == MAP_FAILED) {
1565 mpsslog("%s: mmap of System.map failed: %d\n",
1566 mic->name, errno);
1567 close(fd);
1568 return;
1569 }
1570 temp = strstr(map, "__log_buf");
1571 if (!temp) {
1572 mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1573 munmap(map, len);
1574 close(fd);
1575 return;
1576 }
1577 strncpy(log_buf, temp - 19, 16);
1578 setsysfs(mic->name, "log_buf_addr", log_buf);
1579 mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1580 temp = strstr(map, "log_buf_len");
1581 if (!temp) {
1582 mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1583 munmap(map, len);
1584 close(fd);
1585 return;
1586 }
1587 strncpy(log_buf, temp - 19, 16);
1588 setsysfs(mic->name, "log_buf_len", log_buf);
1589 mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1590 munmap(map, len);
1591 close(fd);
1592 }
1593
1594 static void
change_virtblk_backend(int x,siginfo_t * siginfo,void * p)1595 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1596 {
1597 struct mic_info *mic;
1598
1599 for (mic = mic_list.next; mic != NULL; mic = mic->next)
1600 mic->mic_virtblk.signaled = 1/* true */;
1601 }
1602
/*
 * set_mic_boot_params - publish the boot-time sysfs parameters of @mic:
 * first the log buffer information, then the kernel command line.
 */
static void set_mic_boot_params(struct mic_info *mic)
{
	set_log_buf_info(mic);	/* log_buf_addr / log_buf_len */
	set_cmdline(mic);	/* cmdline */
}
1609
1610 static void *
init_mic(void * arg)1611 init_mic(void *arg)
1612 {
1613 struct mic_info *mic = (struct mic_info *)arg;
1614 struct sigaction ignore = {
1615 .sa_flags = 0,
1616 .sa_handler = SIG_IGN
1617 };
1618 struct sigaction act = {
1619 .sa_flags = SA_SIGINFO,
1620 .sa_sigaction = change_virtblk_backend,
1621 };
1622 char buffer[PATH_MAX];
1623 int err, fd;
1624
1625 /*
1626 * Currently, one virtio block device is supported for each MIC card
1627 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1628 * The signal informs the virtio block backend about a change in the
1629 * configuration file which specifies the virtio backend file name on
1630 * the host. Virtio block backend then re-reads the configuration file
1631 * and switches to the new block device. This signalling mechanism may
1632 * not be required once multiple virtio block devices are supported by
1633 * the MIC daemon.
1634 */
1635 sigaction(SIGUSR1, &ignore, NULL);
1636 retry:
1637 fd = open_state_fd(mic);
1638 if (fd < 0) {
1639 mpsslog("%s: %s %d open state fd failed %s\n",
1640 mic->name, __func__, __LINE__, strerror(errno));
1641 sleep(2);
1642 goto retry;
1643 }
1644
1645 if (mic->restart) {
1646 snprintf(buffer, PATH_MAX, "boot");
1647 setsysfs(mic->name, "state", buffer);
1648 mpsslog("%s restarting mic %d\n",
1649 mic->name, mic->restart);
1650 mic->restart = 0;
1651 }
1652
1653 while (1) {
1654 while (block_till_state_change(fd, mic)) {
1655 mpsslog("%s: %s %d block_till_state_change error %s\n",
1656 mic->name, __func__, __LINE__, strerror(errno));
1657 sleep(2);
1658 continue;
1659 }
1660
1661 if (get_mic_state(mic) == MIC_BOOTING)
1662 break;
1663 }
1664
1665 mic->pid = fork();
1666 switch (mic->pid) {
1667 case 0:
1668 add_virtio_device(mic, &virtcons_dev_page.dd);
1669 add_virtio_device(mic, &virtnet_dev_page.dd);
1670 err = pthread_create(&mic->mic_console.console_thread, NULL,
1671 virtio_console, mic);
1672 if (err)
1673 mpsslog("%s virtcons pthread_create failed %s\n",
1674 mic->name, strerror(err));
1675 err = pthread_create(&mic->mic_net.net_thread, NULL,
1676 virtio_net, mic);
1677 if (err)
1678 mpsslog("%s virtnet pthread_create failed %s\n",
1679 mic->name, strerror(err));
1680 err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1681 virtio_block, mic);
1682 if (err)
1683 mpsslog("%s virtblk pthread_create failed %s\n",
1684 mic->name, strerror(err));
1685 sigemptyset(&act.sa_mask);
1686 err = sigaction(SIGUSR1, &act, NULL);
1687 if (err)
1688 mpsslog("%s sigaction SIGUSR1 failed %s\n",
1689 mic->name, strerror(errno));
1690 while (1)
1691 sleep(60);
1692 case -1:
1693 mpsslog("fork failed MIC name %s id %d errno %d\n",
1694 mic->name, mic->id, errno);
1695 break;
1696 default:
1697 err = pthread_create(&mic->config_thread, NULL,
1698 mic_config, mic);
1699 if (err)
1700 mpsslog("%s mic_config pthread_create failed %s\n",
1701 mic->name, strerror(err));
1702 }
1703
1704 return NULL;
1705 }
1706
1707 static void
start_daemon(void)1708 start_daemon(void)
1709 {
1710 struct mic_info *mic;
1711 int err;
1712
1713 for (mic = mic_list.next; mic; mic = mic->next) {
1714 set_mic_boot_params(mic);
1715 err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1716 if (err)
1717 mpsslog("%s init_mic pthread_create failed %s\n",
1718 mic->name, strerror(err));
1719 }
1720
1721 while (1)
1722 sleep(60);
1723 }
1724
1725 static int
init_mic_list(void)1726 init_mic_list(void)
1727 {
1728 struct mic_info *mic = &mic_list;
1729 struct dirent *file;
1730 DIR *dp;
1731 int cnt = 0;
1732
1733 dp = opendir(MICSYSFSDIR);
1734 if (!dp)
1735 return 0;
1736
1737 while ((file = readdir(dp)) != NULL) {
1738 if (!strncmp(file->d_name, "mic", 3)) {
1739 mic->next = calloc(1, sizeof(struct mic_info));
1740 if (mic->next) {
1741 mic = mic->next;
1742 mic->id = atoi(&file->d_name[3]);
1743 mic->name = malloc(strlen(file->d_name) + 16);
1744 if (mic->name)
1745 strcpy(mic->name, file->d_name);
1746 mpsslog("MIC name %s id %d\n", mic->name,
1747 mic->id);
1748 cnt++;
1749 }
1750 }
1751 }
1752
1753 closedir(dp);
1754 return cnt;
1755 }
1756
1757 void
mpsslog(char * format,...)1758 mpsslog(char *format, ...)
1759 {
1760 va_list args;
1761 char buffer[4096];
1762 char ts[52], *ts1;
1763 time_t t;
1764
1765 if (logfp == NULL)
1766 return;
1767
1768 va_start(args, format);
1769 vsprintf(buffer, format, args);
1770 va_end(args);
1771
1772 time(&t);
1773 ts1 = ctime_r(&t, ts);
1774 ts1[strlen(ts1) - 1] = '\0';
1775 fprintf(logfp, "%s: %s", ts1, buffer);
1776
1777 fflush(logfp);
1778 }
1779
1780 int
main(int argc,char * argv[])1781 main(int argc, char *argv[])
1782 {
1783 int cnt;
1784 pid_t pid;
1785
1786 myname = argv[0];
1787
1788 logfp = fopen(LOGFILE_NAME, "a+");
1789 if (!logfp) {
1790 fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1791 exit(1);
1792 }
1793 pid = fork();
1794 switch (pid) {
1795 case 0:
1796 break;
1797 case -1:
1798 exit(2);
1799 default:
1800 exit(0);
1801 }
1802
1803 mpsslog("MIC Daemon start\n");
1804
1805 cnt = init_mic_list();
1806 if (cnt == 0) {
1807 mpsslog("MIC module not loaded\n");
1808 exit(3);
1809 }
1810 mpsslog("MIC found %d devices\n", cnt);
1811
1812 start_daemon();
1813
1814 exit(0);
1815 }
1816