• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Intel MIC Platform Software Stack (MPSS)
4  *
5  * Copyright(c) 2013 Intel Corporation.
6  *
7  * Intel MIC User Space Tools.
8  */
9 
10 #define _GNU_SOURCE
11 
12 #include <stdlib.h>
13 #include <fcntl.h>
14 #include <getopt.h>
15 #include <assert.h>
16 #include <unistd.h>
17 #include <stdbool.h>
18 #include <signal.h>
19 #include <poll.h>
20 #include <features.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <sys/mman.h>
24 #include <sys/socket.h>
25 #include <linux/virtio_ring.h>
26 #include <linux/virtio_net.h>
27 #include <linux/virtio_console.h>
28 #include <linux/virtio_blk.h>
29 #include <linux/version.h>
30 #include "mpssd.h"
31 #include <linux/mic_ioctl.h>
32 #include <linux/mic_common.h>
33 #include <tools/endian.h>
34 
35 static void *init_mic(void *arg);
36 
37 static FILE *logfp;
38 static struct mic_info mic_list;
39 
/* Number of elements in a true array (must not be used on a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Minimum of x and y after converting both to 'type'. Each operand is
 * evaluated exactly once (GNU statement expression).
 */
#define min_t(type, x, y) ({				\
		type __min1 = (x);                      \
		type __min2 = (y);                      \
		__min1 < __min2 ? __min1 : __min2; })

/*
 * align addr on a size boundary - adjust address up/down if needed.
 * 'size' has to be a power of two for the mask arithmetic to be valid.
 * Every macro argument is parenthesised so that expression arguments
 * (e.g. "1 << 2" or "a + b") expand with the intended precedence.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, (size))

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP((addr), (size))

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN((addr), PAGE_SIZE)

/* Read x through a volatile-qualified lvalue so the access is not elided. */
#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))

#define GSO_ENABLED		1
#define MAX_GSO_SIZE		(64 * 1024)
#define ETH_H_LEN		14
/* Largest GSO payload plus an Ethernet header, rounded up to 64 bytes. */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
#define MIC_DEVICE_PAGE_END	0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
68 
69 static struct {
70 	struct mic_device_desc dd;
71 	struct mic_vqconfig vqconfig[2];
72 	__u32 host_features, guest_acknowledgements;
73 	struct virtio_console_config cons_config;
74 } virtcons_dev_page = {
75 	.dd = {
76 		.type = VIRTIO_ID_CONSOLE,
77 		.num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
78 		.feature_len = sizeof(virtcons_dev_page.host_features),
79 		.config_len = sizeof(virtcons_dev_page.cons_config),
80 	},
81 	.vqconfig[0] = {
82 		.num = htole16(MIC_VRING_ENTRIES),
83 	},
84 	.vqconfig[1] = {
85 		.num = htole16(MIC_VRING_ENTRIES),
86 	},
87 };
88 
89 static struct {
90 	struct mic_device_desc dd;
91 	struct mic_vqconfig vqconfig[2];
92 	__u32 host_features, guest_acknowledgements;
93 	struct virtio_net_config net_config;
94 } virtnet_dev_page = {
95 	.dd = {
96 		.type = VIRTIO_ID_NET,
97 		.num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
98 		.feature_len = sizeof(virtnet_dev_page.host_features),
99 		.config_len = sizeof(virtnet_dev_page.net_config),
100 	},
101 	.vqconfig[0] = {
102 		.num = htole16(MIC_VRING_ENTRIES),
103 	},
104 	.vqconfig[1] = {
105 		.num = htole16(MIC_VRING_ENTRIES),
106 	},
107 #if GSO_ENABLED
108 	.host_features = htole32(
109 		1 << VIRTIO_NET_F_CSUM |
110 		1 << VIRTIO_NET_F_GSO |
111 		1 << VIRTIO_NET_F_GUEST_TSO4 |
112 		1 << VIRTIO_NET_F_GUEST_TSO6 |
113 		1 << VIRTIO_NET_F_GUEST_ECN),
114 #else
115 		.host_features = 0,
116 #endif
117 };
118 
119 static const char *mic_config_dir = "/etc/mpss";
120 static const char *virtblk_backend = "VIRTBLK_BACKEND";
121 static struct {
122 	struct mic_device_desc dd;
123 	struct mic_vqconfig vqconfig[1];
124 	__u32 host_features, guest_acknowledgements;
125 	struct virtio_blk_config blk_config;
126 } virtblk_dev_page = {
127 	.dd = {
128 		.type = VIRTIO_ID_BLOCK,
129 		.num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
130 		.feature_len = sizeof(virtblk_dev_page.host_features),
131 		.config_len = sizeof(virtblk_dev_page.blk_config),
132 	},
133 	.vqconfig[0] = {
134 		.num = htole16(MIC_VRING_ENTRIES),
135 	},
136 	.host_features =
137 		htole32(1<<VIRTIO_BLK_F_SEG_MAX),
138 	.blk_config = {
139 		.seg_max = htole32(MIC_VRING_ENTRIES - 2),
140 		.capacity = htole64(0),
141 	 }
142 };
143 
144 static char *myname;
145 
146 static int
tap_configure(struct mic_info * mic,char * dev)147 tap_configure(struct mic_info *mic, char *dev)
148 {
149 	pid_t pid;
150 	char *ifargv[7];
151 	char ipaddr[IFNAMSIZ];
152 	int ret = 0;
153 
154 	pid = fork();
155 	if (pid == 0) {
156 		ifargv[0] = "ip";
157 		ifargv[1] = "link";
158 		ifargv[2] = "set";
159 		ifargv[3] = dev;
160 		ifargv[4] = "up";
161 		ifargv[5] = NULL;
162 		mpsslog("Configuring %s\n", dev);
163 		ret = execvp("ip", ifargv);
164 		if (ret < 0) {
165 			mpsslog("%s execvp failed errno %s\n",
166 				mic->name, strerror(errno));
167 			return ret;
168 		}
169 	}
170 	if (pid < 0) {
171 		mpsslog("%s fork failed errno %s\n",
172 			mic->name, strerror(errno));
173 		return ret;
174 	}
175 
176 	ret = waitpid(pid, NULL, 0);
177 	if (ret < 0) {
178 		mpsslog("%s waitpid failed errno %s\n",
179 			mic->name, strerror(errno));
180 		return ret;
181 	}
182 
183 	snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
184 
185 	pid = fork();
186 	if (pid == 0) {
187 		ifargv[0] = "ip";
188 		ifargv[1] = "addr";
189 		ifargv[2] = "add";
190 		ifargv[3] = ipaddr;
191 		ifargv[4] = "dev";
192 		ifargv[5] = dev;
193 		ifargv[6] = NULL;
194 		mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
195 		ret = execvp("ip", ifargv);
196 		if (ret < 0) {
197 			mpsslog("%s execvp failed errno %s\n",
198 				mic->name, strerror(errno));
199 			return ret;
200 		}
201 	}
202 	if (pid < 0) {
203 		mpsslog("%s fork failed errno %s\n",
204 			mic->name, strerror(errno));
205 		return ret;
206 	}
207 
208 	ret = waitpid(pid, NULL, 0);
209 	if (ret < 0) {
210 		mpsslog("%s waitpid failed errno %s\n",
211 			mic->name, strerror(errno));
212 		return ret;
213 	}
214 	mpsslog("MIC name %s %s %d DONE!\n",
215 		mic->name, __func__, __LINE__);
216 	return 0;
217 }
218 
tun_alloc(struct mic_info * mic,char * dev)219 static int tun_alloc(struct mic_info *mic, char *dev)
220 {
221 	struct ifreq ifr;
222 	int fd, err;
223 #if GSO_ENABLED
224 	unsigned offload;
225 #endif
226 	fd = open("/dev/net/tun", O_RDWR);
227 	if (fd < 0) {
228 		mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
229 		goto done;
230 	}
231 
232 	memset(&ifr, 0, sizeof(ifr));
233 
234 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
235 	if (*dev)
236 		strncpy(ifr.ifr_name, dev, IFNAMSIZ);
237 
238 	err = ioctl(fd, TUNSETIFF, (void *)&ifr);
239 	if (err < 0) {
240 		mpsslog("%s %s %d TUNSETIFF failed %s\n",
241 			mic->name, __func__, __LINE__, strerror(errno));
242 		close(fd);
243 		return err;
244 	}
245 #if GSO_ENABLED
246 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
247 
248 	err = ioctl(fd, TUNSETOFFLOAD, offload);
249 	if (err < 0) {
250 		mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
251 			mic->name, __func__, __LINE__, strerror(errno));
252 		close(fd);
253 		return err;
254 	}
255 #endif
256 	strcpy(dev, ifr.ifr_name);
257 	mpsslog("Created TAP %s\n", dev);
258 done:
259 	return fd;
260 }
261 
/* Slots of the pollfd array used by the virtio net thread. */
enum {
	NET_FD_VIRTIO_NET = 0,	/* virtio net device fd */
	NET_FD_TUN = 1,		/* TUN/TAP fd */
	MAX_NET_FD = 2,		/* number of slots */
};
265 
set_dp(struct mic_info * mic,int type,void * dp)266 static void set_dp(struct mic_info *mic, int type, void *dp)
267 {
268 	switch (type) {
269 	case VIRTIO_ID_CONSOLE:
270 		mic->mic_console.console_dp = dp;
271 		return;
272 	case VIRTIO_ID_NET:
273 		mic->mic_net.net_dp = dp;
274 		return;
275 	case VIRTIO_ID_BLOCK:
276 		mic->mic_virtblk.block_dp = dp;
277 		return;
278 	}
279 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
280 	assert(0);
281 }
282 
get_dp(struct mic_info * mic,int type)283 static void *get_dp(struct mic_info *mic, int type)
284 {
285 	switch (type) {
286 	case VIRTIO_ID_CONSOLE:
287 		return mic->mic_console.console_dp;
288 	case VIRTIO_ID_NET:
289 		return mic->mic_net.net_dp;
290 	case VIRTIO_ID_BLOCK:
291 		return mic->mic_virtblk.block_dp;
292 	}
293 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
294 	assert(0);
295 	return NULL;
296 }
297 
get_device_desc(struct mic_info * mic,int type)298 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
299 {
300 	struct mic_device_desc *d;
301 	int i;
302 	void *dp = get_dp(mic, type);
303 
304 	for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
305 		i += mic_total_desc_size(d)) {
306 		d = dp + i;
307 
308 		/* End of list */
309 		if (d->type == 0)
310 			break;
311 
312 		if (d->type == -1)
313 			continue;
314 
315 		mpsslog("%s %s d-> type %d d %p\n",
316 			mic->name, __func__, d->type, d);
317 
318 		if (d->type == (__u8)type)
319 			return d;
320 	}
321 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
322 	return NULL;
323 }
324 
/*
 * See comments in vhost.c for explanation of next_desc().
 *
 * Returns the index stored in desc->next when the descriptor has
 * VRING_DESC_F_NEXT set, and -1U when the chain ends at this descriptor.
 */
static unsigned next_desc(struct vring_desc *desc)
{
	if (le16toh(desc->flags) & VRING_DESC_F_NEXT)
		return le16toh(desc->next);

	return -1U;
}
335 
336 /* Sum up all the IOVEC length */
337 static ssize_t
sum_iovec_len(struct mic_copy_desc * copy)338 sum_iovec_len(struct mic_copy_desc *copy)
339 {
340 	ssize_t sum = 0;
341 	unsigned int i;
342 
343 	for (i = 0; i < copy->iovcnt; i++)
344 		sum += copy->iov[i].iov_len;
345 	return sum;
346 }
347 
verify_out_len(struct mic_info * mic,struct mic_copy_desc * copy)348 static inline void verify_out_len(struct mic_info *mic,
349 	struct mic_copy_desc *copy)
350 {
351 	if (copy->out_len != sum_iovec_len(copy)) {
352 		mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
353 			mic->name, __func__, __LINE__,
354 			copy->out_len, sum_iovec_len(copy));
355 		assert(copy->out_len == sum_iovec_len(copy));
356 	}
357 }
358 
359 /* Display an iovec */
360 static void
disp_iovec(struct mic_info * mic,struct mic_copy_desc * copy,const char * s,int line)361 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
362 	   const char *s, int line)
363 {
364 	unsigned int i;
365 
366 	for (i = 0; i < copy->iovcnt; i++)
367 		mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
368 			mic->name, s, line, i,
369 			copy->iov[i].iov_base, copy->iov[i].iov_len);
370 }
371 
read_avail_idx(struct mic_vring * vr)372 static inline __u16 read_avail_idx(struct mic_vring *vr)
373 {
374 	return READ_ONCE(vr->info->avail_idx);
375 }
376 
txrx_prepare(int type,bool tx,struct mic_vring * vr,struct mic_copy_desc * copy,ssize_t len)377 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
378 				struct mic_copy_desc *copy, ssize_t len)
379 {
380 	copy->vr_idx = tx ? 0 : 1;
381 	copy->update_used = true;
382 	if (type == VIRTIO_ID_NET)
383 		copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
384 	else
385 		copy->iov[0].iov_len = len;
386 }
387 
388 /* Central API which triggers the copies */
389 static int
mic_virtio_copy(struct mic_info * mic,int fd,struct mic_vring * vr,struct mic_copy_desc * copy)390 mic_virtio_copy(struct mic_info *mic, int fd,
391 		struct mic_vring *vr, struct mic_copy_desc *copy)
392 {
393 	int ret;
394 
395 	ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
396 	if (ret) {
397 		mpsslog("%s %s %d errno %s ret %d\n",
398 			mic->name, __func__, __LINE__,
399 			strerror(errno), ret);
400 	}
401 	return ret;
402 }
403 
/*
 * Number of bytes taken by a vring with @num entries: the descriptor
 * table plus the avail ring, rounded up to @align (a power of two),
 * followed by the used ring.
 */
static inline unsigned _vring_size(unsigned int num, unsigned long align)
{
	size_t desc_and_avail = sizeof(struct vring_desc) * num +
				sizeof(__u16) * (3 + num);
	size_t used = sizeof(__u16) * 3 +
		      sizeof(struct vring_used_elem) * num;

	return ((desc_and_avail + align - 1) & ~(align - 1)) + used;
}
410 
411 /*
412  * This initialization routine requires at least one
413  * vring i.e. vr0. vr1 is optional.
414  */
415 static void *
init_vr(struct mic_info * mic,int fd,int type,struct mic_vring * vr0,struct mic_vring * vr1,int num_vq)416 init_vr(struct mic_info *mic, int fd, int type,
417 	struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
418 {
419 	int vr_size;
420 	char *va;
421 
422 	vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
423 					 MIC_VIRTIO_RING_ALIGN) +
424 			     sizeof(struct _mic_vring_info));
425 	va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
426 		PROT_READ, MAP_SHARED, fd, 0);
427 	if (MAP_FAILED == va) {
428 		mpsslog("%s %s %d mmap failed errno %s\n",
429 			mic->name, __func__, __LINE__,
430 			strerror(errno));
431 		goto done;
432 	}
433 	set_dp(mic, type, va);
434 	vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
435 	vr0->info = vr0->va +
436 		_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
437 	vring_init(&vr0->vr,
438 		   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
439 	mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
440 		__func__, mic->name, vr0->va, vr0->info, vr_size,
441 		_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
442 	mpsslog("magic 0x%x expected 0x%x\n",
443 		le32toh(vr0->info->magic), MIC_MAGIC + type);
444 	assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
445 	if (vr1) {
446 		vr1->va = (struct mic_vring *)
447 			&va[MIC_DEVICE_PAGE_END + vr_size];
448 		vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
449 			MIC_VIRTIO_RING_ALIGN);
450 		vring_init(&vr1->vr,
451 			   MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
452 		mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
453 			__func__, mic->name, vr1->va, vr1->info, vr_size,
454 			_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
455 		mpsslog("magic 0x%x expected 0x%x\n",
456 			le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
457 		assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
458 	}
459 done:
460 	return va;
461 }
462 
463 static int
wait_for_card_driver(struct mic_info * mic,int fd,int type)464 wait_for_card_driver(struct mic_info *mic, int fd, int type)
465 {
466 	struct pollfd pollfd;
467 	int err;
468 	struct mic_device_desc *desc = get_device_desc(mic, type);
469 	__u8 prev_status;
470 
471 	if (!desc)
472 		return -ENODEV;
473 	prev_status = desc->status;
474 	pollfd.fd = fd;
475 	mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
476 		mic->name, __func__, type, desc->status);
477 
478 	while (1) {
479 		pollfd.events = POLLIN;
480 		pollfd.revents = 0;
481 		err = poll(&pollfd, 1, -1);
482 		if (err < 0) {
483 			mpsslog("%s %s poll failed %s\n",
484 				mic->name, __func__, strerror(errno));
485 			continue;
486 		}
487 
488 		if (pollfd.revents) {
489 			if (desc->status != prev_status) {
490 				mpsslog("%s %s Waiting... desc-> type %d "
491 					"status 0x%x\n",
492 					mic->name, __func__, type,
493 					desc->status);
494 				prev_status = desc->status;
495 			}
496 			if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
497 				mpsslog("%s %s poll.revents %d\n",
498 					mic->name, __func__, pollfd.revents);
499 				mpsslog("%s %s desc-> type %d status 0x%x\n",
500 					mic->name, __func__, type,
501 					desc->status);
502 				break;
503 			}
504 		}
505 	}
506 	return 0;
507 }
508 
509 /* Spin till we have some descriptors */
510 static void
spin_for_descriptors(struct mic_info * mic,struct mic_vring * vr)511 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
512 {
513 	__u16 avail_idx = read_avail_idx(vr);
514 
515 	while (avail_idx == le16toh(READ_ONCE(vr->vr.avail->idx))) {
516 #ifdef DEBUG
517 		mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
518 			mic->name, __func__,
519 			le16toh(vr->vr.avail->idx), vr->info->avail_idx);
520 #endif
521 		sched_yield();
522 	}
523 }
524 
525 static void *
virtio_net(void * arg)526 virtio_net(void *arg)
527 {
528 	static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
529 	static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
530 	struct iovec vnet_iov[2][2] = {
531 		{ { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
532 		  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
533 		{ { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
534 		  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
535 	};
536 	struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
537 	struct mic_info *mic = (struct mic_info *)arg;
538 	char if_name[IFNAMSIZ];
539 	struct pollfd net_poll[MAX_NET_FD];
540 	struct mic_vring tx_vr, rx_vr;
541 	struct mic_copy_desc copy;
542 	struct mic_device_desc *desc;
543 	int err;
544 
545 	snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
546 	mic->mic_net.tap_fd = tun_alloc(mic, if_name);
547 	if (mic->mic_net.tap_fd < 0)
548 		goto done;
549 
550 	if (tap_configure(mic, if_name))
551 		goto done;
552 	mpsslog("MIC name %s id %d\n", mic->name, mic->id);
553 
554 	net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
555 	net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
556 	net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
557 	net_poll[NET_FD_TUN].events = POLLIN;
558 
559 	if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
560 				  VIRTIO_ID_NET, &tx_vr, &rx_vr,
561 		virtnet_dev_page.dd.num_vq)) {
562 		mpsslog("%s init_vr failed %s\n",
563 			mic->name, strerror(errno));
564 		goto done;
565 	}
566 
567 	copy.iovcnt = 2;
568 	desc = get_device_desc(mic, VIRTIO_ID_NET);
569 
570 	while (1) {
571 		ssize_t len;
572 
573 		net_poll[NET_FD_VIRTIO_NET].revents = 0;
574 		net_poll[NET_FD_TUN].revents = 0;
575 
576 		/* Start polling for data from tap and virtio net */
577 		err = poll(net_poll, 2, -1);
578 		if (err < 0) {
579 			mpsslog("%s poll failed %s\n",
580 				__func__, strerror(errno));
581 			continue;
582 		}
583 		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
584 			err = wait_for_card_driver(mic,
585 						   mic->mic_net.virtio_net_fd,
586 						   VIRTIO_ID_NET);
587 			if (err) {
588 				mpsslog("%s %s %d Exiting...\n",
589 					mic->name, __func__, __LINE__);
590 				break;
591 			}
592 		}
593 		/*
594 		 * Check if there is data to be read from TUN and write to
595 		 * virtio net fd if there is.
596 		 */
597 		if (net_poll[NET_FD_TUN].revents & POLLIN) {
598 			copy.iov = iov0;
599 			len = readv(net_poll[NET_FD_TUN].fd,
600 				copy.iov, copy.iovcnt);
601 			if (len > 0) {
602 				struct virtio_net_hdr *hdr
603 					= (struct virtio_net_hdr *)vnet_hdr[0];
604 
605 				/* Disable checksums on the card since we are on
606 				   a reliable PCIe link */
607 				hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
608 #ifdef DEBUG
609 				mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
610 					__func__, __LINE__, hdr->flags);
611 				mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
612 					copy.out_len, hdr->gso_type);
613 #endif
614 #ifdef DEBUG
615 				disp_iovec(mic, copy, __func__, __LINE__);
616 				mpsslog("%s %s %d read from tap 0x%lx\n",
617 					mic->name, __func__, __LINE__,
618 					len);
619 #endif
620 				spin_for_descriptors(mic, &tx_vr);
621 				txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
622 					     len);
623 
624 				err = mic_virtio_copy(mic,
625 					mic->mic_net.virtio_net_fd, &tx_vr,
626 					&copy);
627 				if (err < 0) {
628 					mpsslog("%s %s %d mic_virtio_copy %s\n",
629 						mic->name, __func__, __LINE__,
630 						strerror(errno));
631 				}
632 				if (!err)
633 					verify_out_len(mic, &copy);
634 #ifdef DEBUG
635 				disp_iovec(mic, copy, __func__, __LINE__);
636 				mpsslog("%s %s %d wrote to net 0x%lx\n",
637 					mic->name, __func__, __LINE__,
638 					sum_iovec_len(&copy));
639 #endif
640 				/* Reinitialize IOV for next run */
641 				iov0[1].iov_len = MAX_NET_PKT_SIZE;
642 			} else if (len < 0) {
643 				disp_iovec(mic, &copy, __func__, __LINE__);
644 				mpsslog("%s %s %d read failed %s ", mic->name,
645 					__func__, __LINE__, strerror(errno));
646 				mpsslog("cnt %d sum %zd\n",
647 					copy.iovcnt, sum_iovec_len(&copy));
648 			}
649 		}
650 
651 		/*
652 		 * Check if there is data to be read from virtio net and
653 		 * write to TUN if there is.
654 		 */
655 		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
656 			while (rx_vr.info->avail_idx !=
657 				le16toh(rx_vr.vr.avail->idx)) {
658 				copy.iov = iov1;
659 				txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
660 					     MAX_NET_PKT_SIZE
661 					+ sizeof(struct virtio_net_hdr));
662 
663 				err = mic_virtio_copy(mic,
664 					mic->mic_net.virtio_net_fd, &rx_vr,
665 					&copy);
666 				if (!err) {
667 #ifdef DEBUG
668 					struct virtio_net_hdr *hdr
669 						= (struct virtio_net_hdr *)
670 							vnet_hdr[1];
671 
672 					mpsslog("%s %s %d hdr->flags 0x%x, ",
673 						mic->name, __func__, __LINE__,
674 						hdr->flags);
675 					mpsslog("out_len %d gso_type 0x%x\n",
676 						copy.out_len,
677 						hdr->gso_type);
678 #endif
679 					/* Set the correct output iov_len */
680 					iov1[1].iov_len = copy.out_len -
681 						sizeof(struct virtio_net_hdr);
682 					verify_out_len(mic, &copy);
683 #ifdef DEBUG
684 					disp_iovec(mic, copy, __func__,
685 						   __LINE__);
686 					mpsslog("%s %s %d ",
687 						mic->name, __func__, __LINE__);
688 					mpsslog("read from net 0x%lx\n",
689 						sum_iovec_len(copy));
690 #endif
691 					len = writev(net_poll[NET_FD_TUN].fd,
692 						copy.iov, copy.iovcnt);
693 					if (len != sum_iovec_len(&copy)) {
694 						mpsslog("Tun write failed %s ",
695 							strerror(errno));
696 						mpsslog("len 0x%zx ", len);
697 						mpsslog("read_len 0x%zx\n",
698 							sum_iovec_len(&copy));
699 					} else {
700 #ifdef DEBUG
701 						disp_iovec(mic, &copy, __func__,
702 							   __LINE__);
703 						mpsslog("%s %s %d ",
704 							mic->name, __func__,
705 							__LINE__);
706 						mpsslog("wrote to tap 0x%lx\n",
707 							len);
708 #endif
709 					}
710 				} else {
711 					mpsslog("%s %s %d mic_virtio_copy %s\n",
712 						mic->name, __func__, __LINE__,
713 						strerror(errno));
714 					break;
715 				}
716 			}
717 		}
718 		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
719 			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
720 	}
721 done:
722 	pthread_exit(NULL);
723 }
724 
725 /* virtio_console */
726 #define VIRTIO_CONSOLE_FD 0
727 #define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
728 #define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
729 #define MAX_BUFFER_SIZE PAGE_SIZE
730 
731 static void *
virtio_console(void * arg)732 virtio_console(void *arg)
733 {
734 	static __u8 vcons_buf[2][PAGE_SIZE];
735 	struct iovec vcons_iov[2] = {
736 		{ .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
737 		{ .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
738 	};
739 	struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
740 	struct mic_info *mic = (struct mic_info *)arg;
741 	int err;
742 	struct pollfd console_poll[MAX_CONSOLE_FD];
743 	int pty_fd;
744 	char *pts_name;
745 	ssize_t len;
746 	struct mic_vring tx_vr, rx_vr;
747 	struct mic_copy_desc copy;
748 	struct mic_device_desc *desc;
749 
750 	pty_fd = posix_openpt(O_RDWR);
751 	if (pty_fd < 0) {
752 		mpsslog("can't open a pseudoterminal master device: %s\n",
753 			strerror(errno));
754 		goto _return;
755 	}
756 	pts_name = ptsname(pty_fd);
757 	if (pts_name == NULL) {
758 		mpsslog("can't get pts name\n");
759 		goto _close_pty;
760 	}
761 	printf("%s console message goes to %s\n", mic->name, pts_name);
762 	mpsslog("%s console message goes to %s\n", mic->name, pts_name);
763 	err = grantpt(pty_fd);
764 	if (err < 0) {
765 		mpsslog("can't grant access: %s %s\n",
766 			pts_name, strerror(errno));
767 		goto _close_pty;
768 	}
769 	err = unlockpt(pty_fd);
770 	if (err < 0) {
771 		mpsslog("can't unlock a pseudoterminal: %s %s\n",
772 			pts_name, strerror(errno));
773 		goto _close_pty;
774 	}
775 	console_poll[MONITOR_FD].fd = pty_fd;
776 	console_poll[MONITOR_FD].events = POLLIN;
777 
778 	console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
779 	console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
780 
781 	if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
782 				  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
783 		virtcons_dev_page.dd.num_vq)) {
784 		mpsslog("%s init_vr failed %s\n",
785 			mic->name, strerror(errno));
786 		goto _close_pty;
787 	}
788 
789 	copy.iovcnt = 1;
790 	desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
791 
792 	for (;;) {
793 		console_poll[MONITOR_FD].revents = 0;
794 		console_poll[VIRTIO_CONSOLE_FD].revents = 0;
795 		err = poll(console_poll, MAX_CONSOLE_FD, -1);
796 		if (err < 0) {
797 			mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
798 				strerror(errno));
799 			continue;
800 		}
801 		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
802 			err = wait_for_card_driver(mic,
803 					mic->mic_console.virtio_console_fd,
804 					VIRTIO_ID_CONSOLE);
805 			if (err) {
806 				mpsslog("%s %s %d Exiting...\n",
807 					mic->name, __func__, __LINE__);
808 				break;
809 			}
810 		}
811 
812 		if (console_poll[MONITOR_FD].revents & POLLIN) {
813 			copy.iov = iov0;
814 			len = readv(pty_fd, copy.iov, copy.iovcnt);
815 			if (len > 0) {
816 #ifdef DEBUG
817 				disp_iovec(mic, copy, __func__, __LINE__);
818 				mpsslog("%s %s %d read from tap 0x%lx\n",
819 					mic->name, __func__, __LINE__,
820 					len);
821 #endif
822 				spin_for_descriptors(mic, &tx_vr);
823 				txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
824 					     &copy, len);
825 
826 				err = mic_virtio_copy(mic,
827 					mic->mic_console.virtio_console_fd,
828 					&tx_vr, &copy);
829 				if (err < 0) {
830 					mpsslog("%s %s %d mic_virtio_copy %s\n",
831 						mic->name, __func__, __LINE__,
832 						strerror(errno));
833 				}
834 				if (!err)
835 					verify_out_len(mic, &copy);
836 #ifdef DEBUG
837 				disp_iovec(mic, copy, __func__, __LINE__);
838 				mpsslog("%s %s %d wrote to net 0x%lx\n",
839 					mic->name, __func__, __LINE__,
840 					sum_iovec_len(copy));
841 #endif
842 				/* Reinitialize IOV for next run */
843 				iov0->iov_len = PAGE_SIZE;
844 			} else if (len < 0) {
845 				disp_iovec(mic, &copy, __func__, __LINE__);
846 				mpsslog("%s %s %d read failed %s ",
847 					mic->name, __func__, __LINE__,
848 					strerror(errno));
849 				mpsslog("cnt %d sum %zd\n",
850 					copy.iovcnt, sum_iovec_len(&copy));
851 			}
852 		}
853 
854 		if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
855 			while (rx_vr.info->avail_idx !=
856 				le16toh(rx_vr.vr.avail->idx)) {
857 				copy.iov = iov1;
858 				txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
859 					     &copy, PAGE_SIZE);
860 
861 				err = mic_virtio_copy(mic,
862 					mic->mic_console.virtio_console_fd,
863 					&rx_vr, &copy);
864 				if (!err) {
865 					/* Set the correct output iov_len */
866 					iov1->iov_len = copy.out_len;
867 					verify_out_len(mic, &copy);
868 #ifdef DEBUG
869 					disp_iovec(mic, copy, __func__,
870 						   __LINE__);
871 					mpsslog("%s %s %d ",
872 						mic->name, __func__, __LINE__);
873 					mpsslog("read from net 0x%lx\n",
874 						sum_iovec_len(copy));
875 #endif
876 					len = writev(pty_fd,
877 						copy.iov, copy.iovcnt);
878 					if (len != sum_iovec_len(&copy)) {
879 						mpsslog("Tun write failed %s ",
880 							strerror(errno));
881 						mpsslog("len 0x%zx ", len);
882 						mpsslog("read_len 0x%zx\n",
883 							sum_iovec_len(&copy));
884 					} else {
885 #ifdef DEBUG
886 						disp_iovec(mic, copy, __func__,
887 							   __LINE__);
888 						mpsslog("%s %s %d ",
889 							mic->name, __func__,
890 							__LINE__);
891 						mpsslog("wrote to tap 0x%lx\n",
892 							len);
893 #endif
894 					}
895 				} else {
896 					mpsslog("%s %s %d mic_virtio_copy %s\n",
897 						mic->name, __func__, __LINE__,
898 						strerror(errno));
899 					break;
900 				}
901 			}
902 		}
903 		if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
904 			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
905 	}
906 _close_pty:
907 	close(pty_fd);
908 _return:
909 	pthread_exit(NULL);
910 }
911 
912 static void
add_virtio_device(struct mic_info * mic,struct mic_device_desc * dd)913 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
914 {
915 	char path[PATH_MAX];
916 	int fd, err;
917 
918 	snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
919 	fd = open(path, O_RDWR);
920 	if (fd < 0) {
921 		mpsslog("Could not open %s %s\n", path, strerror(errno));
922 		return;
923 	}
924 
925 	err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
926 	if (err < 0) {
927 		mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
928 		close(fd);
929 		return;
930 	}
931 	switch (dd->type) {
932 	case VIRTIO_ID_NET:
933 		mic->mic_net.virtio_net_fd = fd;
934 		mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
935 		break;
936 	case VIRTIO_ID_CONSOLE:
937 		mic->mic_console.virtio_console_fd = fd;
938 		mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
939 		break;
940 	case VIRTIO_ID_BLOCK:
941 		mic->mic_virtblk.virtio_block_fd = fd;
942 		mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
943 		break;
944 	}
945 }
946 
947 static bool
set_backend_file(struct mic_info * mic)948 set_backend_file(struct mic_info *mic)
949 {
950 	FILE *config;
951 	char buff[PATH_MAX], *line, *evv, *p;
952 
953 	snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
954 	config = fopen(buff, "r");
955 	if (config == NULL)
956 		return false;
957 	do {  /* look for "virtblk_backend=XXXX" */
958 		line = fgets(buff, PATH_MAX, config);
959 		if (line == NULL)
960 			break;
961 		if (*line == '#')
962 			continue;
963 		p = strchr(line, '\n');
964 		if (p)
965 			*p = '\0';
966 	} while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
967 	fclose(config);
968 	if (line == NULL)
969 		return false;
970 	evv = strchr(line, '=');
971 	if (evv == NULL)
972 		return false;
973 	mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
974 	if (mic->mic_virtblk.backend_file == NULL) {
975 		mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
976 		return false;
977 	}
978 	strcpy(mic->mic_virtblk.backend_file, evv + 1);
979 	return true;
980 }
981 
982 #define SECTOR_SIZE 512
983 static bool
set_backend_size(struct mic_info * mic)984 set_backend_size(struct mic_info *mic)
985 {
986 	mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
987 		SEEK_END);
988 	if (mic->mic_virtblk.backend_size < 0) {
989 		mpsslog("%s: can't seek: %s\n",
990 			mic->name, mic->mic_virtblk.backend_file);
991 		return false;
992 	}
993 	virtblk_dev_page.blk_config.capacity =
994 		mic->mic_virtblk.backend_size / SECTOR_SIZE;
995 	if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
996 		virtblk_dev_page.blk_config.capacity++;
997 
998 	virtblk_dev_page.blk_config.capacity =
999 		htole64(virtblk_dev_page.blk_config.capacity);
1000 
1001 	return true;
1002 }
1003 
1004 static bool
open_backend(struct mic_info * mic)1005 open_backend(struct mic_info *mic)
1006 {
1007 	if (!set_backend_file(mic))
1008 		goto _error_exit;
1009 	mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1010 	if (mic->mic_virtblk.backend < 0) {
1011 		mpsslog("%s: can't open: %s\n", mic->name,
1012 			mic->mic_virtblk.backend_file);
1013 		goto _error_free;
1014 	}
1015 	if (!set_backend_size(mic))
1016 		goto _error_close;
1017 	mic->mic_virtblk.backend_addr = mmap(NULL,
1018 		mic->mic_virtblk.backend_size,
1019 		PROT_READ|PROT_WRITE, MAP_SHARED,
1020 		mic->mic_virtblk.backend, 0L);
1021 	if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1022 		mpsslog("%s: can't map: %s %s\n",
1023 			mic->name, mic->mic_virtblk.backend_file,
1024 			strerror(errno));
1025 		goto _error_close;
1026 	}
1027 	return true;
1028 
1029  _error_close:
1030 	close(mic->mic_virtblk.backend);
1031  _error_free:
1032 	free(mic->mic_virtblk.backend_file);
1033  _error_exit:
1034 	return false;
1035 }
1036 
1037 static void
close_backend(struct mic_info * mic)1038 close_backend(struct mic_info *mic)
1039 {
1040 	munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1041 	close(mic->mic_virtblk.backend);
1042 	free(mic->mic_virtblk.backend_file);
1043 }
1044 
1045 static bool
start_virtblk(struct mic_info * mic,struct mic_vring * vring)1046 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1047 {
1048 	if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1049 		mpsslog("%s: blk_config is not 8 byte aligned.\n",
1050 			mic->name);
1051 		return false;
1052 	}
1053 	add_virtio_device(mic, &virtblk_dev_page.dd);
1054 	if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1055 				  VIRTIO_ID_BLOCK, vring, NULL,
1056 				  virtblk_dev_page.dd.num_vq)) {
1057 		mpsslog("%s init_vr failed %s\n",
1058 			mic->name, strerror(errno));
1059 		return false;
1060 	}
1061 	return true;
1062 }
1063 
1064 static void
stop_virtblk(struct mic_info * mic)1065 stop_virtblk(struct mic_info *mic)
1066 {
1067 	int vr_size, ret;
1068 
1069 	vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1070 					 MIC_VIRTIO_RING_ALIGN) +
1071 			     sizeof(struct _mic_vring_info));
1072 	ret = munmap(mic->mic_virtblk.block_dp,
1073 		MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1074 	if (ret < 0)
1075 		mpsslog("%s munmap errno %d\n", mic->name, errno);
1076 	close(mic->mic_virtblk.virtio_block_fd);
1077 }
1078 
1079 static __u8
header_error_check(struct vring_desc * desc)1080 header_error_check(struct vring_desc *desc)
1081 {
1082 	if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1083 		mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1084 			__func__, __LINE__);
1085 		return -EIO;
1086 	}
1087 	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1088 		mpsslog("%s() %d: alone\n",
1089 			__func__, __LINE__);
1090 		return -EIO;
1091 	}
1092 	if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1093 		mpsslog("%s() %d: not read\n",
1094 			__func__, __LINE__);
1095 		return -EIO;
1096 	}
1097 	return 0;
1098 }
1099 
1100 static int
read_header(int fd,struct virtio_blk_outhdr * hdr,__u32 desc_idx)1101 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1102 {
1103 	struct iovec iovec;
1104 	struct mic_copy_desc copy;
1105 
1106 	iovec.iov_len = sizeof(*hdr);
1107 	iovec.iov_base = hdr;
1108 	copy.iov = &iovec;
1109 	copy.iovcnt = 1;
1110 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1111 	copy.update_used = false;  /* do not update used index */
1112 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1113 }
1114 
1115 static int
transfer_blocks(int fd,struct iovec * iovec,__u32 iovcnt)1116 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1117 {
1118 	struct mic_copy_desc copy;
1119 
1120 	copy.iov = iovec;
1121 	copy.iovcnt = iovcnt;
1122 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1123 	copy.update_used = false;  /* do not update used index */
1124 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1125 }
1126 
1127 static __u8
status_error_check(struct vring_desc * desc)1128 status_error_check(struct vring_desc *desc)
1129 {
1130 	if (le32toh(desc->len) != sizeof(__u8)) {
1131 		mpsslog("%s() %d: length is not sizeof(status)\n",
1132 			__func__, __LINE__);
1133 		return -EIO;
1134 	}
1135 	return 0;
1136 }
1137 
1138 static int
write_status(int fd,__u8 * status)1139 write_status(int fd, __u8 *status)
1140 {
1141 	struct iovec iovec;
1142 	struct mic_copy_desc copy;
1143 
1144 	iovec.iov_base = status;
1145 	iovec.iov_len = sizeof(*status);
1146 	copy.iov = &iovec;
1147 	copy.iovcnt = 1;
1148 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1149 	copy.update_used = true; /* Update used index */
1150 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1151 }
1152 
1153 #ifndef VIRTIO_BLK_T_GET_ID
1154 #define VIRTIO_BLK_T_GET_ID    8
1155 #endif
1156 
1157 static void *
virtio_block(void * arg)1158 virtio_block(void *arg)
1159 {
1160 	struct mic_info *mic = (struct mic_info *)arg;
1161 	int ret;
1162 	struct pollfd block_poll;
1163 	struct mic_vring vring;
1164 	__u16 avail_idx;
1165 	__u32 desc_idx;
1166 	struct vring_desc *desc;
1167 	struct iovec *iovec, *piov;
1168 	__u8 status;
1169 	__u32 buffer_desc_idx;
1170 	struct virtio_blk_outhdr hdr;
1171 	void *fos;
1172 
1173 	for (;;) {  /* forever */
1174 		if (!open_backend(mic)) { /* No virtblk */
1175 			for (mic->mic_virtblk.signaled = 0;
1176 				!mic->mic_virtblk.signaled;)
1177 				sleep(1);
1178 			continue;
1179 		}
1180 
1181 		/* backend file is specified. */
1182 		if (!start_virtblk(mic, &vring))
1183 			goto _close_backend;
1184 		iovec = malloc(sizeof(*iovec) *
1185 			le32toh(virtblk_dev_page.blk_config.seg_max));
1186 		if (!iovec) {
1187 			mpsslog("%s: can't alloc iovec: %s\n",
1188 				mic->name, strerror(ENOMEM));
1189 			goto _stop_virtblk;
1190 		}
1191 
1192 		block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1193 		block_poll.events = POLLIN;
1194 		for (mic->mic_virtblk.signaled = 0;
1195 		     !mic->mic_virtblk.signaled;) {
1196 			block_poll.revents = 0;
1197 					/* timeout in 1 sec to see signaled */
1198 			ret = poll(&block_poll, 1, 1000);
1199 			if (ret < 0) {
1200 				mpsslog("%s %d: poll failed: %s\n",
1201 					__func__, __LINE__,
1202 					strerror(errno));
1203 				continue;
1204 			}
1205 
1206 			if (!(block_poll.revents & POLLIN)) {
1207 #ifdef DEBUG
1208 				mpsslog("%s %d: block_poll.revents=0x%x\n",
1209 					__func__, __LINE__, block_poll.revents);
1210 #endif
1211 				continue;
1212 			}
1213 
1214 			/* POLLIN */
1215 			while (vring.info->avail_idx !=
1216 				le16toh(vring.vr.avail->idx)) {
1217 				/* read header element */
1218 				avail_idx =
1219 					vring.info->avail_idx &
1220 					(vring.vr.num - 1);
1221 				desc_idx = le16toh(
1222 					vring.vr.avail->ring[avail_idx]);
1223 				desc = &vring.vr.desc[desc_idx];
1224 #ifdef DEBUG
1225 				mpsslog("%s() %d: avail_idx=%d ",
1226 					__func__, __LINE__,
1227 					vring.info->avail_idx);
1228 				mpsslog("vring.vr.num=%d desc=%p\n",
1229 					vring.vr.num, desc);
1230 #endif
1231 				status = header_error_check(desc);
1232 				ret = read_header(
1233 					mic->mic_virtblk.virtio_block_fd,
1234 					&hdr, desc_idx);
1235 				if (ret < 0) {
1236 					mpsslog("%s() %d %s: ret=%d %s\n",
1237 						__func__, __LINE__,
1238 						mic->name, ret,
1239 						strerror(errno));
1240 					break;
1241 				}
1242 				/* buffer element */
1243 				piov = iovec;
1244 				status = 0;
1245 				fos = mic->mic_virtblk.backend_addr +
1246 					(hdr.sector * SECTOR_SIZE);
1247 				buffer_desc_idx = next_desc(desc);
1248 				desc_idx = buffer_desc_idx;
1249 				for (desc = &vring.vr.desc[buffer_desc_idx];
1250 				     desc->flags & VRING_DESC_F_NEXT;
1251 				     desc_idx = next_desc(desc),
1252 					     desc = &vring.vr.desc[desc_idx]) {
1253 					piov->iov_len = desc->len;
1254 					piov->iov_base = fos;
1255 					piov++;
1256 					fos += desc->len;
1257 				}
1258 				/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1259 				if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1260 					VIRTIO_BLK_T_GET_ID)) {
1261 					/*
1262 					  VIRTIO_BLK_T_IN - does not do
1263 					  anything. Probably for documenting.
1264 					  VIRTIO_BLK_T_SCSI_CMD - for
1265 					  virtio_scsi.
1266 					  VIRTIO_BLK_T_FLUSH - turned off in
1267 					  config space.
1268 					  VIRTIO_BLK_T_BARRIER - defined but not
1269 					  used in anywhere.
1270 					*/
1271 					mpsslog("%s() %d: type %x ",
1272 						__func__, __LINE__,
1273 						hdr.type);
1274 					mpsslog("is not supported\n");
1275 					status = -ENOTSUP;
1276 
1277 				} else {
1278 					ret = transfer_blocks(
1279 					mic->mic_virtblk.virtio_block_fd,
1280 						iovec,
1281 						piov - iovec);
1282 					if (ret < 0 &&
1283 					    status != 0)
1284 						status = ret;
1285 				}
1286 				/* write status and update used pointer */
1287 				if (status != 0)
1288 					status = status_error_check(desc);
1289 				ret = write_status(
1290 					mic->mic_virtblk.virtio_block_fd,
1291 					&status);
1292 #ifdef DEBUG
1293 				mpsslog("%s() %d: write status=%d on desc=%p\n",
1294 					__func__, __LINE__,
1295 					status, desc);
1296 #endif
1297 			}
1298 		}
1299 		free(iovec);
1300 _stop_virtblk:
1301 		stop_virtblk(mic);
1302 _close_backend:
1303 		close_backend(mic);
1304 	}  /* forever */
1305 
1306 	pthread_exit(NULL);
1307 }
1308 
1309 static void
reset(struct mic_info * mic)1310 reset(struct mic_info *mic)
1311 {
1312 #define RESET_TIMEOUT 120
1313 	int i = RESET_TIMEOUT;
1314 	setsysfs(mic->name, "state", "reset");
1315 	while (i) {
1316 		char *state;
1317 		state = readsysfs(mic->name, "state");
1318 		if (!state)
1319 			goto retry;
1320 		mpsslog("%s: %s %d state %s\n",
1321 			mic->name, __func__, __LINE__, state);
1322 
1323 		if (!strcmp(state, "ready")) {
1324 			free(state);
1325 			break;
1326 		}
1327 		free(state);
1328 retry:
1329 		sleep(1);
1330 		i--;
1331 	}
1332 }
1333 
1334 static int
get_mic_shutdown_status(struct mic_info * mic,char * shutdown_status)1335 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1336 {
1337 	if (!strcmp(shutdown_status, "nop"))
1338 		return MIC_NOP;
1339 	if (!strcmp(shutdown_status, "crashed"))
1340 		return MIC_CRASHED;
1341 	if (!strcmp(shutdown_status, "halted"))
1342 		return MIC_HALTED;
1343 	if (!strcmp(shutdown_status, "poweroff"))
1344 		return MIC_POWER_OFF;
1345 	if (!strcmp(shutdown_status, "restart"))
1346 		return MIC_RESTART;
1347 	mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1348 	/* Invalid state */
1349 	assert(0);
1350 };
1351 
get_mic_state(struct mic_info * mic)1352 static int get_mic_state(struct mic_info *mic)
1353 {
1354 	char *state = NULL;
1355 	enum mic_states mic_state;
1356 
1357 	while (!state) {
1358 		state = readsysfs(mic->name, "state");
1359 		sleep(1);
1360 	}
1361 	mpsslog("%s: %s %d state %s\n",
1362 		mic->name, __func__, __LINE__, state);
1363 
1364 	if (!strcmp(state, "ready")) {
1365 		mic_state = MIC_READY;
1366 	} else if (!strcmp(state, "booting")) {
1367 		mic_state = MIC_BOOTING;
1368 	} else if (!strcmp(state, "online")) {
1369 		mic_state = MIC_ONLINE;
1370 	} else if (!strcmp(state, "shutting_down")) {
1371 		mic_state = MIC_SHUTTING_DOWN;
1372 	} else if (!strcmp(state, "reset_failed")) {
1373 		mic_state = MIC_RESET_FAILED;
1374 	} else if (!strcmp(state, "resetting")) {
1375 		mic_state = MIC_RESETTING;
1376 	} else {
1377 		mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1378 		assert(0);
1379 	}
1380 
1381 	free(state);
1382 	return mic_state;
1383 };
1384 
mic_handle_shutdown(struct mic_info * mic)1385 static void mic_handle_shutdown(struct mic_info *mic)
1386 {
1387 #define SHUTDOWN_TIMEOUT 60
1388 	int i = SHUTDOWN_TIMEOUT;
1389 	char *shutdown_status;
1390 	while (i) {
1391 		shutdown_status = readsysfs(mic->name, "shutdown_status");
1392 		if (!shutdown_status) {
1393 			sleep(1);
1394 			continue;
1395 		}
1396 		mpsslog("%s: %s %d shutdown_status %s\n",
1397 			mic->name, __func__, __LINE__, shutdown_status);
1398 		switch (get_mic_shutdown_status(mic, shutdown_status)) {
1399 		case MIC_RESTART:
1400 			mic->restart = 1;
1401 		case MIC_HALTED:
1402 		case MIC_POWER_OFF:
1403 		case MIC_CRASHED:
1404 			free(shutdown_status);
1405 			goto reset;
1406 		default:
1407 			break;
1408 		}
1409 		free(shutdown_status);
1410 		sleep(1);
1411 		i--;
1412 	}
1413 reset:
1414 	if (!i)
1415 		mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1416 			mic->name, __func__, __LINE__, shutdown_status);
1417 	reset(mic);
1418 }
1419 
open_state_fd(struct mic_info * mic)1420 static int open_state_fd(struct mic_info *mic)
1421 {
1422 	char pathname[PATH_MAX];
1423 	int fd;
1424 
1425 	snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1426 		 MICSYSFSDIR, mic->name, "state");
1427 
1428 	fd = open(pathname, O_RDONLY);
1429 	if (fd < 0)
1430 		mpsslog("%s: opening file %s failed %s\n",
1431 			mic->name, pathname, strerror(errno));
1432 	return fd;
1433 }
1434 
block_till_state_change(int fd,struct mic_info * mic)1435 static int block_till_state_change(int fd, struct mic_info *mic)
1436 {
1437 	struct pollfd ufds[1];
1438 	char value[PAGE_SIZE];
1439 	int ret;
1440 
1441 	ufds[0].fd = fd;
1442 	ufds[0].events = POLLERR | POLLPRI;
1443 	ret = poll(ufds, 1, -1);
1444 	if (ret < 0) {
1445 		mpsslog("%s: %s %d poll failed %s\n",
1446 			mic->name, __func__, __LINE__, strerror(errno));
1447 		return ret;
1448 	}
1449 
1450 	ret = lseek(fd, 0, SEEK_SET);
1451 	if (ret < 0) {
1452 		mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1453 			mic->name, __func__, __LINE__, strerror(errno));
1454 		return ret;
1455 	}
1456 
1457 	ret = read(fd, value, sizeof(value));
1458 	if (ret < 0) {
1459 		mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1460 			mic->name, __func__, __LINE__, strerror(errno));
1461 		return ret;
1462 	}
1463 
1464 	return 0;
1465 }
1466 
1467 static void *
mic_config(void * arg)1468 mic_config(void *arg)
1469 {
1470 	struct mic_info *mic = (struct mic_info *)arg;
1471 	int fd, ret, stat = 0;
1472 
1473 	fd = open_state_fd(mic);
1474 	if (fd < 0) {
1475 		mpsslog("%s: %s %d open state fd failed %s\n",
1476 			mic->name, __func__, __LINE__, strerror(errno));
1477 		goto exit;
1478 	}
1479 
1480 	do {
1481 		ret = block_till_state_change(fd, mic);
1482 		if (ret < 0) {
1483 			mpsslog("%s: %s %d block_till_state_change error %s\n",
1484 				mic->name, __func__, __LINE__, strerror(errno));
1485 			goto close_exit;
1486 		}
1487 
1488 		switch (get_mic_state(mic)) {
1489 		case MIC_SHUTTING_DOWN:
1490 			mic_handle_shutdown(mic);
1491 			break;
1492 		case MIC_READY:
1493 		case MIC_RESET_FAILED:
1494 			ret = kill(mic->pid, SIGTERM);
1495 			mpsslog("%s: %s %d kill pid %d ret %d\n",
1496 				mic->name, __func__, __LINE__,
1497 				mic->pid, ret);
1498 			if (!ret) {
1499 				ret = waitpid(mic->pid, &stat,
1500 					      WIFSIGNALED(stat));
1501 				mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1502 					mic->name, __func__, __LINE__,
1503 					ret, mic->pid);
1504 			}
1505 			if (mic->boot_on_resume) {
1506 				setsysfs(mic->name, "state", "boot");
1507 				mic->boot_on_resume = 0;
1508 			}
1509 			goto close_exit;
1510 		default:
1511 			break;
1512 		}
1513 	} while (1);
1514 
1515 close_exit:
1516 	close(fd);
1517 exit:
1518 	init_mic(mic);
1519 	pthread_exit(NULL);
1520 }
1521 
1522 static void
set_cmdline(struct mic_info * mic)1523 set_cmdline(struct mic_info *mic)
1524 {
1525 	char buffer[PATH_MAX];
1526 	int len;
1527 
1528 	len = snprintf(buffer, PATH_MAX,
1529 		"clocksource=tsc highres=off nohz=off ");
1530 	len += snprintf(buffer + len, PATH_MAX - len,
1531 		"cpufreq_on;corec6_off;pc3_off;pc6_off ");
1532 	len += snprintf(buffer + len, PATH_MAX - len,
1533 		"ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1534 		mic->id + 1);
1535 
1536 	setsysfs(mic->name, "cmdline", buffer);
1537 	mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1538 	snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1539 	mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1540 }
1541 
1542 static void
set_log_buf_info(struct mic_info * mic)1543 set_log_buf_info(struct mic_info *mic)
1544 {
1545 	int fd;
1546 	off_t len;
1547 	char system_map[] = "/lib/firmware/mic/System.map";
1548 	char *map, *temp, log_buf[17] = {'\0'};
1549 
1550 	fd = open(system_map, O_RDONLY);
1551 	if (fd < 0) {
1552 		mpsslog("%s: Opening System.map failed: %d\n",
1553 			mic->name, errno);
1554 		return;
1555 	}
1556 	len = lseek(fd, 0, SEEK_END);
1557 	if (len < 0) {
1558 		mpsslog("%s: Reading System.map size failed: %d\n",
1559 			mic->name, errno);
1560 		close(fd);
1561 		return;
1562 	}
1563 	map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1564 	if (map == MAP_FAILED) {
1565 		mpsslog("%s: mmap of System.map failed: %d\n",
1566 			mic->name, errno);
1567 		close(fd);
1568 		return;
1569 	}
1570 	temp = strstr(map, "__log_buf");
1571 	if (!temp) {
1572 		mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1573 		munmap(map, len);
1574 		close(fd);
1575 		return;
1576 	}
1577 	strncpy(log_buf, temp - 19, 16);
1578 	setsysfs(mic->name, "log_buf_addr", log_buf);
1579 	mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1580 	temp = strstr(map, "log_buf_len");
1581 	if (!temp) {
1582 		mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1583 		munmap(map, len);
1584 		close(fd);
1585 		return;
1586 	}
1587 	strncpy(log_buf, temp - 19, 16);
1588 	setsysfs(mic->name, "log_buf_len", log_buf);
1589 	mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1590 	munmap(map, len);
1591 	close(fd);
1592 }
1593 
1594 static void
change_virtblk_backend(int x,siginfo_t * siginfo,void * p)1595 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1596 {
1597 	struct mic_info *mic;
1598 
1599 	for (mic = mic_list.next; mic != NULL; mic = mic->next)
1600 		mic->mic_virtblk.signaled = 1/* true */;
1601 }
1602 
/*
 * Prepare the sysfs boot parameters of one card: first the log buffer
 * information, then the kernel command line.
 */
static void set_mic_boot_params(struct mic_info *mic)
{
	set_log_buf_info(mic);
	set_cmdline(mic);
}
1609 
1610 static void *
init_mic(void * arg)1611 init_mic(void *arg)
1612 {
1613 	struct mic_info *mic = (struct mic_info *)arg;
1614 	struct sigaction ignore = {
1615 		.sa_flags = 0,
1616 		.sa_handler = SIG_IGN
1617 	};
1618 	struct sigaction act = {
1619 		.sa_flags = SA_SIGINFO,
1620 		.sa_sigaction = change_virtblk_backend,
1621 	};
1622 	char buffer[PATH_MAX];
1623 	int err, fd;
1624 
1625 	/*
1626 	 * Currently, one virtio block device is supported for each MIC card
1627 	 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1628 	 * The signal informs the virtio block backend about a change in the
1629 	 * configuration file which specifies the virtio backend file name on
1630 	 * the host. Virtio block backend then re-reads the configuration file
1631 	 * and switches to the new block device. This signalling mechanism may
1632 	 * not be required once multiple virtio block devices are supported by
1633 	 * the MIC daemon.
1634 	 */
1635 	sigaction(SIGUSR1, &ignore, NULL);
1636 retry:
1637 	fd = open_state_fd(mic);
1638 	if (fd < 0) {
1639 		mpsslog("%s: %s %d open state fd failed %s\n",
1640 			mic->name, __func__, __LINE__, strerror(errno));
1641 		sleep(2);
1642 		goto retry;
1643 	}
1644 
1645 	if (mic->restart) {
1646 		snprintf(buffer, PATH_MAX, "boot");
1647 		setsysfs(mic->name, "state", buffer);
1648 		mpsslog("%s restarting mic %d\n",
1649 			mic->name, mic->restart);
1650 		mic->restart = 0;
1651 	}
1652 
1653 	while (1) {
1654 		while (block_till_state_change(fd, mic)) {
1655 			mpsslog("%s: %s %d block_till_state_change error %s\n",
1656 				mic->name, __func__, __LINE__, strerror(errno));
1657 			sleep(2);
1658 			continue;
1659 		}
1660 
1661 		if (get_mic_state(mic) == MIC_BOOTING)
1662 			break;
1663 	}
1664 
1665 	mic->pid = fork();
1666 	switch (mic->pid) {
1667 	case 0:
1668 		add_virtio_device(mic, &virtcons_dev_page.dd);
1669 		add_virtio_device(mic, &virtnet_dev_page.dd);
1670 		err = pthread_create(&mic->mic_console.console_thread, NULL,
1671 			virtio_console, mic);
1672 		if (err)
1673 			mpsslog("%s virtcons pthread_create failed %s\n",
1674 				mic->name, strerror(err));
1675 		err = pthread_create(&mic->mic_net.net_thread, NULL,
1676 			virtio_net, mic);
1677 		if (err)
1678 			mpsslog("%s virtnet pthread_create failed %s\n",
1679 				mic->name, strerror(err));
1680 		err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1681 			virtio_block, mic);
1682 		if (err)
1683 			mpsslog("%s virtblk pthread_create failed %s\n",
1684 				mic->name, strerror(err));
1685 		sigemptyset(&act.sa_mask);
1686 		err = sigaction(SIGUSR1, &act, NULL);
1687 		if (err)
1688 			mpsslog("%s sigaction SIGUSR1 failed %s\n",
1689 				mic->name, strerror(errno));
1690 		while (1)
1691 			sleep(60);
1692 	case -1:
1693 		mpsslog("fork failed MIC name %s id %d errno %d\n",
1694 			mic->name, mic->id, errno);
1695 		break;
1696 	default:
1697 		err = pthread_create(&mic->config_thread, NULL,
1698 				     mic_config, mic);
1699 		if (err)
1700 			mpsslog("%s mic_config pthread_create failed %s\n",
1701 				mic->name, strerror(err));
1702 	}
1703 
1704 	return NULL;
1705 }
1706 
1707 static void
start_daemon(void)1708 start_daemon(void)
1709 {
1710 	struct mic_info *mic;
1711 	int err;
1712 
1713 	for (mic = mic_list.next; mic; mic = mic->next) {
1714 		set_mic_boot_params(mic);
1715 		err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1716 		if (err)
1717 			mpsslog("%s init_mic pthread_create failed %s\n",
1718 				mic->name, strerror(err));
1719 	}
1720 
1721 	while (1)
1722 		sleep(60);
1723 }
1724 
1725 static int
init_mic_list(void)1726 init_mic_list(void)
1727 {
1728 	struct mic_info *mic = &mic_list;
1729 	struct dirent *file;
1730 	DIR *dp;
1731 	int cnt = 0;
1732 
1733 	dp = opendir(MICSYSFSDIR);
1734 	if (!dp)
1735 		return 0;
1736 
1737 	while ((file = readdir(dp)) != NULL) {
1738 		if (!strncmp(file->d_name, "mic", 3)) {
1739 			mic->next = calloc(1, sizeof(struct mic_info));
1740 			if (mic->next) {
1741 				mic = mic->next;
1742 				mic->id = atoi(&file->d_name[3]);
1743 				mic->name = malloc(strlen(file->d_name) + 16);
1744 				if (mic->name)
1745 					strcpy(mic->name, file->d_name);
1746 				mpsslog("MIC name %s id %d\n", mic->name,
1747 					mic->id);
1748 				cnt++;
1749 			}
1750 		}
1751 	}
1752 
1753 	closedir(dp);
1754 	return cnt;
1755 }
1756 
1757 void
mpsslog(char * format,...)1758 mpsslog(char *format, ...)
1759 {
1760 	va_list args;
1761 	char buffer[4096];
1762 	char ts[52], *ts1;
1763 	time_t t;
1764 
1765 	if (logfp == NULL)
1766 		return;
1767 
1768 	va_start(args, format);
1769 	vsprintf(buffer, format, args);
1770 	va_end(args);
1771 
1772 	time(&t);
1773 	ts1 = ctime_r(&t, ts);
1774 	ts1[strlen(ts1) - 1] = '\0';
1775 	fprintf(logfp, "%s: %s", ts1, buffer);
1776 
1777 	fflush(logfp);
1778 }
1779 
1780 int
main(int argc,char * argv[])1781 main(int argc, char *argv[])
1782 {
1783 	int cnt;
1784 	pid_t pid;
1785 
1786 	myname = argv[0];
1787 
1788 	logfp = fopen(LOGFILE_NAME, "a+");
1789 	if (!logfp) {
1790 		fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1791 		exit(1);
1792 	}
1793 	pid = fork();
1794 	switch (pid) {
1795 	case 0:
1796 		break;
1797 	case -1:
1798 		exit(2);
1799 	default:
1800 		exit(0);
1801 	}
1802 
1803 	mpsslog("MIC Daemon start\n");
1804 
1805 	cnt = init_mic_list();
1806 	if (cnt == 0) {
1807 		mpsslog("MIC module not loaded\n");
1808 		exit(3);
1809 	}
1810 	mpsslog("MIC found %d devices\n", cnt);
1811 
1812 	start_daemon();
1813 
1814 	exit(0);
1815 }
1816