• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2013 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * The full GNU General Public License is included in this distribution in
16  * the file called "COPYING".
17  *
18  * Intel MIC User Space Tools.
19  */
20 
21 #define _GNU_SOURCE
22 
23 #include <stdlib.h>
24 #include <fcntl.h>
25 #include <getopt.h>
26 #include <assert.h>
27 #include <unistd.h>
28 #include <stdbool.h>
29 #include <signal.h>
30 #include <poll.h>
31 #include <features.h>
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/socket.h>
36 #include <linux/virtio_ring.h>
37 #include <linux/virtio_net.h>
38 #include <linux/virtio_console.h>
39 #include <linux/virtio_blk.h>
40 #include <linux/version.h>
41 #include "mpssd.h"
42 #include <linux/mic_ioctl.h>
43 #include <linux/mic_common.h>
44 #include <tools/endian.h>
45 
/* Forward declaration; the definition lies outside this part of the file. */
static void *init_mic(void *arg);

/* Log stream handle. NOTE(review): presumably the sink used by mpsslog() - confirm. */
static FILE *logfp;
/* File-scope mic_info instance. NOTE(review): name suggests the head of the card list - confirm. */
static struct mic_info mic_list;
50 
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/* Minimum of x and y after converting both to type; each is evaluated once. */
#define min_t(type, x, y) ({				\
		type __min1 = (x);                      \
		type __min2 = (y);                      \
		__min1 < __min2 ? __min1 : __min2; })

/*
 * Align addr on a size boundary - adjust address up/down if needed.
 * size must be a power of two. Every macro argument is parenthesized so
 * that expressions such as "1 << n" or "a | b" expand correctly.
 */
#define _ALIGN_DOWN(addr, size)  ((addr) & (~((size) - 1)))
#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, size)

/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)

/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)

/* Force a single volatile access to x so the read/write is not cached. */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

#define GSO_ENABLED		1
#define MAX_GSO_SIZE		(64 * 1024)
#define ETH_H_LEN		14
/* Largest packet payload buffer: GSO size plus Ethernet header, 64-byte aligned. */
#define MAX_NET_PKT_SIZE	(_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
/* Offset of the first vring inside the device mapping (see init_vr()). */
#define MIC_DEVICE_PAGE_END	0x1000

#ifndef VIRTIO_NET_HDR_F_DATA_VALID
#define VIRTIO_NET_HDR_F_DATA_VALID	2	/* Csum is valid */
#endif
79 
80 static struct {
81 	struct mic_device_desc dd;
82 	struct mic_vqconfig vqconfig[2];
83 	__u32 host_features, guest_acknowledgements;
84 	struct virtio_console_config cons_config;
85 } virtcons_dev_page = {
86 	.dd = {
87 		.type = VIRTIO_ID_CONSOLE,
88 		.num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
89 		.feature_len = sizeof(virtcons_dev_page.host_features),
90 		.config_len = sizeof(virtcons_dev_page.cons_config),
91 	},
92 	.vqconfig[0] = {
93 		.num = htole16(MIC_VRING_ENTRIES),
94 	},
95 	.vqconfig[1] = {
96 		.num = htole16(MIC_VRING_ENTRIES),
97 	},
98 };
99 
100 static struct {
101 	struct mic_device_desc dd;
102 	struct mic_vqconfig vqconfig[2];
103 	__u32 host_features, guest_acknowledgements;
104 	struct virtio_net_config net_config;
105 } virtnet_dev_page = {
106 	.dd = {
107 		.type = VIRTIO_ID_NET,
108 		.num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
109 		.feature_len = sizeof(virtnet_dev_page.host_features),
110 		.config_len = sizeof(virtnet_dev_page.net_config),
111 	},
112 	.vqconfig[0] = {
113 		.num = htole16(MIC_VRING_ENTRIES),
114 	},
115 	.vqconfig[1] = {
116 		.num = htole16(MIC_VRING_ENTRIES),
117 	},
118 #if GSO_ENABLED
119 	.host_features = htole32(
120 		1 << VIRTIO_NET_F_CSUM |
121 		1 << VIRTIO_NET_F_GSO |
122 		1 << VIRTIO_NET_F_GUEST_TSO4 |
123 		1 << VIRTIO_NET_F_GUEST_TSO6 |
124 		1 << VIRTIO_NET_F_GUEST_ECN),
125 #else
126 		.host_features = 0,
127 #endif
128 };
129 
130 static const char *mic_config_dir = "/etc/mpss";
131 static const char *virtblk_backend = "VIRTBLK_BACKEND";
132 static struct {
133 	struct mic_device_desc dd;
134 	struct mic_vqconfig vqconfig[1];
135 	__u32 host_features, guest_acknowledgements;
136 	struct virtio_blk_config blk_config;
137 } virtblk_dev_page = {
138 	.dd = {
139 		.type = VIRTIO_ID_BLOCK,
140 		.num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
141 		.feature_len = sizeof(virtblk_dev_page.host_features),
142 		.config_len = sizeof(virtblk_dev_page.blk_config),
143 	},
144 	.vqconfig[0] = {
145 		.num = htole16(MIC_VRING_ENTRIES),
146 	},
147 	.host_features =
148 		htole32(1<<VIRTIO_BLK_F_SEG_MAX),
149 	.blk_config = {
150 		.seg_max = htole32(MIC_VRING_ENTRIES - 2),
151 		.capacity = htole64(0),
152 	 }
153 };
154 
/* NOTE(review): presumably the program name taken from argv[0]; it is not set in the visible code - confirm. */
static char *myname;
156 
157 static int
tap_configure(struct mic_info * mic,char * dev)158 tap_configure(struct mic_info *mic, char *dev)
159 {
160 	pid_t pid;
161 	char *ifargv[7];
162 	char ipaddr[IFNAMSIZ];
163 	int ret = 0;
164 
165 	pid = fork();
166 	if (pid == 0) {
167 		ifargv[0] = "ip";
168 		ifargv[1] = "link";
169 		ifargv[2] = "set";
170 		ifargv[3] = dev;
171 		ifargv[4] = "up";
172 		ifargv[5] = NULL;
173 		mpsslog("Configuring %s\n", dev);
174 		ret = execvp("ip", ifargv);
175 		if (ret < 0) {
176 			mpsslog("%s execvp failed errno %s\n",
177 				mic->name, strerror(errno));
178 			return ret;
179 		}
180 	}
181 	if (pid < 0) {
182 		mpsslog("%s fork failed errno %s\n",
183 			mic->name, strerror(errno));
184 		return ret;
185 	}
186 
187 	ret = waitpid(pid, NULL, 0);
188 	if (ret < 0) {
189 		mpsslog("%s waitpid failed errno %s\n",
190 			mic->name, strerror(errno));
191 		return ret;
192 	}
193 
194 	snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
195 
196 	pid = fork();
197 	if (pid == 0) {
198 		ifargv[0] = "ip";
199 		ifargv[1] = "addr";
200 		ifargv[2] = "add";
201 		ifargv[3] = ipaddr;
202 		ifargv[4] = "dev";
203 		ifargv[5] = dev;
204 		ifargv[6] = NULL;
205 		mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
206 		ret = execvp("ip", ifargv);
207 		if (ret < 0) {
208 			mpsslog("%s execvp failed errno %s\n",
209 				mic->name, strerror(errno));
210 			return ret;
211 		}
212 	}
213 	if (pid < 0) {
214 		mpsslog("%s fork failed errno %s\n",
215 			mic->name, strerror(errno));
216 		return ret;
217 	}
218 
219 	ret = waitpid(pid, NULL, 0);
220 	if (ret < 0) {
221 		mpsslog("%s waitpid failed errno %s\n",
222 			mic->name, strerror(errno));
223 		return ret;
224 	}
225 	mpsslog("MIC name %s %s %d DONE!\n",
226 		mic->name, __func__, __LINE__);
227 	return 0;
228 }
229 
tun_alloc(struct mic_info * mic,char * dev)230 static int tun_alloc(struct mic_info *mic, char *dev)
231 {
232 	struct ifreq ifr;
233 	int fd, err;
234 #if GSO_ENABLED
235 	unsigned offload;
236 #endif
237 	fd = open("/dev/net/tun", O_RDWR);
238 	if (fd < 0) {
239 		mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
240 		goto done;
241 	}
242 
243 	memset(&ifr, 0, sizeof(ifr));
244 
245 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
246 	if (*dev)
247 		strncpy(ifr.ifr_name, dev, IFNAMSIZ);
248 
249 	err = ioctl(fd, TUNSETIFF, (void *)&ifr);
250 	if (err < 0) {
251 		mpsslog("%s %s %d TUNSETIFF failed %s\n",
252 			mic->name, __func__, __LINE__, strerror(errno));
253 		close(fd);
254 		return err;
255 	}
256 #if GSO_ENABLED
257 	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
258 
259 	err = ioctl(fd, TUNSETOFFLOAD, offload);
260 	if (err < 0) {
261 		mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
262 			mic->name, __func__, __LINE__, strerror(errno));
263 		close(fd);
264 		return err;
265 	}
266 #endif
267 	strcpy(dev, ifr.ifr_name);
268 	mpsslog("Created TAP %s\n", dev);
269 done:
270 	return fd;
271 }
272 
/* Indices into the pollfd array used by virtio_net(). */
#define NET_FD_VIRTIO_NET 0	/* virtio net device fd */
#define NET_FD_TUN 1		/* TAP interface fd */
#define MAX_NET_FD 2		/* number of polled descriptors */
276 
set_dp(struct mic_info * mic,int type,void * dp)277 static void set_dp(struct mic_info *mic, int type, void *dp)
278 {
279 	switch (type) {
280 	case VIRTIO_ID_CONSOLE:
281 		mic->mic_console.console_dp = dp;
282 		return;
283 	case VIRTIO_ID_NET:
284 		mic->mic_net.net_dp = dp;
285 		return;
286 	case VIRTIO_ID_BLOCK:
287 		mic->mic_virtblk.block_dp = dp;
288 		return;
289 	}
290 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
291 	assert(0);
292 }
293 
get_dp(struct mic_info * mic,int type)294 static void *get_dp(struct mic_info *mic, int type)
295 {
296 	switch (type) {
297 	case VIRTIO_ID_CONSOLE:
298 		return mic->mic_console.console_dp;
299 	case VIRTIO_ID_NET:
300 		return mic->mic_net.net_dp;
301 	case VIRTIO_ID_BLOCK:
302 		return mic->mic_virtblk.block_dp;
303 	}
304 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
305 	assert(0);
306 	return NULL;
307 }
308 
get_device_desc(struct mic_info * mic,int type)309 static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
310 {
311 	struct mic_device_desc *d;
312 	int i;
313 	void *dp = get_dp(mic, type);
314 
315 	for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
316 		i += mic_total_desc_size(d)) {
317 		d = dp + i;
318 
319 		/* End of list */
320 		if (d->type == 0)
321 			break;
322 
323 		if (d->type == -1)
324 			continue;
325 
326 		mpsslog("%s %s d-> type %d d %p\n",
327 			mic->name, __func__, d->type, d);
328 
329 		if (d->type == (__u8)type)
330 			return d;
331 	}
332 	mpsslog("%s %s %d not found\n", mic->name, __func__, type);
333 	return NULL;
334 }
335 
336 /* See comments in vhost.c for explanation of next_desc() */
next_desc(struct vring_desc * desc)337 static unsigned next_desc(struct vring_desc *desc)
338 {
339 	unsigned int next;
340 
341 	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
342 		return -1U;
343 	next = le16toh(desc->next);
344 	return next;
345 }
346 
347 /* Sum up all the IOVEC length */
348 static ssize_t
sum_iovec_len(struct mic_copy_desc * copy)349 sum_iovec_len(struct mic_copy_desc *copy)
350 {
351 	ssize_t sum = 0;
352 	int i;
353 
354 	for (i = 0; i < copy->iovcnt; i++)
355 		sum += copy->iov[i].iov_len;
356 	return sum;
357 }
358 
verify_out_len(struct mic_info * mic,struct mic_copy_desc * copy)359 static inline void verify_out_len(struct mic_info *mic,
360 	struct mic_copy_desc *copy)
361 {
362 	if (copy->out_len != sum_iovec_len(copy)) {
363 		mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
364 			mic->name, __func__, __LINE__,
365 			copy->out_len, sum_iovec_len(copy));
366 		assert(copy->out_len == sum_iovec_len(copy));
367 	}
368 }
369 
370 /* Display an iovec */
371 static void
disp_iovec(struct mic_info * mic,struct mic_copy_desc * copy,const char * s,int line)372 disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
373 	   const char *s, int line)
374 {
375 	int i;
376 
377 	for (i = 0; i < copy->iovcnt; i++)
378 		mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
379 			mic->name, s, line, i,
380 			copy->iov[i].iov_base, copy->iov[i].iov_len);
381 }
382 
read_avail_idx(struct mic_vring * vr)383 static inline __u16 read_avail_idx(struct mic_vring *vr)
384 {
385 	return ACCESS_ONCE(vr->info->avail_idx);
386 }
387 
txrx_prepare(int type,bool tx,struct mic_vring * vr,struct mic_copy_desc * copy,ssize_t len)388 static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
389 				struct mic_copy_desc *copy, ssize_t len)
390 {
391 	copy->vr_idx = tx ? 0 : 1;
392 	copy->update_used = true;
393 	if (type == VIRTIO_ID_NET)
394 		copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
395 	else
396 		copy->iov[0].iov_len = len;
397 }
398 
399 /* Central API which triggers the copies */
400 static int
mic_virtio_copy(struct mic_info * mic,int fd,struct mic_vring * vr,struct mic_copy_desc * copy)401 mic_virtio_copy(struct mic_info *mic, int fd,
402 		struct mic_vring *vr, struct mic_copy_desc *copy)
403 {
404 	int ret;
405 
406 	ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
407 	if (ret) {
408 		mpsslog("%s %s %d errno %s ret %d\n",
409 			mic->name, __func__, __LINE__,
410 			strerror(errno), ret);
411 	}
412 	return ret;
413 }
414 
_vring_size(unsigned int num,unsigned long align)415 static inline unsigned _vring_size(unsigned int num, unsigned long align)
416 {
417 	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
418 				+ align - 1) & ~(align - 1))
419 		+ sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
420 }
421 
422 /*
423  * This initialization routine requires at least one
424  * vring i.e. vr0. vr1 is optional.
425  */
426 static void *
init_vr(struct mic_info * mic,int fd,int type,struct mic_vring * vr0,struct mic_vring * vr1,int num_vq)427 init_vr(struct mic_info *mic, int fd, int type,
428 	struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
429 {
430 	int vr_size;
431 	char *va;
432 
433 	vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
434 					 MIC_VIRTIO_RING_ALIGN) +
435 			     sizeof(struct _mic_vring_info));
436 	va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
437 		PROT_READ, MAP_SHARED, fd, 0);
438 	if (MAP_FAILED == va) {
439 		mpsslog("%s %s %d mmap failed errno %s\n",
440 			mic->name, __func__, __LINE__,
441 			strerror(errno));
442 		goto done;
443 	}
444 	set_dp(mic, type, va);
445 	vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
446 	vr0->info = vr0->va +
447 		_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
448 	vring_init(&vr0->vr,
449 		   MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
450 	mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
451 		__func__, mic->name, vr0->va, vr0->info, vr_size,
452 		_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
453 	mpsslog("magic 0x%x expected 0x%x\n",
454 		le32toh(vr0->info->magic), MIC_MAGIC + type);
455 	assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
456 	if (vr1) {
457 		vr1->va = (struct mic_vring *)
458 			&va[MIC_DEVICE_PAGE_END + vr_size];
459 		vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
460 			MIC_VIRTIO_RING_ALIGN);
461 		vring_init(&vr1->vr,
462 			   MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
463 		mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
464 			__func__, mic->name, vr1->va, vr1->info, vr_size,
465 			_vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
466 		mpsslog("magic 0x%x expected 0x%x\n",
467 			le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
468 		assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
469 	}
470 done:
471 	return va;
472 }
473 
474 static int
wait_for_card_driver(struct mic_info * mic,int fd,int type)475 wait_for_card_driver(struct mic_info *mic, int fd, int type)
476 {
477 	struct pollfd pollfd;
478 	int err;
479 	struct mic_device_desc *desc = get_device_desc(mic, type);
480 	__u8 prev_status;
481 
482 	if (!desc)
483 		return -ENODEV;
484 	prev_status = desc->status;
485 	pollfd.fd = fd;
486 	mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
487 		mic->name, __func__, type, desc->status);
488 
489 	while (1) {
490 		pollfd.events = POLLIN;
491 		pollfd.revents = 0;
492 		err = poll(&pollfd, 1, -1);
493 		if (err < 0) {
494 			mpsslog("%s %s poll failed %s\n",
495 				mic->name, __func__, strerror(errno));
496 			continue;
497 		}
498 
499 		if (pollfd.revents) {
500 			if (desc->status != prev_status) {
501 				mpsslog("%s %s Waiting... desc-> type %d "
502 					"status 0x%x\n",
503 					mic->name, __func__, type,
504 					desc->status);
505 				prev_status = desc->status;
506 			}
507 			if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
508 				mpsslog("%s %s poll.revents %d\n",
509 					mic->name, __func__, pollfd.revents);
510 				mpsslog("%s %s desc-> type %d status 0x%x\n",
511 					mic->name, __func__, type,
512 					desc->status);
513 				break;
514 			}
515 		}
516 	}
517 	return 0;
518 }
519 
520 /* Spin till we have some descriptors */
521 static void
spin_for_descriptors(struct mic_info * mic,struct mic_vring * vr)522 spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
523 {
524 	__u16 avail_idx = read_avail_idx(vr);
525 
526 	while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
527 #ifdef DEBUG
528 		mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
529 			mic->name, __func__,
530 			le16toh(vr->vr.avail->idx), vr->info->avail_idx);
531 #endif
532 		sched_yield();
533 	}
534 }
535 
536 static void *
virtio_net(void * arg)537 virtio_net(void *arg)
538 {
539 	static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
540 	static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
541 	struct iovec vnet_iov[2][2] = {
542 		{ { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
543 		  { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
544 		{ { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
545 		  { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
546 	};
547 	struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
548 	struct mic_info *mic = (struct mic_info *)arg;
549 	char if_name[IFNAMSIZ];
550 	struct pollfd net_poll[MAX_NET_FD];
551 	struct mic_vring tx_vr, rx_vr;
552 	struct mic_copy_desc copy;
553 	struct mic_device_desc *desc;
554 	int err;
555 
556 	snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
557 	mic->mic_net.tap_fd = tun_alloc(mic, if_name);
558 	if (mic->mic_net.tap_fd < 0)
559 		goto done;
560 
561 	if (tap_configure(mic, if_name))
562 		goto done;
563 	mpsslog("MIC name %s id %d\n", mic->name, mic->id);
564 
565 	net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
566 	net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
567 	net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
568 	net_poll[NET_FD_TUN].events = POLLIN;
569 
570 	if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
571 				  VIRTIO_ID_NET, &tx_vr, &rx_vr,
572 		virtnet_dev_page.dd.num_vq)) {
573 		mpsslog("%s init_vr failed %s\n",
574 			mic->name, strerror(errno));
575 		goto done;
576 	}
577 
578 	copy.iovcnt = 2;
579 	desc = get_device_desc(mic, VIRTIO_ID_NET);
580 
581 	while (1) {
582 		ssize_t len;
583 
584 		net_poll[NET_FD_VIRTIO_NET].revents = 0;
585 		net_poll[NET_FD_TUN].revents = 0;
586 
587 		/* Start polling for data from tap and virtio net */
588 		err = poll(net_poll, 2, -1);
589 		if (err < 0) {
590 			mpsslog("%s poll failed %s\n",
591 				__func__, strerror(errno));
592 			continue;
593 		}
594 		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
595 			err = wait_for_card_driver(mic,
596 						   mic->mic_net.virtio_net_fd,
597 						   VIRTIO_ID_NET);
598 			if (err) {
599 				mpsslog("%s %s %d Exiting...\n",
600 					mic->name, __func__, __LINE__);
601 				break;
602 			}
603 		}
604 		/*
605 		 * Check if there is data to be read from TUN and write to
606 		 * virtio net fd if there is.
607 		 */
608 		if (net_poll[NET_FD_TUN].revents & POLLIN) {
609 			copy.iov = iov0;
610 			len = readv(net_poll[NET_FD_TUN].fd,
611 				copy.iov, copy.iovcnt);
612 			if (len > 0) {
613 				struct virtio_net_hdr *hdr
614 					= (struct virtio_net_hdr *)vnet_hdr[0];
615 
616 				/* Disable checksums on the card since we are on
617 				   a reliable PCIe link */
618 				hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
619 #ifdef DEBUG
620 				mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
621 					__func__, __LINE__, hdr->flags);
622 				mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
623 					copy.out_len, hdr->gso_type);
624 #endif
625 #ifdef DEBUG
626 				disp_iovec(mic, copy, __func__, __LINE__);
627 				mpsslog("%s %s %d read from tap 0x%lx\n",
628 					mic->name, __func__, __LINE__,
629 					len);
630 #endif
631 				spin_for_descriptors(mic, &tx_vr);
632 				txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
633 					     len);
634 
635 				err = mic_virtio_copy(mic,
636 					mic->mic_net.virtio_net_fd, &tx_vr,
637 					&copy);
638 				if (err < 0) {
639 					mpsslog("%s %s %d mic_virtio_copy %s\n",
640 						mic->name, __func__, __LINE__,
641 						strerror(errno));
642 				}
643 				if (!err)
644 					verify_out_len(mic, &copy);
645 #ifdef DEBUG
646 				disp_iovec(mic, copy, __func__, __LINE__);
647 				mpsslog("%s %s %d wrote to net 0x%lx\n",
648 					mic->name, __func__, __LINE__,
649 					sum_iovec_len(&copy));
650 #endif
651 				/* Reinitialize IOV for next run */
652 				iov0[1].iov_len = MAX_NET_PKT_SIZE;
653 			} else if (len < 0) {
654 				disp_iovec(mic, &copy, __func__, __LINE__);
655 				mpsslog("%s %s %d read failed %s ", mic->name,
656 					__func__, __LINE__, strerror(errno));
657 				mpsslog("cnt %d sum %zd\n",
658 					copy.iovcnt, sum_iovec_len(&copy));
659 			}
660 		}
661 
662 		/*
663 		 * Check if there is data to be read from virtio net and
664 		 * write to TUN if there is.
665 		 */
666 		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
667 			while (rx_vr.info->avail_idx !=
668 				le16toh(rx_vr.vr.avail->idx)) {
669 				copy.iov = iov1;
670 				txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
671 					     MAX_NET_PKT_SIZE
672 					+ sizeof(struct virtio_net_hdr));
673 
674 				err = mic_virtio_copy(mic,
675 					mic->mic_net.virtio_net_fd, &rx_vr,
676 					&copy);
677 				if (!err) {
678 #ifdef DEBUG
679 					struct virtio_net_hdr *hdr
680 						= (struct virtio_net_hdr *)
681 							vnet_hdr[1];
682 
683 					mpsslog("%s %s %d hdr->flags 0x%x, ",
684 						mic->name, __func__, __LINE__,
685 						hdr->flags);
686 					mpsslog("out_len %d gso_type 0x%x\n",
687 						copy.out_len,
688 						hdr->gso_type);
689 #endif
690 					/* Set the correct output iov_len */
691 					iov1[1].iov_len = copy.out_len -
692 						sizeof(struct virtio_net_hdr);
693 					verify_out_len(mic, &copy);
694 #ifdef DEBUG
695 					disp_iovec(mic, copy, __func__,
696 						   __LINE__);
697 					mpsslog("%s %s %d ",
698 						mic->name, __func__, __LINE__);
699 					mpsslog("read from net 0x%lx\n",
700 						sum_iovec_len(copy));
701 #endif
702 					len = writev(net_poll[NET_FD_TUN].fd,
703 						copy.iov, copy.iovcnt);
704 					if (len != sum_iovec_len(&copy)) {
705 						mpsslog("Tun write failed %s ",
706 							strerror(errno));
707 						mpsslog("len 0x%zx ", len);
708 						mpsslog("read_len 0x%zx\n",
709 							sum_iovec_len(&copy));
710 					} else {
711 #ifdef DEBUG
712 						disp_iovec(mic, &copy, __func__,
713 							   __LINE__);
714 						mpsslog("%s %s %d ",
715 							mic->name, __func__,
716 							__LINE__);
717 						mpsslog("wrote to tap 0x%lx\n",
718 							len);
719 #endif
720 					}
721 				} else {
722 					mpsslog("%s %s %d mic_virtio_copy %s\n",
723 						mic->name, __func__, __LINE__,
724 						strerror(errno));
725 					break;
726 				}
727 			}
728 		}
729 		if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
730 			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
731 	}
732 done:
733 	pthread_exit(NULL);
734 }
735 
/* virtio_console: indices into the pollfd array used by virtio_console() */
#define VIRTIO_CONSOLE_FD 0			/* virtio console device fd */
#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)	/* pseudoterminal master fd */
#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
/* NOTE(review): not referenced in the visible code; virtio_console() uses PAGE_SIZE directly. */
#define MAX_BUFFER_SIZE PAGE_SIZE
741 
742 static void *
virtio_console(void * arg)743 virtio_console(void *arg)
744 {
745 	static __u8 vcons_buf[2][PAGE_SIZE];
746 	struct iovec vcons_iov[2] = {
747 		{ .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
748 		{ .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
749 	};
750 	struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
751 	struct mic_info *mic = (struct mic_info *)arg;
752 	int err;
753 	struct pollfd console_poll[MAX_CONSOLE_FD];
754 	int pty_fd;
755 	char *pts_name;
756 	ssize_t len;
757 	struct mic_vring tx_vr, rx_vr;
758 	struct mic_copy_desc copy;
759 	struct mic_device_desc *desc;
760 
761 	pty_fd = posix_openpt(O_RDWR);
762 	if (pty_fd < 0) {
763 		mpsslog("can't open a pseudoterminal master device: %s\n",
764 			strerror(errno));
765 		goto _return;
766 	}
767 	pts_name = ptsname(pty_fd);
768 	if (pts_name == NULL) {
769 		mpsslog("can't get pts name\n");
770 		goto _close_pty;
771 	}
772 	printf("%s console message goes to %s\n", mic->name, pts_name);
773 	mpsslog("%s console message goes to %s\n", mic->name, pts_name);
774 	err = grantpt(pty_fd);
775 	if (err < 0) {
776 		mpsslog("can't grant access: %s %s\n",
777 			pts_name, strerror(errno));
778 		goto _close_pty;
779 	}
780 	err = unlockpt(pty_fd);
781 	if (err < 0) {
782 		mpsslog("can't unlock a pseudoterminal: %s %s\n",
783 			pts_name, strerror(errno));
784 		goto _close_pty;
785 	}
786 	console_poll[MONITOR_FD].fd = pty_fd;
787 	console_poll[MONITOR_FD].events = POLLIN;
788 
789 	console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
790 	console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
791 
792 	if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
793 				  VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
794 		virtcons_dev_page.dd.num_vq)) {
795 		mpsslog("%s init_vr failed %s\n",
796 			mic->name, strerror(errno));
797 		goto _close_pty;
798 	}
799 
800 	copy.iovcnt = 1;
801 	desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
802 
803 	for (;;) {
804 		console_poll[MONITOR_FD].revents = 0;
805 		console_poll[VIRTIO_CONSOLE_FD].revents = 0;
806 		err = poll(console_poll, MAX_CONSOLE_FD, -1);
807 		if (err < 0) {
808 			mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
809 				strerror(errno));
810 			continue;
811 		}
812 		if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
813 			err = wait_for_card_driver(mic,
814 					mic->mic_console.virtio_console_fd,
815 					VIRTIO_ID_CONSOLE);
816 			if (err) {
817 				mpsslog("%s %s %d Exiting...\n",
818 					mic->name, __func__, __LINE__);
819 				break;
820 			}
821 		}
822 
823 		if (console_poll[MONITOR_FD].revents & POLLIN) {
824 			copy.iov = iov0;
825 			len = readv(pty_fd, copy.iov, copy.iovcnt);
826 			if (len > 0) {
827 #ifdef DEBUG
828 				disp_iovec(mic, copy, __func__, __LINE__);
829 				mpsslog("%s %s %d read from tap 0x%lx\n",
830 					mic->name, __func__, __LINE__,
831 					len);
832 #endif
833 				spin_for_descriptors(mic, &tx_vr);
834 				txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
835 					     &copy, len);
836 
837 				err = mic_virtio_copy(mic,
838 					mic->mic_console.virtio_console_fd,
839 					&tx_vr, &copy);
840 				if (err < 0) {
841 					mpsslog("%s %s %d mic_virtio_copy %s\n",
842 						mic->name, __func__, __LINE__,
843 						strerror(errno));
844 				}
845 				if (!err)
846 					verify_out_len(mic, &copy);
847 #ifdef DEBUG
848 				disp_iovec(mic, copy, __func__, __LINE__);
849 				mpsslog("%s %s %d wrote to net 0x%lx\n",
850 					mic->name, __func__, __LINE__,
851 					sum_iovec_len(copy));
852 #endif
853 				/* Reinitialize IOV for next run */
854 				iov0->iov_len = PAGE_SIZE;
855 			} else if (len < 0) {
856 				disp_iovec(mic, &copy, __func__, __LINE__);
857 				mpsslog("%s %s %d read failed %s ",
858 					mic->name, __func__, __LINE__,
859 					strerror(errno));
860 				mpsslog("cnt %d sum %zd\n",
861 					copy.iovcnt, sum_iovec_len(&copy));
862 			}
863 		}
864 
865 		if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
866 			while (rx_vr.info->avail_idx !=
867 				le16toh(rx_vr.vr.avail->idx)) {
868 				copy.iov = iov1;
869 				txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
870 					     &copy, PAGE_SIZE);
871 
872 				err = mic_virtio_copy(mic,
873 					mic->mic_console.virtio_console_fd,
874 					&rx_vr, &copy);
875 				if (!err) {
876 					/* Set the correct output iov_len */
877 					iov1->iov_len = copy.out_len;
878 					verify_out_len(mic, &copy);
879 #ifdef DEBUG
880 					disp_iovec(mic, copy, __func__,
881 						   __LINE__);
882 					mpsslog("%s %s %d ",
883 						mic->name, __func__, __LINE__);
884 					mpsslog("read from net 0x%lx\n",
885 						sum_iovec_len(copy));
886 #endif
887 					len = writev(pty_fd,
888 						copy.iov, copy.iovcnt);
889 					if (len != sum_iovec_len(&copy)) {
890 						mpsslog("Tun write failed %s ",
891 							strerror(errno));
892 						mpsslog("len 0x%zx ", len);
893 						mpsslog("read_len 0x%zx\n",
894 							sum_iovec_len(&copy));
895 					} else {
896 #ifdef DEBUG
897 						disp_iovec(mic, copy, __func__,
898 							   __LINE__);
899 						mpsslog("%s %s %d ",
900 							mic->name, __func__,
901 							__LINE__);
902 						mpsslog("wrote to tap 0x%lx\n",
903 							len);
904 #endif
905 					}
906 				} else {
907 					mpsslog("%s %s %d mic_virtio_copy %s\n",
908 						mic->name, __func__, __LINE__,
909 						strerror(errno));
910 					break;
911 				}
912 			}
913 		}
914 		if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
915 			mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
916 	}
917 _close_pty:
918 	close(pty_fd);
919 _return:
920 	pthread_exit(NULL);
921 }
922 
923 static void
add_virtio_device(struct mic_info * mic,struct mic_device_desc * dd)924 add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
925 {
926 	char path[PATH_MAX];
927 	int fd, err;
928 
929 	snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
930 	fd = open(path, O_RDWR);
931 	if (fd < 0) {
932 		mpsslog("Could not open %s %s\n", path, strerror(errno));
933 		return;
934 	}
935 
936 	err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
937 	if (err < 0) {
938 		mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
939 		close(fd);
940 		return;
941 	}
942 	switch (dd->type) {
943 	case VIRTIO_ID_NET:
944 		mic->mic_net.virtio_net_fd = fd;
945 		mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
946 		break;
947 	case VIRTIO_ID_CONSOLE:
948 		mic->mic_console.virtio_console_fd = fd;
949 		mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
950 		break;
951 	case VIRTIO_ID_BLOCK:
952 		mic->mic_virtblk.virtio_block_fd = fd;
953 		mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
954 		break;
955 	}
956 }
957 
958 static bool
set_backend_file(struct mic_info * mic)959 set_backend_file(struct mic_info *mic)
960 {
961 	FILE *config;
962 	char buff[PATH_MAX], *line, *evv, *p;
963 
964 	snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
965 	config = fopen(buff, "r");
966 	if (config == NULL)
967 		return false;
968 	do {  /* look for "virtblk_backend=XXXX" */
969 		line = fgets(buff, PATH_MAX, config);
970 		if (line == NULL)
971 			break;
972 		if (*line == '#')
973 			continue;
974 		p = strchr(line, '\n');
975 		if (p)
976 			*p = '\0';
977 	} while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
978 	fclose(config);
979 	if (line == NULL)
980 		return false;
981 	evv = strchr(line, '=');
982 	if (evv == NULL)
983 		return false;
984 	mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
985 	if (mic->mic_virtblk.backend_file == NULL) {
986 		mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
987 		return false;
988 	}
989 	strcpy(mic->mic_virtblk.backend_file, evv + 1);
990 	return true;
991 }
992 
993 #define SECTOR_SIZE 512
994 static bool
set_backend_size(struct mic_info * mic)995 set_backend_size(struct mic_info *mic)
996 {
997 	mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
998 		SEEK_END);
999 	if (mic->mic_virtblk.backend_size < 0) {
1000 		mpsslog("%s: can't seek: %s\n",
1001 			mic->name, mic->mic_virtblk.backend_file);
1002 		return false;
1003 	}
1004 	virtblk_dev_page.blk_config.capacity =
1005 		mic->mic_virtblk.backend_size / SECTOR_SIZE;
1006 	if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
1007 		virtblk_dev_page.blk_config.capacity++;
1008 
1009 	virtblk_dev_page.blk_config.capacity =
1010 		htole64(virtblk_dev_page.blk_config.capacity);
1011 
1012 	return true;
1013 }
1014 
1015 static bool
open_backend(struct mic_info * mic)1016 open_backend(struct mic_info *mic)
1017 {
1018 	if (!set_backend_file(mic))
1019 		goto _error_exit;
1020 	mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
1021 	if (mic->mic_virtblk.backend < 0) {
1022 		mpsslog("%s: can't open: %s\n", mic->name,
1023 			mic->mic_virtblk.backend_file);
1024 		goto _error_free;
1025 	}
1026 	if (!set_backend_size(mic))
1027 		goto _error_close;
1028 	mic->mic_virtblk.backend_addr = mmap(NULL,
1029 		mic->mic_virtblk.backend_size,
1030 		PROT_READ|PROT_WRITE, MAP_SHARED,
1031 		mic->mic_virtblk.backend, 0L);
1032 	if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
1033 		mpsslog("%s: can't map: %s %s\n",
1034 			mic->name, mic->mic_virtblk.backend_file,
1035 			strerror(errno));
1036 		goto _error_close;
1037 	}
1038 	return true;
1039 
1040  _error_close:
1041 	close(mic->mic_virtblk.backend);
1042  _error_free:
1043 	free(mic->mic_virtblk.backend_file);
1044  _error_exit:
1045 	return false;
1046 }
1047 
1048 static void
close_backend(struct mic_info * mic)1049 close_backend(struct mic_info *mic)
1050 {
1051 	munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
1052 	close(mic->mic_virtblk.backend);
1053 	free(mic->mic_virtblk.backend_file);
1054 }
1055 
1056 static bool
start_virtblk(struct mic_info * mic,struct mic_vring * vring)1057 start_virtblk(struct mic_info *mic, struct mic_vring *vring)
1058 {
1059 	if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
1060 		mpsslog("%s: blk_config is not 8 byte aligned.\n",
1061 			mic->name);
1062 		return false;
1063 	}
1064 	add_virtio_device(mic, &virtblk_dev_page.dd);
1065 	if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
1066 				  VIRTIO_ID_BLOCK, vring, NULL,
1067 				  virtblk_dev_page.dd.num_vq)) {
1068 		mpsslog("%s init_vr failed %s\n",
1069 			mic->name, strerror(errno));
1070 		return false;
1071 	}
1072 	return true;
1073 }
1074 
1075 static void
stop_virtblk(struct mic_info * mic)1076 stop_virtblk(struct mic_info *mic)
1077 {
1078 	int vr_size, ret;
1079 
1080 	vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
1081 					 MIC_VIRTIO_RING_ALIGN) +
1082 			     sizeof(struct _mic_vring_info));
1083 	ret = munmap(mic->mic_virtblk.block_dp,
1084 		MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
1085 	if (ret < 0)
1086 		mpsslog("%s munmap errno %d\n", mic->name, errno);
1087 	close(mic->mic_virtblk.virtio_block_fd);
1088 }
1089 
1090 static __u8
header_error_check(struct vring_desc * desc)1091 header_error_check(struct vring_desc *desc)
1092 {
1093 	if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
1094 		mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
1095 			__func__, __LINE__);
1096 		return -EIO;
1097 	}
1098 	if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
1099 		mpsslog("%s() %d: alone\n",
1100 			__func__, __LINE__);
1101 		return -EIO;
1102 	}
1103 	if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
1104 		mpsslog("%s() %d: not read\n",
1105 			__func__, __LINE__);
1106 		return -EIO;
1107 	}
1108 	return 0;
1109 }
1110 
1111 static int
read_header(int fd,struct virtio_blk_outhdr * hdr,__u32 desc_idx)1112 read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
1113 {
1114 	struct iovec iovec;
1115 	struct mic_copy_desc copy;
1116 
1117 	iovec.iov_len = sizeof(*hdr);
1118 	iovec.iov_base = hdr;
1119 	copy.iov = &iovec;
1120 	copy.iovcnt = 1;
1121 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1122 	copy.update_used = false;  /* do not update used index */
1123 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1124 }
1125 
1126 static int
transfer_blocks(int fd,struct iovec * iovec,__u32 iovcnt)1127 transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
1128 {
1129 	struct mic_copy_desc copy;
1130 
1131 	copy.iov = iovec;
1132 	copy.iovcnt = iovcnt;
1133 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1134 	copy.update_used = false;  /* do not update used index */
1135 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1136 }
1137 
1138 static __u8
status_error_check(struct vring_desc * desc)1139 status_error_check(struct vring_desc *desc)
1140 {
1141 	if (le32toh(desc->len) != sizeof(__u8)) {
1142 		mpsslog("%s() %d: length is not sizeof(status)\n",
1143 			__func__, __LINE__);
1144 		return -EIO;
1145 	}
1146 	return 0;
1147 }
1148 
1149 static int
write_status(int fd,__u8 * status)1150 write_status(int fd, __u8 *status)
1151 {
1152 	struct iovec iovec;
1153 	struct mic_copy_desc copy;
1154 
1155 	iovec.iov_base = status;
1156 	iovec.iov_len = sizeof(*status);
1157 	copy.iov = &iovec;
1158 	copy.iovcnt = 1;
1159 	copy.vr_idx = 0;  /* only one vring on virtio_block */
1160 	copy.update_used = true; /* Update used index */
1161 	return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
1162 }
1163 
1164 #ifndef VIRTIO_BLK_T_GET_ID
1165 #define VIRTIO_BLK_T_GET_ID    8
1166 #endif
1167 
1168 static void *
virtio_block(void * arg)1169 virtio_block(void *arg)
1170 {
1171 	struct mic_info *mic = (struct mic_info *)arg;
1172 	int ret;
1173 	struct pollfd block_poll;
1174 	struct mic_vring vring;
1175 	__u16 avail_idx;
1176 	__u32 desc_idx;
1177 	struct vring_desc *desc;
1178 	struct iovec *iovec, *piov;
1179 	__u8 status;
1180 	__u32 buffer_desc_idx;
1181 	struct virtio_blk_outhdr hdr;
1182 	void *fos;
1183 
1184 	for (;;) {  /* forever */
1185 		if (!open_backend(mic)) { /* No virtblk */
1186 			for (mic->mic_virtblk.signaled = 0;
1187 				!mic->mic_virtblk.signaled;)
1188 				sleep(1);
1189 			continue;
1190 		}
1191 
1192 		/* backend file is specified. */
1193 		if (!start_virtblk(mic, &vring))
1194 			goto _close_backend;
1195 		iovec = malloc(sizeof(*iovec) *
1196 			le32toh(virtblk_dev_page.blk_config.seg_max));
1197 		if (!iovec) {
1198 			mpsslog("%s: can't alloc iovec: %s\n",
1199 				mic->name, strerror(ENOMEM));
1200 			goto _stop_virtblk;
1201 		}
1202 
1203 		block_poll.fd = mic->mic_virtblk.virtio_block_fd;
1204 		block_poll.events = POLLIN;
1205 		for (mic->mic_virtblk.signaled = 0;
1206 		     !mic->mic_virtblk.signaled;) {
1207 			block_poll.revents = 0;
1208 					/* timeout in 1 sec to see signaled */
1209 			ret = poll(&block_poll, 1, 1000);
1210 			if (ret < 0) {
1211 				mpsslog("%s %d: poll failed: %s\n",
1212 					__func__, __LINE__,
1213 					strerror(errno));
1214 				continue;
1215 			}
1216 
1217 			if (!(block_poll.revents & POLLIN)) {
1218 #ifdef DEBUG
1219 				mpsslog("%s %d: block_poll.revents=0x%x\n",
1220 					__func__, __LINE__, block_poll.revents);
1221 #endif
1222 				continue;
1223 			}
1224 
1225 			/* POLLIN */
1226 			while (vring.info->avail_idx !=
1227 				le16toh(vring.vr.avail->idx)) {
1228 				/* read header element */
1229 				avail_idx =
1230 					vring.info->avail_idx &
1231 					(vring.vr.num - 1);
1232 				desc_idx = le16toh(
1233 					vring.vr.avail->ring[avail_idx]);
1234 				desc = &vring.vr.desc[desc_idx];
1235 #ifdef DEBUG
1236 				mpsslog("%s() %d: avail_idx=%d ",
1237 					__func__, __LINE__,
1238 					vring.info->avail_idx);
1239 				mpsslog("vring.vr.num=%d desc=%p\n",
1240 					vring.vr.num, desc);
1241 #endif
1242 				status = header_error_check(desc);
1243 				ret = read_header(
1244 					mic->mic_virtblk.virtio_block_fd,
1245 					&hdr, desc_idx);
1246 				if (ret < 0) {
1247 					mpsslog("%s() %d %s: ret=%d %s\n",
1248 						__func__, __LINE__,
1249 						mic->name, ret,
1250 						strerror(errno));
1251 					break;
1252 				}
1253 				/* buffer element */
1254 				piov = iovec;
1255 				status = 0;
1256 				fos = mic->mic_virtblk.backend_addr +
1257 					(hdr.sector * SECTOR_SIZE);
1258 				buffer_desc_idx = next_desc(desc);
1259 				desc_idx = buffer_desc_idx;
1260 				for (desc = &vring.vr.desc[buffer_desc_idx];
1261 				     desc->flags & VRING_DESC_F_NEXT;
1262 				     desc_idx = next_desc(desc),
1263 					     desc = &vring.vr.desc[desc_idx]) {
1264 					piov->iov_len = desc->len;
1265 					piov->iov_base = fos;
1266 					piov++;
1267 					fos += desc->len;
1268 				}
1269 				/* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
1270 				if (hdr.type & ~(VIRTIO_BLK_T_OUT |
1271 					VIRTIO_BLK_T_GET_ID)) {
1272 					/*
1273 					  VIRTIO_BLK_T_IN - does not do
1274 					  anything. Probably for documenting.
1275 					  VIRTIO_BLK_T_SCSI_CMD - for
1276 					  virtio_scsi.
1277 					  VIRTIO_BLK_T_FLUSH - turned off in
1278 					  config space.
1279 					  VIRTIO_BLK_T_BARRIER - defined but not
1280 					  used in anywhere.
1281 					*/
1282 					mpsslog("%s() %d: type %x ",
1283 						__func__, __LINE__,
1284 						hdr.type);
1285 					mpsslog("is not supported\n");
1286 					status = -ENOTSUP;
1287 
1288 				} else {
1289 					ret = transfer_blocks(
1290 					mic->mic_virtblk.virtio_block_fd,
1291 						iovec,
1292 						piov - iovec);
1293 					if (ret < 0 &&
1294 					    status != 0)
1295 						status = ret;
1296 				}
1297 				/* write status and update used pointer */
1298 				if (status != 0)
1299 					status = status_error_check(desc);
1300 				ret = write_status(
1301 					mic->mic_virtblk.virtio_block_fd,
1302 					&status);
1303 #ifdef DEBUG
1304 				mpsslog("%s() %d: write status=%d on desc=%p\n",
1305 					__func__, __LINE__,
1306 					status, desc);
1307 #endif
1308 			}
1309 		}
1310 		free(iovec);
1311 _stop_virtblk:
1312 		stop_virtblk(mic);
1313 _close_backend:
1314 		close_backend(mic);
1315 	}  /* forever */
1316 
1317 	pthread_exit(NULL);
1318 }
1319 
1320 static void
reset(struct mic_info * mic)1321 reset(struct mic_info *mic)
1322 {
1323 #define RESET_TIMEOUT 120
1324 	int i = RESET_TIMEOUT;
1325 	setsysfs(mic->name, "state", "reset");
1326 	while (i) {
1327 		char *state;
1328 		state = readsysfs(mic->name, "state");
1329 		if (!state)
1330 			goto retry;
1331 		mpsslog("%s: %s %d state %s\n",
1332 			mic->name, __func__, __LINE__, state);
1333 
1334 		if (!strcmp(state, "ready")) {
1335 			free(state);
1336 			break;
1337 		}
1338 		free(state);
1339 retry:
1340 		sleep(1);
1341 		i--;
1342 	}
1343 }
1344 
1345 static int
get_mic_shutdown_status(struct mic_info * mic,char * shutdown_status)1346 get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
1347 {
1348 	if (!strcmp(shutdown_status, "nop"))
1349 		return MIC_NOP;
1350 	if (!strcmp(shutdown_status, "crashed"))
1351 		return MIC_CRASHED;
1352 	if (!strcmp(shutdown_status, "halted"))
1353 		return MIC_HALTED;
1354 	if (!strcmp(shutdown_status, "poweroff"))
1355 		return MIC_POWER_OFF;
1356 	if (!strcmp(shutdown_status, "restart"))
1357 		return MIC_RESTART;
1358 	mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
1359 	/* Invalid state */
1360 	assert(0);
1361 };
1362 
get_mic_state(struct mic_info * mic)1363 static int get_mic_state(struct mic_info *mic)
1364 {
1365 	char *state = NULL;
1366 	enum mic_states mic_state;
1367 
1368 	while (!state) {
1369 		state = readsysfs(mic->name, "state");
1370 		sleep(1);
1371 	}
1372 	mpsslog("%s: %s %d state %s\n",
1373 		mic->name, __func__, __LINE__, state);
1374 
1375 	if (!strcmp(state, "ready")) {
1376 		mic_state = MIC_READY;
1377 	} else if (!strcmp(state, "booting")) {
1378 		mic_state = MIC_BOOTING;
1379 	} else if (!strcmp(state, "online")) {
1380 		mic_state = MIC_ONLINE;
1381 	} else if (!strcmp(state, "shutting_down")) {
1382 		mic_state = MIC_SHUTTING_DOWN;
1383 	} else if (!strcmp(state, "reset_failed")) {
1384 		mic_state = MIC_RESET_FAILED;
1385 	} else if (!strcmp(state, "resetting")) {
1386 		mic_state = MIC_RESETTING;
1387 	} else {
1388 		mpsslog("%s: BUG invalid state %s\n", mic->name, state);
1389 		assert(0);
1390 	}
1391 
1392 	free(state);
1393 	return mic_state;
1394 };
1395 
mic_handle_shutdown(struct mic_info * mic)1396 static void mic_handle_shutdown(struct mic_info *mic)
1397 {
1398 #define SHUTDOWN_TIMEOUT 60
1399 	int i = SHUTDOWN_TIMEOUT;
1400 	char *shutdown_status;
1401 	while (i) {
1402 		shutdown_status = readsysfs(mic->name, "shutdown_status");
1403 		if (!shutdown_status) {
1404 			sleep(1);
1405 			continue;
1406 		}
1407 		mpsslog("%s: %s %d shutdown_status %s\n",
1408 			mic->name, __func__, __LINE__, shutdown_status);
1409 		switch (get_mic_shutdown_status(mic, shutdown_status)) {
1410 		case MIC_RESTART:
1411 			mic->restart = 1;
1412 		case MIC_HALTED:
1413 		case MIC_POWER_OFF:
1414 		case MIC_CRASHED:
1415 			free(shutdown_status);
1416 			goto reset;
1417 		default:
1418 			break;
1419 		}
1420 		free(shutdown_status);
1421 		sleep(1);
1422 		i--;
1423 	}
1424 reset:
1425 	if (!i)
1426 		mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
1427 			mic->name, __func__, __LINE__, shutdown_status);
1428 	reset(mic);
1429 }
1430 
open_state_fd(struct mic_info * mic)1431 static int open_state_fd(struct mic_info *mic)
1432 {
1433 	char pathname[PATH_MAX];
1434 	int fd;
1435 
1436 	snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
1437 		 MICSYSFSDIR, mic->name, "state");
1438 
1439 	fd = open(pathname, O_RDONLY);
1440 	if (fd < 0)
1441 		mpsslog("%s: opening file %s failed %s\n",
1442 			mic->name, pathname, strerror(errno));
1443 	return fd;
1444 }
1445 
block_till_state_change(int fd,struct mic_info * mic)1446 static int block_till_state_change(int fd, struct mic_info *mic)
1447 {
1448 	struct pollfd ufds[1];
1449 	char value[PAGE_SIZE];
1450 	int ret;
1451 
1452 	ufds[0].fd = fd;
1453 	ufds[0].events = POLLERR | POLLPRI;
1454 	ret = poll(ufds, 1, -1);
1455 	if (ret < 0) {
1456 		mpsslog("%s: %s %d poll failed %s\n",
1457 			mic->name, __func__, __LINE__, strerror(errno));
1458 		return ret;
1459 	}
1460 
1461 	ret = lseek(fd, 0, SEEK_SET);
1462 	if (ret < 0) {
1463 		mpsslog("%s: %s %d Failed to seek to 0: %s\n",
1464 			mic->name, __func__, __LINE__, strerror(errno));
1465 		return ret;
1466 	}
1467 
1468 	ret = read(fd, value, sizeof(value));
1469 	if (ret < 0) {
1470 		mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
1471 			mic->name, __func__, __LINE__, strerror(errno));
1472 		return ret;
1473 	}
1474 
1475 	return 0;
1476 }
1477 
1478 static void *
mic_config(void * arg)1479 mic_config(void *arg)
1480 {
1481 	struct mic_info *mic = (struct mic_info *)arg;
1482 	int fd, ret, stat = 0;
1483 
1484 	fd = open_state_fd(mic);
1485 	if (fd < 0) {
1486 		mpsslog("%s: %s %d open state fd failed %s\n",
1487 			mic->name, __func__, __LINE__, strerror(errno));
1488 		goto exit;
1489 	}
1490 
1491 	do {
1492 		ret = block_till_state_change(fd, mic);
1493 		if (ret < 0) {
1494 			mpsslog("%s: %s %d block_till_state_change error %s\n",
1495 				mic->name, __func__, __LINE__, strerror(errno));
1496 			goto close_exit;
1497 		}
1498 
1499 		switch (get_mic_state(mic)) {
1500 		case MIC_SHUTTING_DOWN:
1501 			mic_handle_shutdown(mic);
1502 			break;
1503 		case MIC_READY:
1504 		case MIC_RESET_FAILED:
1505 			ret = kill(mic->pid, SIGTERM);
1506 			mpsslog("%s: %s %d kill pid %d ret %d\n",
1507 				mic->name, __func__, __LINE__,
1508 				mic->pid, ret);
1509 			if (!ret) {
1510 				ret = waitpid(mic->pid, &stat,
1511 					      WIFSIGNALED(stat));
1512 				mpsslog("%s: %s %d waitpid ret %d pid %d\n",
1513 					mic->name, __func__, __LINE__,
1514 					ret, mic->pid);
1515 			}
1516 			if (mic->boot_on_resume) {
1517 				setsysfs(mic->name, "state", "boot");
1518 				mic->boot_on_resume = 0;
1519 			}
1520 			goto close_exit;
1521 		default:
1522 			break;
1523 		}
1524 	} while (1);
1525 
1526 close_exit:
1527 	close(fd);
1528 exit:
1529 	init_mic(mic);
1530 	pthread_exit(NULL);
1531 }
1532 
1533 static void
set_cmdline(struct mic_info * mic)1534 set_cmdline(struct mic_info *mic)
1535 {
1536 	char buffer[PATH_MAX];
1537 	int len;
1538 
1539 	len = snprintf(buffer, PATH_MAX,
1540 		"clocksource=tsc highres=off nohz=off ");
1541 	len += snprintf(buffer + len, PATH_MAX - len,
1542 		"cpufreq_on;corec6_off;pc3_off;pc6_off ");
1543 	len += snprintf(buffer + len, PATH_MAX - len,
1544 		"ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
1545 		mic->id + 1);
1546 
1547 	setsysfs(mic->name, "cmdline", buffer);
1548 	mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
1549 	snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
1550 	mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
1551 }
1552 
1553 static void
set_log_buf_info(struct mic_info * mic)1554 set_log_buf_info(struct mic_info *mic)
1555 {
1556 	int fd;
1557 	off_t len;
1558 	char system_map[] = "/lib/firmware/mic/System.map";
1559 	char *map, *temp, log_buf[17] = {'\0'};
1560 
1561 	fd = open(system_map, O_RDONLY);
1562 	if (fd < 0) {
1563 		mpsslog("%s: Opening System.map failed: %d\n",
1564 			mic->name, errno);
1565 		return;
1566 	}
1567 	len = lseek(fd, 0, SEEK_END);
1568 	if (len < 0) {
1569 		mpsslog("%s: Reading System.map size failed: %d\n",
1570 			mic->name, errno);
1571 		close(fd);
1572 		return;
1573 	}
1574 	map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
1575 	if (map == MAP_FAILED) {
1576 		mpsslog("%s: mmap of System.map failed: %d\n",
1577 			mic->name, errno);
1578 		close(fd);
1579 		return;
1580 	}
1581 	temp = strstr(map, "__log_buf");
1582 	if (!temp) {
1583 		mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
1584 		munmap(map, len);
1585 		close(fd);
1586 		return;
1587 	}
1588 	strncpy(log_buf, temp - 19, 16);
1589 	setsysfs(mic->name, "log_buf_addr", log_buf);
1590 	mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
1591 	temp = strstr(map, "log_buf_len");
1592 	if (!temp) {
1593 		mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
1594 		munmap(map, len);
1595 		close(fd);
1596 		return;
1597 	}
1598 	strncpy(log_buf, temp - 19, 16);
1599 	setsysfs(mic->name, "log_buf_len", log_buf);
1600 	mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
1601 	munmap(map, len);
1602 	close(fd);
1603 }
1604 
1605 static void
change_virtblk_backend(int x,siginfo_t * siginfo,void * p)1606 change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
1607 {
1608 	struct mic_info *mic;
1609 
1610 	for (mic = mic_list.next; mic != NULL; mic = mic->next)
1611 		mic->mic_virtblk.signaled = 1/* true */;
1612 }
1613 
/*
 * Publish the boot parameters of a card through sysfs: first the kernel
 * log buffer information, then the kernel command line.
 */
static void set_mic_boot_params(struct mic_info *mic)
{
	set_log_buf_info(mic);
	set_cmdline(mic);
}
1620 
1621 static void *
init_mic(void * arg)1622 init_mic(void *arg)
1623 {
1624 	struct mic_info *mic = (struct mic_info *)arg;
1625 	struct sigaction ignore = {
1626 		.sa_flags = 0,
1627 		.sa_handler = SIG_IGN
1628 	};
1629 	struct sigaction act = {
1630 		.sa_flags = SA_SIGINFO,
1631 		.sa_sigaction = change_virtblk_backend,
1632 	};
1633 	char buffer[PATH_MAX];
1634 	int err, fd;
1635 
1636 	/*
1637 	 * Currently, one virtio block device is supported for each MIC card
1638 	 * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
1639 	 * The signal informs the virtio block backend about a change in the
1640 	 * configuration file which specifies the virtio backend file name on
1641 	 * the host. Virtio block backend then re-reads the configuration file
1642 	 * and switches to the new block device. This signalling mechanism may
1643 	 * not be required once multiple virtio block devices are supported by
1644 	 * the MIC daemon.
1645 	 */
1646 	sigaction(SIGUSR1, &ignore, NULL);
1647 retry:
1648 	fd = open_state_fd(mic);
1649 	if (fd < 0) {
1650 		mpsslog("%s: %s %d open state fd failed %s\n",
1651 			mic->name, __func__, __LINE__, strerror(errno));
1652 		sleep(2);
1653 		goto retry;
1654 	}
1655 
1656 	if (mic->restart) {
1657 		snprintf(buffer, PATH_MAX, "boot");
1658 		setsysfs(mic->name, "state", buffer);
1659 		mpsslog("%s restarting mic %d\n",
1660 			mic->name, mic->restart);
1661 		mic->restart = 0;
1662 	}
1663 
1664 	while (1) {
1665 		while (block_till_state_change(fd, mic)) {
1666 			mpsslog("%s: %s %d block_till_state_change error %s\n",
1667 				mic->name, __func__, __LINE__, strerror(errno));
1668 			sleep(2);
1669 			continue;
1670 		}
1671 
1672 		if (get_mic_state(mic) == MIC_BOOTING)
1673 			break;
1674 	}
1675 
1676 	mic->pid = fork();
1677 	switch (mic->pid) {
1678 	case 0:
1679 		add_virtio_device(mic, &virtcons_dev_page.dd);
1680 		add_virtio_device(mic, &virtnet_dev_page.dd);
1681 		err = pthread_create(&mic->mic_console.console_thread, NULL,
1682 			virtio_console, mic);
1683 		if (err)
1684 			mpsslog("%s virtcons pthread_create failed %s\n",
1685 				mic->name, strerror(err));
1686 		err = pthread_create(&mic->mic_net.net_thread, NULL,
1687 			virtio_net, mic);
1688 		if (err)
1689 			mpsslog("%s virtnet pthread_create failed %s\n",
1690 				mic->name, strerror(err));
1691 		err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
1692 			virtio_block, mic);
1693 		if (err)
1694 			mpsslog("%s virtblk pthread_create failed %s\n",
1695 				mic->name, strerror(err));
1696 		sigemptyset(&act.sa_mask);
1697 		err = sigaction(SIGUSR1, &act, NULL);
1698 		if (err)
1699 			mpsslog("%s sigaction SIGUSR1 failed %s\n",
1700 				mic->name, strerror(errno));
1701 		while (1)
1702 			sleep(60);
1703 	case -1:
1704 		mpsslog("fork failed MIC name %s id %d errno %d\n",
1705 			mic->name, mic->id, errno);
1706 		break;
1707 	default:
1708 		err = pthread_create(&mic->config_thread, NULL,
1709 				     mic_config, mic);
1710 		if (err)
1711 			mpsslog("%s mic_config pthread_create failed %s\n",
1712 				mic->name, strerror(err));
1713 	}
1714 
1715 	return NULL;
1716 }
1717 
1718 static void
start_daemon(void)1719 start_daemon(void)
1720 {
1721 	struct mic_info *mic;
1722 	int err;
1723 
1724 	for (mic = mic_list.next; mic; mic = mic->next) {
1725 		set_mic_boot_params(mic);
1726 		err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
1727 		if (err)
1728 			mpsslog("%s init_mic pthread_create failed %s\n",
1729 				mic->name, strerror(err));
1730 	}
1731 
1732 	while (1)
1733 		sleep(60);
1734 }
1735 
1736 static int
init_mic_list(void)1737 init_mic_list(void)
1738 {
1739 	struct mic_info *mic = &mic_list;
1740 	struct dirent *file;
1741 	DIR *dp;
1742 	int cnt = 0;
1743 
1744 	dp = opendir(MICSYSFSDIR);
1745 	if (!dp)
1746 		return 0;
1747 
1748 	while ((file = readdir(dp)) != NULL) {
1749 		if (!strncmp(file->d_name, "mic", 3)) {
1750 			mic->next = calloc(1, sizeof(struct mic_info));
1751 			if (mic->next) {
1752 				mic = mic->next;
1753 				mic->id = atoi(&file->d_name[3]);
1754 				mic->name = malloc(strlen(file->d_name) + 16);
1755 				if (mic->name)
1756 					strcpy(mic->name, file->d_name);
1757 				mpsslog("MIC name %s id %d\n", mic->name,
1758 					mic->id);
1759 				cnt++;
1760 			}
1761 		}
1762 	}
1763 
1764 	closedir(dp);
1765 	return cnt;
1766 }
1767 
1768 void
mpsslog(char * format,...)1769 mpsslog(char *format, ...)
1770 {
1771 	va_list args;
1772 	char buffer[4096];
1773 	char ts[52], *ts1;
1774 	time_t t;
1775 
1776 	if (logfp == NULL)
1777 		return;
1778 
1779 	va_start(args, format);
1780 	vsprintf(buffer, format, args);
1781 	va_end(args);
1782 
1783 	time(&t);
1784 	ts1 = ctime_r(&t, ts);
1785 	ts1[strlen(ts1) - 1] = '\0';
1786 	fprintf(logfp, "%s: %s", ts1, buffer);
1787 
1788 	fflush(logfp);
1789 }
1790 
1791 int
main(int argc,char * argv[])1792 main(int argc, char *argv[])
1793 {
1794 	int cnt;
1795 	pid_t pid;
1796 
1797 	myname = argv[0];
1798 
1799 	logfp = fopen(LOGFILE_NAME, "a+");
1800 	if (!logfp) {
1801 		fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
1802 		exit(1);
1803 	}
1804 	pid = fork();
1805 	switch (pid) {
1806 	case 0:
1807 		break;
1808 	case -1:
1809 		exit(2);
1810 	default:
1811 		exit(0);
1812 	}
1813 
1814 	mpsslog("MIC Daemon start\n");
1815 
1816 	cnt = init_mic_list();
1817 	if (cnt == 0) {
1818 		mpsslog("MIC module not loaded\n");
1819 		exit(3);
1820 	}
1821 	mpsslog("MIC found %d devices\n", cnt);
1822 
1823 	start_daemon();
1824 
1825 	exit(0);
1826 }
1827