• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2016 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * The full GNU General Public License is included in this distribution in
16  * the file called "COPYING".
17  *
18  * Intel Virtio Over PCIe (VOP) driver.
19  *
20  */
21 #include <linux/sched.h>
22 #include <linux/poll.h>
23 #include <linux/dma-mapping.h>
24 
25 #include <linux/mic_common.h>
26 #include "../common/mic_dev.h"
27 
28 #include <linux/mic_ioctl.h>
29 #include "vop_main.h"
30 
31 /* Helper API to obtain the VOP PCIe device */
vop_dev(struct vop_vdev * vdev)32 static inline struct device *vop_dev(struct vop_vdev *vdev)
33 {
34 	return vdev->vpdev->dev.parent;
35 }
36 
37 /* Helper API to check if a virtio device is initialized */
vop_vdev_inited(struct vop_vdev * vdev)38 static inline int vop_vdev_inited(struct vop_vdev *vdev)
39 {
40 	if (!vdev)
41 		return -EINVAL;
42 	/* Device has not been created yet */
43 	if (!vdev->dd || !vdev->dd->type) {
44 		dev_err(vop_dev(vdev), "%s %d err %d\n",
45 			__func__, __LINE__, -EINVAL);
46 		return -EINVAL;
47 	}
48 	/* Device has been removed/deleted */
49 	if (vdev->dd->type == -1) {
50 		dev_dbg(vop_dev(vdev), "%s %d err %d\n",
51 			__func__, __LINE__, -ENODEV);
52 		return -ENODEV;
53 	}
54 	return 0;
55 }
56 
_vop_notify(struct vringh * vrh)57 static void _vop_notify(struct vringh *vrh)
58 {
59 	struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
60 	struct vop_vdev *vdev = vvrh->vdev;
61 	struct vop_device *vpdev = vdev->vpdev;
62 	s8 db = vdev->dc->h2c_vdev_db;
63 
64 	if (db != -1)
65 		vpdev->hw_ops->send_intr(vpdev, db);
66 }
67 
vop_virtio_init_post(struct vop_vdev * vdev)68 static void vop_virtio_init_post(struct vop_vdev *vdev)
69 {
70 	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
71 	struct vop_device *vpdev = vdev->vpdev;
72 	int i, used_size;
73 
74 	for (i = 0; i < vdev->dd->num_vq; i++) {
75 		used_size = PAGE_ALIGN(sizeof(u16) * 3 +
76 				sizeof(struct vring_used_elem) *
77 				le16_to_cpu(vqconfig->num));
78 		if (!le64_to_cpu(vqconfig[i].used_address)) {
79 			dev_warn(vop_dev(vdev), "used_address zero??\n");
80 			continue;
81 		}
82 		vdev->vvr[i].vrh.vring.used =
83 			(void __force *)vpdev->hw_ops->ioremap(
84 			vpdev,
85 			le64_to_cpu(vqconfig[i].used_address),
86 			used_size);
87 	}
88 
89 	vdev->dc->used_address_updated = 0;
90 
91 	dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
92 		 __func__, vdev->virtio_id);
93 }
94 
vop_virtio_device_reset(struct vop_vdev * vdev)95 static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
96 {
97 	int i;
98 
99 	dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
100 		__func__, vdev->dd->status, vdev->virtio_id);
101 
102 	for (i = 0; i < vdev->dd->num_vq; i++)
103 		/*
104 		 * Avoid lockdep false positive. The + 1 is for the vop
105 		 * mutex which is held in the reset devices code path.
106 		 */
107 		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
108 
109 	/* 0 status means "reset" */
110 	vdev->dd->status = 0;
111 	vdev->dc->vdev_reset = 0;
112 	vdev->dc->host_ack = 1;
113 
114 	for (i = 0; i < vdev->dd->num_vq; i++) {
115 		struct vringh *vrh = &vdev->vvr[i].vrh;
116 
117 		vdev->vvr[i].vring.info->avail_idx = 0;
118 		vrh->completed = 0;
119 		vrh->last_avail_idx = 0;
120 		vrh->last_used_idx = 0;
121 	}
122 
123 	for (i = 0; i < vdev->dd->num_vq; i++)
124 		mutex_unlock(&vdev->vvr[i].vr_mutex);
125 }
126 
vop_virtio_reset_devices(struct vop_info * vi)127 static void vop_virtio_reset_devices(struct vop_info *vi)
128 {
129 	struct list_head *pos, *tmp;
130 	struct vop_vdev *vdev;
131 
132 	list_for_each_safe(pos, tmp, &vi->vdev_list) {
133 		vdev = list_entry(pos, struct vop_vdev, list);
134 		vop_virtio_device_reset(vdev);
135 		vdev->poll_wake = 1;
136 		wake_up(&vdev->waitq);
137 	}
138 }
139 
vop_bh_handler(struct work_struct * work)140 static void vop_bh_handler(struct work_struct *work)
141 {
142 	struct vop_vdev *vdev = container_of(work, struct vop_vdev,
143 			virtio_bh_work);
144 
145 	if (vdev->dc->used_address_updated)
146 		vop_virtio_init_post(vdev);
147 
148 	if (vdev->dc->vdev_reset)
149 		vop_virtio_device_reset(vdev);
150 
151 	vdev->poll_wake = 1;
152 	wake_up(&vdev->waitq);
153 }
154 
_vop_virtio_intr_handler(int irq,void * data)155 static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
156 {
157 	struct vop_vdev *vdev = data;
158 	struct vop_device *vpdev = vdev->vpdev;
159 
160 	vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
161 	schedule_work(&vdev->virtio_bh_work);
162 	return IRQ_HANDLED;
163 }
164 
vop_virtio_config_change(struct vop_vdev * vdev,void * argp)165 static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
166 {
167 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
168 	int ret = 0, retry, i;
169 	struct vop_device *vpdev = vdev->vpdev;
170 	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
171 	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
172 	s8 db = bootparam->h2c_config_db;
173 
174 	mutex_lock(&vi->vop_mutex);
175 	for (i = 0; i < vdev->dd->num_vq; i++)
176 		mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
177 
178 	if (db == -1 || vdev->dd->type == -1) {
179 		ret = -EIO;
180 		goto exit;
181 	}
182 
183 	memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
184 	vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
185 	vpdev->hw_ops->send_intr(vpdev, db);
186 
187 	for (retry = 100; retry--;) {
188 		ret = wait_event_timeout(wake, vdev->dc->guest_ack,
189 					 msecs_to_jiffies(100));
190 		if (ret)
191 			break;
192 	}
193 
194 	dev_dbg(vop_dev(vdev),
195 		"%s %d retry: %d\n", __func__, __LINE__, retry);
196 	vdev->dc->config_change = 0;
197 	vdev->dc->guest_ack = 0;
198 exit:
199 	for (i = 0; i < vdev->dd->num_vq; i++)
200 		mutex_unlock(&vdev->vvr[i].vr_mutex);
201 	mutex_unlock(&vi->vop_mutex);
202 	return ret;
203 }
204 
/*
 * vop_copy_dp_entry - copy a device descriptor into a free device page slot.
 * @vdev: virtio device the descriptor belongs to.
 * @argp: descriptor to install; its type field is zeroed by this call.
 * @type: out parameter receiving the original descriptor type.
 * @devpage: out parameter receiving the address of the slot that was used.
 *
 * Rejects descriptors whose vrings exceed MIC_MAX_VRING_ENTRIES, then
 * scans the device page (starting after struct mic_bootparam) for the
 * first entry whose type is 0 or -1.
 *
 * Returns 0 on success and -EINVAL when a vring is too large or no slot
 * is available.
 */
static int vop_copy_dp_entry(struct vop_vdev *vdev,
			     struct mic_device_desc *argp, __u8 *type,
			     struct mic_device_desc **devpage)
{
	struct vop_device *vpdev = vdev->vpdev;
	struct mic_vqconfig *vqconfig = mic_vq_config(argp);
	struct mic_device_desc *devp;
	int off, q, ret;

	for (q = 0; q < argp->num_vq; q++) {
		if (le16_to_cpu(vqconfig[q].num) <= MIC_MAX_VRING_ENTRIES)
			continue;
		ret =  -EINVAL;
		dev_err(vop_dev(vdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		return ret;
	}

	/* Find the first free device page entry */
	off = sizeof(struct mic_bootparam);
	while (off < MIC_DP_SIZE - mic_total_desc_size(argp)) {
		devp = vpdev->hw_ops->get_dp(vpdev) + off;
		if (devp->type == 0 || devp->type == -1)
			goto slot_found;
		off += mic_total_desc_size(devp);
	}

	ret =  -EINVAL;
	dev_err(vop_dev(vdev), "%s %d err %d\n",
		__func__, __LINE__, ret);
	return ret;

slot_found:
	/*
	 * Save off the type before doing the memcpy. Type will be set in the
	 * end after completing all initialization for the new device.
	 */
	*type = argp->type;
	argp->type = 0;
	memcpy(devp, argp, mic_desc_size(argp));

	*devpage = devp;
	return 0;
}
253 
vop_init_device_ctrl(struct vop_vdev * vdev,struct mic_device_desc * devpage)254 static void vop_init_device_ctrl(struct vop_vdev *vdev,
255 				 struct mic_device_desc *devpage)
256 {
257 	struct mic_device_ctrl *dc;
258 
259 	dc = (void *)devpage + mic_aligned_desc_size(devpage);
260 
261 	dc->config_change = 0;
262 	dc->guest_ack = 0;
263 	dc->vdev_reset = 0;
264 	dc->host_ack = 0;
265 	dc->used_address_updated = 0;
266 	dc->c2h_vdev_db = -1;
267 	dc->h2c_vdev_db = -1;
268 	vdev->dc = dc;
269 }
270 
vop_virtio_add_device(struct vop_vdev * vdev,struct mic_device_desc * argp)271 static int vop_virtio_add_device(struct vop_vdev *vdev,
272 				 struct mic_device_desc *argp)
273 {
274 	struct vop_info *vi = vdev->vi;
275 	struct vop_device *vpdev = vi->vpdev;
276 	struct mic_device_desc *dd = NULL;
277 	struct mic_vqconfig *vqconfig;
278 	int vr_size, i, j, ret;
279 	u8 type = 0;
280 	s8 db = -1;
281 	char irqname[16];
282 	struct mic_bootparam *bootparam;
283 	u16 num;
284 	dma_addr_t vr_addr;
285 
286 	bootparam = vpdev->hw_ops->get_dp(vpdev);
287 	init_waitqueue_head(&vdev->waitq);
288 	INIT_LIST_HEAD(&vdev->list);
289 	vdev->vpdev = vpdev;
290 
291 	ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
292 	if (ret) {
293 		dev_err(vop_dev(vdev), "%s %d err %d\n",
294 			__func__, __LINE__, ret);
295 		return ret;
296 	}
297 
298 	vop_init_device_ctrl(vdev, dd);
299 
300 	vdev->dd = dd;
301 	vdev->virtio_id = type;
302 	vqconfig = mic_vq_config(dd);
303 	INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
304 
305 	for (i = 0; i < dd->num_vq; i++) {
306 		struct vop_vringh *vvr = &vdev->vvr[i];
307 		struct mic_vring *vr = &vdev->vvr[i].vring;
308 
309 		num = le16_to_cpu(vqconfig[i].num);
310 		mutex_init(&vvr->vr_mutex);
311 		vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) +
312 			sizeof(struct _mic_vring_info));
313 		vr->va = (void *)
314 			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
315 					 get_order(vr_size));
316 		if (!vr->va) {
317 			ret = -ENOMEM;
318 			dev_err(vop_dev(vdev), "%s %d err %d\n",
319 				__func__, __LINE__, ret);
320 			goto err;
321 		}
322 		vr->len = vr_size;
323 		vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4);
324 		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
325 		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
326 					 DMA_BIDIRECTIONAL);
327 		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
328 			free_pages((unsigned long)vr->va, get_order(vr_size));
329 			ret = -ENOMEM;
330 			dev_err(vop_dev(vdev), "%s %d err %d\n",
331 				__func__, __LINE__, ret);
332 			goto err;
333 		}
334 		vqconfig[i].address = cpu_to_le64(vr_addr);
335 
336 		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
337 		ret = vringh_init_kern(&vvr->vrh,
338 				       *(u32 *)mic_vq_features(vdev->dd),
339 				       num, false, vr->vr.desc, vr->vr.avail,
340 				       vr->vr.used);
341 		if (ret) {
342 			dev_err(vop_dev(vdev), "%s %d err %d\n",
343 				__func__, __LINE__, ret);
344 			goto err;
345 		}
346 		vringh_kiov_init(&vvr->riov, NULL, 0);
347 		vringh_kiov_init(&vvr->wiov, NULL, 0);
348 		vvr->head = USHRT_MAX;
349 		vvr->vdev = vdev;
350 		vvr->vrh.notify = _vop_notify;
351 		dev_dbg(&vpdev->dev,
352 			"%s %d index %d va %p info %p vr_size 0x%x\n",
353 			__func__, __LINE__, i, vr->va, vr->info, vr_size);
354 		vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
355 					get_order(VOP_INT_DMA_BUF_SIZE));
356 		vvr->buf_da = dma_map_single(&vpdev->dev,
357 					  vvr->buf, VOP_INT_DMA_BUF_SIZE,
358 					  DMA_BIDIRECTIONAL);
359 	}
360 
361 	snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
362 		 vdev->virtio_id);
363 	vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
364 	vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
365 			_vop_virtio_intr_handler, irqname, vdev,
366 			vdev->virtio_db);
367 	if (IS_ERR(vdev->virtio_cookie)) {
368 		ret = PTR_ERR(vdev->virtio_cookie);
369 		dev_dbg(&vpdev->dev, "request irq failed\n");
370 		goto err;
371 	}
372 
373 	vdev->dc->c2h_vdev_db = vdev->virtio_db;
374 
375 	/*
376 	 * Order the type update with previous stores. This write barrier
377 	 * is paired with the corresponding read barrier before the uncached
378 	 * system memory read of the type, on the card while scanning the
379 	 * device page.
380 	 */
381 	smp_wmb();
382 	dd->type = type;
383 	argp->type = type;
384 
385 	if (bootparam) {
386 		db = bootparam->h2c_config_db;
387 		if (db != -1)
388 			vpdev->hw_ops->send_intr(vpdev, db);
389 	}
390 	dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
391 	return 0;
392 err:
393 	vqconfig = mic_vq_config(dd);
394 	for (j = 0; j < i; j++) {
395 		struct vop_vringh *vvr = &vdev->vvr[j];
396 
397 		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
398 				 vvr->vring.len, DMA_BIDIRECTIONAL);
399 		free_pages((unsigned long)vvr->vring.va,
400 			   get_order(vvr->vring.len));
401 	}
402 	return ret;
403 }
404 
vop_dev_remove(struct vop_info * pvi,struct mic_device_ctrl * devp,struct vop_device * vpdev)405 static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
406 			   struct vop_device *vpdev)
407 {
408 	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
409 	s8 db;
410 	int ret, retry;
411 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
412 
413 	devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
414 	db = bootparam->h2c_config_db;
415 	if (db != -1)
416 		vpdev->hw_ops->send_intr(vpdev, db);
417 	else
418 		goto done;
419 	for (retry = 15; retry--;) {
420 		ret = wait_event_timeout(wake, devp->guest_ack,
421 					 msecs_to_jiffies(1000));
422 		if (ret)
423 			break;
424 	}
425 done:
426 	devp->config_change = 0;
427 	devp->guest_ack = 0;
428 }
429 
vop_virtio_del_device(struct vop_vdev * vdev)430 static void vop_virtio_del_device(struct vop_vdev *vdev)
431 {
432 	struct vop_info *vi = vdev->vi;
433 	struct vop_device *vpdev = vdev->vpdev;
434 	int i;
435 	struct mic_vqconfig *vqconfig;
436 	struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
437 
438 	if (!bootparam)
439 		goto skip_hot_remove;
440 	vop_dev_remove(vi, vdev->dc, vpdev);
441 skip_hot_remove:
442 	vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
443 	flush_work(&vdev->virtio_bh_work);
444 	vqconfig = mic_vq_config(vdev->dd);
445 	for (i = 0; i < vdev->dd->num_vq; i++) {
446 		struct vop_vringh *vvr = &vdev->vvr[i];
447 
448 		dma_unmap_single(&vpdev->dev,
449 				 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
450 				 DMA_BIDIRECTIONAL);
451 		free_pages((unsigned long)vvr->buf,
452 			   get_order(VOP_INT_DMA_BUF_SIZE));
453 		vringh_kiov_cleanup(&vvr->riov);
454 		vringh_kiov_cleanup(&vvr->wiov);
455 		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
456 				 vvr->vring.len, DMA_BIDIRECTIONAL);
457 		free_pages((unsigned long)vvr->vring.va,
458 			   get_order(vvr->vring.len));
459 	}
460 	/*
461 	 * Order the type update with previous stores. This write barrier
462 	 * is paired with the corresponding read barrier before the uncached
463 	 * system memory read of the type, on the card while scanning the
464 	 * device page.
465 	 */
466 	smp_wmb();
467 	vdev->dd->type = -1;
468 }
469 
470 /*
471  * vop_sync_dma - Wrapper for synchronous DMAs.
472  *
473  * @dev - The address of the pointer to the device instance used
474  * for DMA registration.
475  * @dst - destination DMA address.
476  * @src - source DMA address.
477  * @len - size of the transfer.
478  *
479  * Return DMA_SUCCESS on success
480  */
vop_sync_dma(struct vop_vdev * vdev,dma_addr_t dst,dma_addr_t src,size_t len)481 static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
482 			size_t len)
483 {
484 	int err = 0;
485 	struct dma_device *ddev;
486 	struct dma_async_tx_descriptor *tx;
487 	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
488 	struct dma_chan *vop_ch = vi->dma_ch;
489 
490 	if (!vop_ch) {
491 		err = -EBUSY;
492 		goto error;
493 	}
494 	ddev = vop_ch->device;
495 	tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
496 		DMA_PREP_FENCE);
497 	if (!tx) {
498 		err = -ENOMEM;
499 		goto error;
500 	} else {
501 		dma_cookie_t cookie;
502 
503 		cookie = tx->tx_submit(tx);
504 		if (dma_submit_error(cookie)) {
505 			err = -ENOMEM;
506 			goto error;
507 		}
508 		dma_async_issue_pending(vop_ch);
509 		err = dma_sync_wait(vop_ch, cookie);
510 	}
511 error:
512 	if (err)
513 		dev_err(&vi->vpdev->dev, "%s %d err %d\n",
514 			__func__, __LINE__, err);
515 	return err;
516 }
517 
518 #define VOP_USE_DMA true
519 
520 /*
521  * Initiates the copies across the PCIe bus from card memory to a user
522  * space buffer. When transfers are done using DMA, source/destination
523  * addresses and transfer length must follow the alignment requirements of
524  * the MIC DMA engine.
525  */
vop_virtio_copy_to_user(struct vop_vdev * vdev,void __user * ubuf,size_t len,u64 daddr,size_t dlen,int vr_idx)526 static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
527 				   size_t len, u64 daddr, size_t dlen,
528 				   int vr_idx)
529 {
530 	struct vop_device *vpdev = vdev->vpdev;
531 	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
532 	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
533 	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
534 	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
535 	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
536 	size_t dma_offset, partlen;
537 	int err;
538 
539 	if (!VOP_USE_DMA) {
540 		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
541 			err = -EFAULT;
542 			dev_err(vop_dev(vdev), "%s %d err %d\n",
543 				__func__, __LINE__, err);
544 			goto err;
545 		}
546 		vdev->in_bytes += len;
547 		err = 0;
548 		goto err;
549 	}
550 
551 	dma_offset = daddr - round_down(daddr, dma_alignment);
552 	daddr -= dma_offset;
553 	len += dma_offset;
554 	/*
555 	 * X100 uses DMA addresses as seen by the card so adding
556 	 * the aperture base is not required for DMA. However x200
557 	 * requires DMA addresses to be an offset into the bar so
558 	 * add the aperture base for x200.
559 	 */
560 	if (x200)
561 		daddr += vpdev->aper->pa;
562 	while (len) {
563 		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
564 		err = vop_sync_dma(vdev, vvr->buf_da, daddr,
565 				   ALIGN(partlen, dma_alignment));
566 		if (err) {
567 			dev_err(vop_dev(vdev), "%s %d err %d\n",
568 				__func__, __LINE__, err);
569 			goto err;
570 		}
571 		if (copy_to_user(ubuf, vvr->buf + dma_offset,
572 				 partlen - dma_offset)) {
573 			err = -EFAULT;
574 			dev_err(vop_dev(vdev), "%s %d err %d\n",
575 				__func__, __LINE__, err);
576 			goto err;
577 		}
578 		daddr += partlen;
579 		ubuf += partlen;
580 		dbuf += partlen;
581 		vdev->in_bytes_dma += partlen;
582 		vdev->in_bytes += partlen;
583 		len -= partlen;
584 		dma_offset = 0;
585 	}
586 	err = 0;
587 err:
588 	vpdev->hw_ops->iounmap(vpdev, dbuf);
589 	dev_dbg(vop_dev(vdev),
590 		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
591 		__func__, ubuf, dbuf, len, vr_idx);
592 	return err;
593 }
594 
595 /*
596  * Initiates copies across the PCIe bus from a user space buffer to card
597  * memory. When transfers are done using DMA, source/destination addresses
598  * and transfer length must follow the alignment requirements of the MIC
599  * DMA engine.
600  */
vop_virtio_copy_from_user(struct vop_vdev * vdev,void __user * ubuf,size_t len,u64 daddr,size_t dlen,int vr_idx)601 static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
602 				     size_t len, u64 daddr, size_t dlen,
603 				     int vr_idx)
604 {
605 	struct vop_device *vpdev = vdev->vpdev;
606 	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
607 	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
608 	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
609 	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
610 	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
611 	size_t partlen;
612 	bool dma = VOP_USE_DMA;
613 	int err = 0;
614 	size_t offset = 0;
615 
616 	if (daddr & (dma_alignment - 1)) {
617 		vdev->tx_dst_unaligned += len;
618 		dma = false;
619 	} else if (ALIGN(len, dma_alignment) > dlen) {
620 		vdev->tx_len_unaligned += len;
621 		dma = false;
622 	}
623 
624 	if (!dma)
625 		goto memcpy;
626 
627 	/*
628 	 * X100 uses DMA addresses as seen by the card so adding
629 	 * the aperture base is not required for DMA. However x200
630 	 * requires DMA addresses to be an offset into the bar so
631 	 * add the aperture base for x200.
632 	 */
633 	if (x200)
634 		daddr += vpdev->aper->pa;
635 	while (len) {
636 		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
637 
638 		if (copy_from_user(vvr->buf, ubuf, partlen)) {
639 			err = -EFAULT;
640 			dev_err(vop_dev(vdev), "%s %d err %d\n",
641 				__func__, __LINE__, err);
642 			goto err;
643 		}
644 		err = vop_sync_dma(vdev, daddr, vvr->buf_da,
645 				   ALIGN(partlen, dma_alignment));
646 		if (err) {
647 			dev_err(vop_dev(vdev), "%s %d err %d\n",
648 				__func__, __LINE__, err);
649 			goto err;
650 		}
651 		daddr += partlen;
652 		ubuf += partlen;
653 		dbuf += partlen;
654 		vdev->out_bytes_dma += partlen;
655 		vdev->out_bytes += partlen;
656 		len -= partlen;
657 	}
658 memcpy:
659 	/*
660 	 * We are copying to IO below and should ideally use something
661 	 * like copy_from_user_toio(..) if it existed.
662 	 */
663 	while (len) {
664 		partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
665 
666 		if (copy_from_user(vvr->buf, ubuf + offset, partlen)) {
667 			err = -EFAULT;
668 			dev_err(vop_dev(vdev), "%s %d err %d\n",
669 				__func__, __LINE__, err);
670 			goto err;
671 		}
672 		memcpy_toio(dbuf + offset, vvr->buf, partlen);
673 		offset += partlen;
674 		vdev->out_bytes += partlen;
675 		len -= partlen;
676 	}
677 	err = 0;
678 err:
679 	vpdev->hw_ops->iounmap(vpdev, dbuf);
680 	dev_dbg(vop_dev(vdev),
681 		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
682 		__func__, ubuf, dbuf, len, vr_idx);
683 	return err;
684 }
685 
686 #define MIC_VRINGH_READ true
687 
688 /* Determine the total number of bytes consumed in a VRINGH KIOV */
vop_vringh_iov_consumed(struct vringh_kiov * iov)689 static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
690 {
691 	int i;
692 	u32 total = iov->consumed;
693 
694 	for (i = 0; i < iov->i; i++)
695 		total += iov->iov[i].iov_len;
696 	return total;
697 }
698 
699 /*
700  * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
701  * This API is heavily based on the vringh_iov_xfer(..) implementation
702  * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
703  * and vringh_iov_push_kern(..) directly is because there is no
704  * way to override the VRINGH xfer(..) routines as of v3.10.
705  */
vop_vringh_copy(struct vop_vdev * vdev,struct vringh_kiov * iov,void __user * ubuf,size_t len,bool read,int vr_idx,size_t * out_len)706 static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
707 			   void __user *ubuf, size_t len, bool read, int vr_idx,
708 			   size_t *out_len)
709 {
710 	int ret = 0;
711 	size_t partlen, tot_len = 0;
712 
713 	while (len && iov->i < iov->used) {
714 		struct kvec *kiov = &iov->iov[iov->i];
715 
716 		partlen = min(kiov->iov_len, len);
717 		if (read)
718 			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
719 						      (u64)kiov->iov_base,
720 						      kiov->iov_len,
721 						      vr_idx);
722 		else
723 			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
724 							(u64)kiov->iov_base,
725 							kiov->iov_len,
726 							vr_idx);
727 		if (ret) {
728 			dev_err(vop_dev(vdev), "%s %d err %d\n",
729 				__func__, __LINE__, ret);
730 			break;
731 		}
732 		len -= partlen;
733 		ubuf += partlen;
734 		tot_len += partlen;
735 		iov->consumed += partlen;
736 		kiov->iov_len -= partlen;
737 		kiov->iov_base += partlen;
738 		if (!kiov->iov_len) {
739 			/* Fix up old iov element then increment. */
740 			kiov->iov_len = iov->consumed;
741 			kiov->iov_base -= iov->consumed;
742 
743 			iov->consumed = 0;
744 			iov->i++;
745 		}
746 	}
747 	*out_len = tot_len;
748 	return ret;
749 }
750 
751 /*
752  * Use the standard VRINGH infrastructure in the kernel to fetch new
753  * descriptors, initiate the copies and update the used ring.
754  */
_vop_virtio_copy(struct vop_vdev * vdev,struct mic_copy_desc * copy)755 static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
756 {
757 	int ret = 0;
758 	u32 iovcnt = copy->iovcnt;
759 	struct iovec iov;
760 	struct iovec __user *u_iov = copy->iov;
761 	void __user *ubuf = NULL;
762 	struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
763 	struct vringh_kiov *riov = &vvr->riov;
764 	struct vringh_kiov *wiov = &vvr->wiov;
765 	struct vringh *vrh = &vvr->vrh;
766 	u16 *head = &vvr->head;
767 	struct mic_vring *vr = &vvr->vring;
768 	size_t len = 0, out_len;
769 
770 	copy->out_len = 0;
771 	/* Fetch a new IOVEC if all previous elements have been processed */
772 	if (riov->i == riov->used && wiov->i == wiov->used) {
773 		ret = vringh_getdesc_kern(vrh, riov, wiov,
774 					  head, GFP_KERNEL);
775 		/* Check if there are available descriptors */
776 		if (ret <= 0)
777 			return ret;
778 	}
779 	while (iovcnt) {
780 		if (!len) {
781 			/* Copy over a new iovec from user space. */
782 			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
783 			if (ret) {
784 				ret = -EINVAL;
785 				dev_err(vop_dev(vdev), "%s %d err %d\n",
786 					__func__, __LINE__, ret);
787 				break;
788 			}
789 			len = iov.iov_len;
790 			ubuf = iov.iov_base;
791 		}
792 		/* Issue all the read descriptors first */
793 		ret = vop_vringh_copy(vdev, riov, ubuf, len,
794 				      MIC_VRINGH_READ, copy->vr_idx, &out_len);
795 		if (ret) {
796 			dev_err(vop_dev(vdev), "%s %d err %d\n",
797 				__func__, __LINE__, ret);
798 			break;
799 		}
800 		len -= out_len;
801 		ubuf += out_len;
802 		copy->out_len += out_len;
803 		/* Issue the write descriptors next */
804 		ret = vop_vringh_copy(vdev, wiov, ubuf, len,
805 				      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
806 		if (ret) {
807 			dev_err(vop_dev(vdev), "%s %d err %d\n",
808 				__func__, __LINE__, ret);
809 			break;
810 		}
811 		len -= out_len;
812 		ubuf += out_len;
813 		copy->out_len += out_len;
814 		if (!len) {
815 			/* One user space iovec is now completed */
816 			iovcnt--;
817 			u_iov++;
818 		}
819 		/* Exit loop if all elements in KIOVs have been processed. */
820 		if (riov->i == riov->used && wiov->i == wiov->used)
821 			break;
822 	}
823 	/*
824 	 * Update the used ring if a descriptor was available and some data was
825 	 * copied in/out and the user asked for a used ring update.
826 	 */
827 	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
828 		u32 total = 0;
829 
830 		/* Determine the total data consumed */
831 		total += vop_vringh_iov_consumed(riov);
832 		total += vop_vringh_iov_consumed(wiov);
833 		vringh_complete_kern(vrh, *head, total);
834 		*head = USHRT_MAX;
835 		if (vringh_need_notify_kern(vrh) > 0)
836 			vringh_notify(vrh);
837 		vringh_kiov_cleanup(riov);
838 		vringh_kiov_cleanup(wiov);
839 		/* Update avail idx for user space */
840 		vr->info->avail_idx = vrh->last_avail_idx;
841 	}
842 	return ret;
843 }
844 
vop_verify_copy_args(struct vop_vdev * vdev,struct mic_copy_desc * copy)845 static inline int vop_verify_copy_args(struct vop_vdev *vdev,
846 				       struct mic_copy_desc *copy)
847 {
848 	if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
849 		return -EINVAL;
850 	return 0;
851 }
852 
853 /* Copy a specified number of virtio descriptors in a chain */
vop_virtio_copy_desc(struct vop_vdev * vdev,struct mic_copy_desc * copy)854 static int vop_virtio_copy_desc(struct vop_vdev *vdev,
855 				struct mic_copy_desc *copy)
856 {
857 	int err;
858 	struct vop_vringh *vvr;
859 
860 	err = vop_verify_copy_args(vdev, copy);
861 	if (err)
862 		return err;
863 
864 	vvr = &vdev->vvr[copy->vr_idx];
865 	mutex_lock(&vvr->vr_mutex);
866 	if (!vop_vdevup(vdev)) {
867 		err = -ENODEV;
868 		dev_err(vop_dev(vdev), "%s %d err %d\n",
869 			__func__, __LINE__, err);
870 		goto err;
871 	}
872 	err = _vop_virtio_copy(vdev, copy);
873 	if (err) {
874 		dev_err(vop_dev(vdev), "%s %d err %d\n",
875 			__func__, __LINE__, err);
876 	}
877 err:
878 	mutex_unlock(&vvr->vr_mutex);
879 	return err;
880 }
881 
vop_open(struct inode * inode,struct file * f)882 static int vop_open(struct inode *inode, struct file *f)
883 {
884 	struct vop_vdev *vdev;
885 	struct vop_info *vi = container_of(f->private_data,
886 		struct vop_info, miscdev);
887 
888 	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
889 	if (!vdev)
890 		return -ENOMEM;
891 	vdev->vi = vi;
892 	mutex_init(&vdev->vdev_mutex);
893 	f->private_data = vdev;
894 	init_completion(&vdev->destroy);
895 	complete(&vdev->destroy);
896 	return 0;
897 }
898 
vop_release(struct inode * inode,struct file * f)899 static int vop_release(struct inode *inode, struct file *f)
900 {
901 	struct vop_vdev *vdev = f->private_data, *vdev_tmp;
902 	struct vop_info *vi = vdev->vi;
903 	struct list_head *pos, *tmp;
904 	bool found = false;
905 
906 	mutex_lock(&vdev->vdev_mutex);
907 	if (vdev->deleted)
908 		goto unlock;
909 	mutex_lock(&vi->vop_mutex);
910 	list_for_each_safe(pos, tmp, &vi->vdev_list) {
911 		vdev_tmp = list_entry(pos, struct vop_vdev, list);
912 		if (vdev == vdev_tmp) {
913 			vop_virtio_del_device(vdev);
914 			list_del(pos);
915 			found = true;
916 			break;
917 		}
918 	}
919 	mutex_unlock(&vi->vop_mutex);
920 unlock:
921 	mutex_unlock(&vdev->vdev_mutex);
922 	if (!found)
923 		wait_for_completion(&vdev->destroy);
924 	f->private_data = NULL;
925 	kfree(vdev);
926 	return 0;
927 }
928 
vop_ioctl(struct file * f,unsigned int cmd,unsigned long arg)929 static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
930 {
931 	struct vop_vdev *vdev = f->private_data;
932 	struct vop_info *vi = vdev->vi;
933 	void __user *argp = (void __user *)arg;
934 	int ret;
935 
936 	switch (cmd) {
937 	case MIC_VIRTIO_ADD_DEVICE:
938 	{
939 		struct mic_device_desc dd, *dd_config;
940 
941 		if (copy_from_user(&dd, argp, sizeof(dd)))
942 			return -EFAULT;
943 
944 		if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
945 		    dd.num_vq > MIC_MAX_VRINGS)
946 			return -EINVAL;
947 
948 		dd_config = memdup_user(argp, mic_desc_size(&dd));
949 		if (IS_ERR(dd_config))
950 			return PTR_ERR(dd_config);
951 
952 		/* Ensure desc has not changed between the two reads */
953 		if (memcmp(&dd, dd_config, sizeof(dd))) {
954 			ret = -EINVAL;
955 			goto free_ret;
956 		}
957 		mutex_lock(&vdev->vdev_mutex);
958 		mutex_lock(&vi->vop_mutex);
959 		ret = vop_virtio_add_device(vdev, dd_config);
960 		if (ret)
961 			goto unlock_ret;
962 		list_add_tail(&vdev->list, &vi->vdev_list);
963 unlock_ret:
964 		mutex_unlock(&vi->vop_mutex);
965 		mutex_unlock(&vdev->vdev_mutex);
966 free_ret:
967 		kfree(dd_config);
968 		return ret;
969 	}
970 	case MIC_VIRTIO_COPY_DESC:
971 	{
972 		struct mic_copy_desc copy;
973 
974 		mutex_lock(&vdev->vdev_mutex);
975 		ret = vop_vdev_inited(vdev);
976 		if (ret)
977 			goto _unlock_ret;
978 
979 		if (copy_from_user(&copy, argp, sizeof(copy))) {
980 			ret = -EFAULT;
981 			goto _unlock_ret;
982 		}
983 
984 		ret = vop_virtio_copy_desc(vdev, &copy);
985 		if (ret < 0)
986 			goto _unlock_ret;
987 		if (copy_to_user(
988 			&((struct mic_copy_desc __user *)argp)->out_len,
989 			&copy.out_len, sizeof(copy.out_len)))
990 			ret = -EFAULT;
991 _unlock_ret:
992 		mutex_unlock(&vdev->vdev_mutex);
993 		return ret;
994 	}
995 	case MIC_VIRTIO_CONFIG_CHANGE:
996 	{
997 		void *buf;
998 
999 		mutex_lock(&vdev->vdev_mutex);
1000 		ret = vop_vdev_inited(vdev);
1001 		if (ret)
1002 			goto __unlock_ret;
1003 		buf = memdup_user(argp, vdev->dd->config_len);
1004 		if (IS_ERR(buf)) {
1005 			ret = PTR_ERR(buf);
1006 			goto __unlock_ret;
1007 		}
1008 		ret = vop_virtio_config_change(vdev, buf);
1009 		kfree(buf);
1010 __unlock_ret:
1011 		mutex_unlock(&vdev->vdev_mutex);
1012 		return ret;
1013 	}
1014 	default:
1015 		return -ENOIOCTLCMD;
1016 	};
1017 	return 0;
1018 }
1019 
1020 /*
1021  * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
1022  * not when previously enqueued buffers may be available. This means that
1023  * in the card->host (TX) path, when userspace is unblocked by poll it
1024  * must drain all available descriptors or it can stall.
1025  */
vop_poll(struct file * f,poll_table * wait)1026 static __poll_t vop_poll(struct file *f, poll_table *wait)
1027 {
1028 	struct vop_vdev *vdev = f->private_data;
1029 	__poll_t mask = 0;
1030 
1031 	mutex_lock(&vdev->vdev_mutex);
1032 	if (vop_vdev_inited(vdev)) {
1033 		mask = EPOLLERR;
1034 		goto done;
1035 	}
1036 	poll_wait(f, &vdev->waitq, wait);
1037 	if (vop_vdev_inited(vdev)) {
1038 		mask = EPOLLERR;
1039 	} else if (vdev->poll_wake) {
1040 		vdev->poll_wake = 0;
1041 		mask = EPOLLIN | EPOLLOUT;
1042 	}
1043 done:
1044 	mutex_unlock(&vdev->vdev_mutex);
1045 	return mask;
1046 }
1047 
1048 static inline int
vop_query_offset(struct vop_vdev * vdev,unsigned long offset,unsigned long * size,unsigned long * pa)1049 vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
1050 		 unsigned long *size, unsigned long *pa)
1051 {
1052 	struct vop_device *vpdev = vdev->vpdev;
1053 	unsigned long start = MIC_DP_SIZE;
1054 	int i;
1055 
1056 	/*
1057 	 * MMAP interface is as follows:
1058 	 * offset				region
1059 	 * 0x0					virtio device_page
1060 	 * 0x1000				first vring
1061 	 * 0x1000 + size of 1st vring		second vring
1062 	 * ....
1063 	 */
1064 	if (!offset) {
1065 		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
1066 		*size = MIC_DP_SIZE;
1067 		return 0;
1068 	}
1069 
1070 	for (i = 0; i < vdev->dd->num_vq; i++) {
1071 		struct vop_vringh *vvr = &vdev->vvr[i];
1072 
1073 		if (offset == start) {
1074 			*pa = virt_to_phys(vvr->vring.va);
1075 			*size = vvr->vring.len;
1076 			return 0;
1077 		}
1078 		start += vvr->vring.len;
1079 	}
1080 	return -1;
1081 }
1082 
1083 /*
1084  * Maps the device page and virtio rings to user space for readonly access.
1085  */
vop_mmap(struct file * f,struct vm_area_struct * vma)1086 static int vop_mmap(struct file *f, struct vm_area_struct *vma)
1087 {
1088 	struct vop_vdev *vdev = f->private_data;
1089 	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
1090 	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
1091 	int i, err;
1092 
1093 	err = vop_vdev_inited(vdev);
1094 	if (err)
1095 		goto ret;
1096 	if (vma->vm_flags & VM_WRITE) {
1097 		err = -EACCES;
1098 		goto ret;
1099 	}
1100 	while (size_rem) {
1101 		i = vop_query_offset(vdev, offset, &size, &pa);
1102 		if (i < 0) {
1103 			err = -EINVAL;
1104 			goto ret;
1105 		}
1106 		err = remap_pfn_range(vma, vma->vm_start + offset,
1107 				      pa >> PAGE_SHIFT, size,
1108 				      vma->vm_page_prot);
1109 		if (err)
1110 			goto ret;
1111 		size_rem -= size;
1112 		offset += size;
1113 	}
1114 ret:
1115 	return err;
1116 }
1117 
1118 static const struct file_operations vop_fops = {
1119 	.open = vop_open,
1120 	.release = vop_release,
1121 	.unlocked_ioctl = vop_ioctl,
1122 	.poll = vop_poll,
1123 	.mmap = vop_mmap,
1124 	.owner = THIS_MODULE,
1125 };
1126 
vop_host_init(struct vop_info * vi)1127 int vop_host_init(struct vop_info *vi)
1128 {
1129 	int rc;
1130 	struct miscdevice *mdev;
1131 	struct vop_device *vpdev = vi->vpdev;
1132 
1133 	INIT_LIST_HEAD(&vi->vdev_list);
1134 	vi->dma_ch = vpdev->dma_ch;
1135 	mdev = &vi->miscdev;
1136 	mdev->minor = MISC_DYNAMIC_MINOR;
1137 	snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
1138 	mdev->name = vi->name;
1139 	mdev->fops = &vop_fops;
1140 	mdev->parent = &vpdev->dev;
1141 
1142 	rc = misc_register(mdev);
1143 	if (rc)
1144 		dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
1145 	return rc;
1146 }
1147 
vop_host_uninit(struct vop_info * vi)1148 void vop_host_uninit(struct vop_info *vi)
1149 {
1150 	struct list_head *pos, *tmp;
1151 	struct vop_vdev *vdev;
1152 
1153 	mutex_lock(&vi->vop_mutex);
1154 	vop_virtio_reset_devices(vi);
1155 	list_for_each_safe(pos, tmp, &vi->vdev_list) {
1156 		vdev = list_entry(pos, struct vop_vdev, list);
1157 		list_del(pos);
1158 		reinit_completion(&vdev->destroy);
1159 		mutex_unlock(&vi->vop_mutex);
1160 		mutex_lock(&vdev->vdev_mutex);
1161 		vop_virtio_del_device(vdev);
1162 		vdev->deleted = true;
1163 		mutex_unlock(&vdev->vdev_mutex);
1164 		complete(&vdev->destroy);
1165 		mutex_lock(&vi->vop_mutex);
1166 	}
1167 	mutex_unlock(&vi->vop_mutex);
1168 	misc_deregister(&vi->miscdev);
1169 }
1170