// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Intel SCIF driver.
 */
#include "scif_main.h"

/*
 * struct scif_vma_info - Information about a remote memory mapping
 *			  created via scif_mmap(..)
 * @vma: VM area struct
 * @list: link to list of active vmas
 */
struct scif_vma_info {
	struct vm_area_struct *vma;
	struct list_head list;
};

void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_window *recv_window =
		(struct scif_window *)msg->payload[0];
	struct scif_endpt *ep;

	ep = (struct scif_endpt *)recv_window->ep;
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = recv_window->prot;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.reg_list;
	msg->payload[0] = ep->remote_ep;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	if (scif_query_window(&req)) {
		dev_err(&scifdev->sdev->dev,
			"%s %d -ENXIO\n", __func__, __LINE__);
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}

	scif_put_window(window, window->nr_pages);

	if (!window->ref_count) {
		atomic_inc(&ep->rma_info.tw_refcount);
		ep->rma_info.async_list_del = 1;
		list_del_init(&window->list);
		scif_free_window_offset(ep, window, window->offset);
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (window && !window->ref_count)
		scif_queue_for_cleanup(window, &scif_info.rma);
}

/*
 * Remove valid remote memory mappings created via scif_mmap(..) from the
 * process address space since the remote node is lost
 */
static void __scif_zap_mmaps(struct scif_endpt *ep)
{
	struct list_head *item;
	struct scif_vma_info *info;
	struct vm_area_struct *vma;
	unsigned long size;

	spin_lock(&ep->lock);
	list_for_each(item, &ep->rma_info.vma_list) {
		info = list_entry(item, struct scif_vma_info, list);
		vma = info->vma;
		size = vma->vm_end - vma->vm_start;
		zap_vma_ptes(vma, vma->vm_start, size);
		dev_dbg(scif_info.mdev.this_device,
			"%s ep %p zap vma %p size 0x%lx\n",
			__func__, ep, info->vma, size);
	}
	spin_unlock(&ep->lock);
}

/*
 * Traverse the list of endpoints for a particular remote node and
 * zap valid remote memory mappings since the remote node is lost
 */
static void _scif_zap_mmaps(int node, struct list_head *head)
{
	struct scif_endpt *ep;
	struct list_head *item;

	mutex_lock(&scif_info.connlock);
	list_for_each(item, head) {
		ep = list_entry(item, struct scif_endpt, list);
		if (ep->remote_dev->node == node)
			__scif_zap_mmaps(ep);
	}
	mutex_unlock(&scif_info.connlock);
}

/*
 * Wrapper for removing remote memory mappings for a particular node. This API
 * is called by peer nodes as part of handling a lost node.
 */
void scif_zap_mmaps(int node)
{
	_scif_zap_mmaps(node, &scif_info.connected);
	_scif_zap_mmaps(node, &scif_info.disconnected);
}

/*
 * This API is only called while handling a lost node:
 * a) Remote node is dead.
 * b) Remote memory mappings have been zapped
 * So we can traverse the remote_reg_list without any locks. Since
 * the window has not yet been unregistered we can drop the ref count
 * and queue it to the cleanup thread.
 */
static void __scif_cleanup_rma_for_zombies(struct scif_endpt *ep)
{
	struct list_head *pos, *tmp;
	struct scif_window *window;

	list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) {
		window = list_entry(pos, struct scif_window, list);
		if (window->ref_count)
			scif_put_window(window, window->nr_pages);
		else
			dev_err(scif_info.mdev.this_device,
				"%s %d unexpected\n",
				__func__, __LINE__);
		if (!window->ref_count) {
			atomic_inc(&ep->rma_info.tw_refcount);
			list_del_init(&window->list);
			scif_queue_for_cleanup(window, &scif_info.rma);
		}
	}
}

/* Cleanup remote registration lists for zombie endpoints */
void scif_cleanup_rma_for_zombies(int node)
{
	struct scif_endpt *ep;
	struct list_head *item;

	mutex_lock(&scif_info.eplock);
	list_for_each(item, &scif_info.zombie) {
		ep = list_entry(item, struct scif_endpt, list);
		if (ep->remote_dev && ep->remote_dev->node == node)
			__scif_cleanup_rma_for_zombies(ep);
	}
	mutex_unlock(&scif_info.eplock);
	flush_work(&scif_info.misc_work);
}
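
/*
 * Illustrative sketch (an assumption about the caller, not code from
 * this file): the lost node path is expected to drive the two helpers
 * above in order, first zapping live mappings and then reaping windows
 * held by zombie endpoints:
 *
 *	scif_zap_mmaps(node);
 *	scif_cleanup_rma_for_zombies(node);
 */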

/* Insert the VMA into the per endpoint VMA list */
static int scif_insert_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
{
	struct scif_vma_info *info;
	int err = 0;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		err = -ENOMEM;
		goto done;
	}
	info->vma = vma;
	spin_lock(&ep->lock);
	list_add_tail(&info->list, &ep->rma_info.vma_list);
	spin_unlock(&ep->lock);
done:
	return err;
}

/* Delete the VMA from the per endpoint VMA list */
static void scif_delete_vma(struct scif_endpt *ep, struct vm_area_struct *vma)
{
	struct list_head *item;
	struct scif_vma_info *info;

	spin_lock(&ep->lock);
	list_for_each(item, &ep->rma_info.vma_list) {
		info = list_entry(item, struct scif_vma_info, list);
		if (info->vma == vma) {
			list_del(&info->list);
			kfree(info);
			break;
		}
	}
	spin_unlock(&ep->lock);
}

static phys_addr_t scif_get_phys(phys_addr_t phys, struct scif_endpt *ep)
{
	struct scif_dev *scifdev = (struct scif_dev *)ep->remote_dev;
	struct scif_hw_dev *sdev = scifdev->sdev;
	phys_addr_t out_phys, apt_base = 0;

	/*
	 * If the DMA address is card relative then we need to add the
	 * aperture base for mmap to work correctly
	 */
	if (!scifdev_self(scifdev) && sdev->aper && sdev->card_rel_da)
		apt_base = sdev->aper->pa;
	out_phys = apt_base + phys;
	return out_phys;
}
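
/*
 * Worked example with hypothetical values: if the peer's aperture
 * starts at pa = 0x8000000000 and a window holds the card relative
 * DMA address 0x1000, scif_get_phys() returns
 * 0x8000000000 + 0x1000 = 0x8000001000, the host physical address
 * that mmap must actually target.
 */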

int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
		   struct scif_range **pages)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	int nr_pages, err, i;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_pages: ep %p offset 0x%lx len 0x%lx\n",
		ep, offset, len);
	err = scif_verify_epd(ep);
	if (err)
		return err;

	if (!len || (offset < 0) ||
	    (offset + len < offset) ||
	    (ALIGN(offset, PAGE_SIZE) != offset) ||
	    (ALIGN(len, PAGE_SIZE) != len))
		return -EINVAL;

	nr_pages = len >> PAGE_SHIFT;

	req.out_window = &window;
	req.offset = offset;
	req.prot = 0;
	req.nr_bytes = len;
	req.type = SCIF_WINDOW_SINGLE;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error;
	}

	/* Allocate scif_range */
	*pages = kzalloc(sizeof(**pages), GFP_KERNEL);
	if (!*pages) {
		err = -ENOMEM;
		goto error;
	}

	/* Allocate phys addr array */
	(*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t));
	if (!(*pages)->phys_addr) {
		err = -ENOMEM;
		goto error;
	}

	if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev)) {
		/* Allocate virtual address array */
		(*pages)->va = scif_zalloc(nr_pages * sizeof(void *));
		if (!(*pages)->va) {
			err = -ENOMEM;
			goto error;
		}
	}
	/* Populate the values */
	(*pages)->cookie = window;
	(*pages)->nr_pages = nr_pages;
	(*pages)->prot_flags = window->prot;

	for (i = 0; i < nr_pages; i++) {
		(*pages)->phys_addr[i] =
			__scif_off_to_dma_addr(window, offset +
					       (i * PAGE_SIZE));
		(*pages)->phys_addr[i] = scif_get_phys((*pages)->phys_addr[i],
						       ep);
		if (scif_is_mgmt_node() && !scifdev_self(ep->remote_dev))
			(*pages)->va[i] =
				ep->remote_dev->sdev->aper->va +
				(*pages)->phys_addr[i] -
				ep->remote_dev->sdev->aper->pa;
	}

	scif_get_window(window, nr_pages);
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (err) {
		if (*pages) {
			scif_free((*pages)->phys_addr,
				  nr_pages * sizeof(dma_addr_t));
			scif_free((*pages)->va,
				  nr_pages * sizeof(void *));
			kfree(*pages);
			*pages = NULL;
		}
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
	}
	return err;
}
EXPORT_SYMBOL_GPL(scif_get_pages);

int scif_put_pages(struct scif_range *pages)
{
	struct scif_endpt *ep;
	struct scif_window *window;
	struct scifmsg msg;

	if (!pages || !pages->cookie)
		return -EINVAL;

	window = pages->cookie;

	if (!window || window->magic != SCIFEP_MAGIC)
		return -EINVAL;

	ep = (struct scif_endpt *)window->ep;
	/*
	 * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the
	 * caller is allowed to release its references to the pages;
	 * otherwise the endpoint was never connected in the first place,
	 * hence the -ENOTCONN.
	 */
	if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED)
		return -ENOTCONN;

	mutex_lock(&ep->rma_info.rma_lock);

	scif_put_window(window, pages->nr_pages);

	/* Initiate window destruction if ref count is zero */
	if (!window->ref_count) {
		list_del(&window->list);
		mutex_unlock(&ep->rma_info.rma_lock);
		scif_drain_dma_intr(ep->remote_dev->sdev,
				    ep->rma_info.dma_chan);
		/* Inform the peer about this window being destroyed. */
		msg.uop = SCIF_MUNMAP;
		msg.src = ep->port;
		msg.payload[0] = window->peer_window;
		/* No error handling for notification messages */
		scif_nodeqp_send(ep->remote_dev, &msg);
		/* Destroy this window from the peer's registered AS */
		scif_destroy_remote_window(window);
	} else {
		mutex_unlock(&ep->rma_info.rma_lock);
	}

	scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t));
	scif_free(pages->va, pages->nr_pages * sizeof(void *));
	kfree(pages);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_put_pages);
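
/*
 * Usage sketch for the two exported calls above (illustrative only;
 * "epd", "offset" and "len" stand for a connected endpoint and a
 * registered, page-aligned range supplied by the caller):
 *
 *	struct scif_range *range;
 *	int i, err;
 *
 *	err = scif_get_pages(epd, offset, len, &range);
 *	if (err)
 *		return err;
 *	for (i = 0; i < range->nr_pages; i++)
 *		pr_debug("page %d phys %pad\n", i, &range->phys_addr[i]);
 *	err = scif_put_pages(range);	// drops the window references
 */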

/*
 * scif_rma_list_mmap:
 *
 * Traverse the remote registration list starting from start_window:
 * 1) Create VtoP mappings via remap_pfn_range(..)
 * 2) Once step 1) completes successfully, traverse the range of
 *    windows again and bump the reference count.
 * RMA lock must be held.
 */
static int scif_rma_list_mmap(struct scif_window *start_window, s64 offset,
			      int nr_pages, struct vm_area_struct *vma)
{
	s64 end_offset, loop_offset = offset;
	struct scif_window *window = start_window;
	int loop_nr_pages, nr_pages_left = nr_pages;
	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
	struct list_head *head = &ep->rma_info.remote_reg_list;
	int i, err = 0;
	dma_addr_t phys_addr;
	struct scif_window_iter src_win_iter;
	size_t contig_bytes = 0;

	might_sleep();
	list_for_each_entry_from(window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_init_window_iter(window, &src_win_iter);
		for (i = 0; i < loop_nr_pages; i++) {
			phys_addr = scif_off_to_dma_addr(window, loop_offset,
							 &contig_bytes,
							 &src_win_iter);
			phys_addr = scif_get_phys(phys_addr, ep);
			err = remap_pfn_range(vma,
					      vma->vm_start +
					      loop_offset - offset,
					      phys_addr >> PAGE_SHIFT,
					      PAGE_SIZE,
					      vma->vm_page_prot);
			if (err)
				goto error;
			loop_offset += PAGE_SIZE;
		}
		nr_pages_left -= loop_nr_pages;
		if (!nr_pages_left)
			break;
	}
	/*
	 * No more failures are expected beyond this point. Bump up the
	 * ref count on all the windows. The references are taken in a
	 * second traversal from start_window so that an error from
	 * remap_pfn_range(..) partway through the first pass does not
	 * leave stray references behind.
	 */
	loop_offset = offset;
	nr_pages_left = nr_pages;
	window = start_window;
	head = &ep->rma_info.remote_reg_list;
	list_for_each_entry_from(window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_get_window(window, loop_nr_pages);
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
error:
	if (err)
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
	return err;
}

/*
 * scif_rma_list_munmap:
 *
 * Traverse the remote registration list starting from start_window:
 * 1) Decrement ref count.
 * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer.
 * RMA lock must be held.
 */
static void scif_rma_list_munmap(struct scif_window *start_window,
				 s64 offset, int nr_pages)
{
	struct scifmsg msg;
	s64 loop_offset = offset, end_offset;
	int loop_nr_pages, nr_pages_left = nr_pages;
	struct scif_endpt *ep = (struct scif_endpt *)start_window->ep;
	struct list_head *head = &ep->rma_info.remote_reg_list;
	struct scif_window *window = start_window, *_window;

	msg.uop = SCIF_MUNMAP;
	msg.src = ep->port;
	list_for_each_entry_safe_from(window, _window, head, list) {
		end_offset = window->offset +
			(window->nr_pages << PAGE_SHIFT);
		loop_nr_pages = min_t(int,
				      (end_offset - loop_offset) >> PAGE_SHIFT,
				      nr_pages_left);
		scif_put_window(window, loop_nr_pages);
		if (!window->ref_count) {
			struct scif_dev *rdev = ep->remote_dev;

			scif_drain_dma_intr(rdev->sdev,
					    ep->rma_info.dma_chan);
			/* Inform the peer about this munmap */
			msg.payload[0] = window->peer_window;
			/* No error handling for notification messages. */
			scif_nodeqp_send(ep->remote_dev, &msg);
			list_del(&window->list);
			/* Destroy this window from the peer's registered AS */
			scif_destroy_remote_window(window);
		}
		nr_pages_left -= loop_nr_pages;
		loop_offset += (loop_nr_pages << PAGE_SHIFT);
		if (!nr_pages_left)
			break;
	}
}

/*
 * The private data field of each VMA used to mmap a remote window
 * points to an instance of struct vma_pvt.
 */
struct vma_pvt {
	struct scif_endpt *ep;	/* End point for remote window */
	s64 offset;		/* offset within remote window */
	bool valid_offset;	/* offset is valid only if the original
				 * mmap request was for a single page;
				 * otherwise the offset within the vma
				 * is the correct offset
				 */
	struct kref ref;
};

static void vma_pvt_release(struct kref *ref)
{
	struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref);

	kfree(vmapvt);
}
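
/*
 * Reference counting sketch for vma_pvt, matching the callbacks below:
 * scif_mmap() takes the initial reference, every scif_vma_open() (fork
 * or VMA split) takes one more, and each scif_munmap() drops one, so
 * vma_pvt_release() frees the structure only when the last VMA that
 * maps the remote window goes away:
 *
 *	kref_init(&vmapvt->ref);			// mmap:  count = 1
 *	kref_get(&vmapvt->ref);				// open:  count + 1
 *	kref_put(&vmapvt->ref, vma_pvt_release);	// close: count - 1
 */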

/**
 * scif_vma_open - VMA open driver callback
 * @vma: virtual memory area.
 * The open method is called by the kernel to allow the subsystem implementing
 * the VMA to initialize the area. This method is invoked any time a new
 * reference to the VMA is made (when a process forks, for example).
 * The one exception happens when the VMA is first created by mmap;
 * in this case, the driver's mmap method is called instead.
 * This function is also invoked when an existing VMA is split by the kernel
 * due to a call to munmap on a subset of the VMA, resulting in two VMAs.
 * The kernel invokes this function only on one of the two VMAs.
 */
static void scif_vma_open(struct vm_area_struct *vma)
{
	struct vma_pvt *vmapvt = vma->vm_private_data;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n",
		vma->vm_start, vma->vm_end);
	scif_insert_vma(vmapvt->ep, vma);
	kref_get(&vmapvt->ref);
}

/**
 * scif_munmap - VMA close driver callback.
 * @vma: virtual memory area.
 * When an area is destroyed, the kernel calls its close operation.
 * Note that there's no usage count associated with VMAs; the area
 * is opened and closed exactly once by each process that uses it.
 */
static void scif_munmap(struct vm_area_struct *vma)
{
	struct scif_endpt *ep;
	struct vma_pvt *vmapvt = vma->vm_private_data;
	int nr_pages = vma_pages(vma);
	s64 offset;
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	int err;

	might_sleep();
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n",
		vma->vm_start, vma->vm_end);
	ep = vmapvt->ep;
	offset = vmapvt->valid_offset ? vmapvt->offset :
		vma->vm_pgoff << PAGE_SHIFT;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI munmap: ep %p nr_pages 0x%x offset 0x%llx\n",
		ep, nr_pages, offset);
	req.out_window = &window;
	req.offset = offset;
	req.nr_bytes = vma->vm_end - vma->vm_start;
	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
	req.type = SCIF_WINDOW_PARTIAL;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);

	err = scif_query_window(&req);
	if (err)
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
	else
		scif_rma_list_munmap(window, offset, nr_pages);

	mutex_unlock(&ep->rma_info.rma_lock);
	/*
	 * The kernel probably zeroes these out but we still want
	 * to clean up our own mess just in case.
	 */
	vma->vm_ops = NULL;
	vma->vm_private_data = NULL;
	kref_put(&vmapvt->ref, vma_pvt_release);
	scif_delete_vma(ep, vma);
}

static const struct vm_operations_struct scif_vm_ops = {
	.open = scif_vma_open,
	.close = scif_munmap,
};

/**
 * scif_mmap - Map pages in virtual address space to a remote window.
 * @vma: virtual memory area.
 * @epd: endpoint descriptor
 *
 * Return: Upon successful completion, scif_mmap() returns zero
 * else an appropriate error is returned as documented in scif.h
 */
int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
	int nr_pages = vma_pages(vma);
	int err;
	struct vma_pvt *vmapvt;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI mmap: ep %p start_offset 0x%llx nr_pages 0x%x\n",
		ep, start_offset, nr_pages);
	err = scif_verify_epd(ep);
	if (err)
		return err;

	might_sleep();

	err = scif_insert_vma(ep, vma);
	if (err)
		return err;

	vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL);
	if (!vmapvt) {
		scif_delete_vma(ep, vma);
		return -ENOMEM;
	}

	vmapvt->ep = ep;
	kref_init(&vmapvt->ref);

	req.out_window = &window;
	req.offset = start_offset;
	req.nr_bytes = vma->vm_end - vma->vm_start;
	req.prot = vma->vm_flags & (VM_READ | VM_WRITE);
	req.type = SCIF_WINDOW_PARTIAL;
	req.head = &ep->rma_info.remote_reg_list;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unlock;
	}

	/*
	 * Loopback keeps the default page protection; mappings of a
	 * remote node's memory are made write-combining.
	 */
	if (!scifdev_self(ep->remote_dev))
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

	/*
	 * VM_DONTCOPY - Do not copy this vma on fork
	 * VM_DONTEXPAND - Cannot expand with mremap()
	 * VM_DONTDUMP - Do not include in core dumps
	 * VM_PFNMAP - Page-ranges managed without "struct page"
	 * VM_IO - Memory mapped I/O or similar
	 *
	 * We do not want to copy this VMA automatically on a fork(),
	 * expand this VMA due to mremap() or swap out these pages since
	 * the VMA is actually backed by physical pages in the remote
	 * node's physical memory and not via a struct page.
	 */
	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;

	if (!scifdev_self(ep->remote_dev))
		vma->vm_flags |= VM_IO | VM_PFNMAP;

	/* Map this range of windows */
	err = scif_rma_list_mmap(window, start_offset, nr_pages, vma);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unlock;
	}
	/* Set up the driver callbacks */
	vma->vm_ops = &scif_vm_ops;
	vma->vm_private_data = vmapvt;
error_unlock:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (err) {
		kfree(vmapvt);
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		scif_delete_vma(ep, vma);
	}
	return err;
}
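
/*
 * Hypothetical glue (an assumption; the real hookup lives outside this
 * file, in the SCIF character device code): a file_operations ->mmap
 * handler would recover the endpoint from the struct file and pass the
 * VMA to scif_mmap():
 *
 *	static int scif_fdmmap(struct file *f, struct vm_area_struct *vma)
 *	{
 *		struct scif_endpt *priv = f->private_data;
 *
 *		return scif_mmap(vma, priv);
 *	}
 */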