1 /*
2  * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/pci.h>
35 #include <linux/poll.h>
36 #include <linux/cdev.h>
37 #include <linux/swap.h>
38 #include <linux/export.h>
39 #include <linux/vmalloc.h>
40 #include <linux/slab.h>
41 #include <linux/highmem.h>
42 #include <linux/io.h>
43 #include <linux/aio.h>
44 #include <linux/jiffies.h>
45 #include <linux/cpu.h>
46 #include <asm/pgtable.h>
47 
48 #include "ipath_kernel.h"
49 #include "ipath_common.h"
50 #include "ipath_user_sdma.h"
51 
52 static int ipath_open(struct inode *, struct file *);
53 static int ipath_close(struct inode *, struct file *);
54 static ssize_t ipath_write(struct file *, const char __user *, size_t,
55 			   loff_t *);
56 static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
57 			    unsigned long, loff_t);
58 static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
59 static int ipath_mmap(struct file *, struct vm_area_struct *);
60 
61 static const struct file_operations ipath_file_ops = {
62 	.owner = THIS_MODULE,
63 	.write = ipath_write,
64 	.aio_write = ipath_writev,
65 	.open = ipath_open,
66 	.release = ipath_close,
67 	.poll = ipath_poll,
68 	.mmap = ipath_mmap,
69 	.llseek = noop_llseek,
70 };
71 
72 /*
73  * Convert kernel virtual addresses to physical addresses so they don't
74  * potentially conflict with the chip addresses used as mmap offsets.
75  * It doesn't really matter what mmap offset we use as long as we can
76  * interpret it correctly.
77  */
78 static u64 cvt_kvaddr(void *p)
79 {
80 	struct page *page;
81 	u64 paddr = 0;
82 
83 	page = vmalloc_to_page(p);
84 	if (page)
85 		paddr = page_to_pfn(page) << PAGE_SHIFT;
86 
87 	return paddr;
88 }
89 
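/*
 * Fill in an ipath_base_info structure describing this port's receive
 * header queue, eager buffers, TIDs, and PIO buffers (adjusted for
 * shared/subport use), then copy it to the user buffer at ubase.
 */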
90 static int ipath_get_base_info(struct file *fp,
91 			       void __user *ubase, size_t ubase_size)
92 {
93 	struct ipath_portdata *pd = port_fp(fp);
94 	int ret = 0;
95 	struct ipath_base_info *kinfo = NULL;
96 	struct ipath_devdata *dd = pd->port_dd;
97 	unsigned subport_cnt;
98 	int shared, master;
99 	size_t sz;
100 
101 	subport_cnt = pd->port_subport_cnt;
102 	if (!subport_cnt) {
103 		shared = 0;
104 		master = 0;
105 		subport_cnt = 1;
106 	} else {
107 		shared = 1;
108 		master = !subport_fp(fp);
109 	}
110 
111 	sz = sizeof(*kinfo);
112 	/* If port sharing is not requested, allow the old size structure */
113 	if (!shared)
114 		sz -= 7 * sizeof(u64);
115 	if (ubase_size < sz) {
116 		ipath_cdbg(PROC,
117 			   "Base size %zu, need %zu (version mismatch?)\n",
118 			   ubase_size, sz);
119 		ret = -EINVAL;
120 		goto bail;
121 	}
122 
123 	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
124 	if (kinfo == NULL) {
125 		ret = -ENOMEM;
126 		goto bail;
127 	}
128 
129 	ret = dd->ipath_f_get_base_info(pd, kinfo);
130 	if (ret < 0)
131 		goto bail;
132 
133 	kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
134 	kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
135 	kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
136 	kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
137 	/*
138 	 * have to mmap whole thing
139 	 */
140 	kinfo->spi_rcv_egrbuftotlen =
141 		pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
142 	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
143 	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
144 		pd->port_rcvegrbuf_chunks;
145 	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
146 	if (master)
147 		kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
148 	/*
149 	 * for this use, this may be ipath_cfgports summed over all chips
150 	 * that are configured and present
151 	 */
152 	kinfo->spi_nports = dd->ipath_cfgports;
153 	/* unit (chip/board) our port is on */
154 	kinfo->spi_unit = dd->ipath_unit;
155 	/* for now, only a single page */
156 	kinfo->spi_tid_maxsize = PAGE_SIZE;
157 
158 	/*
159 	 * Doing this per port, and based on the skip value, etc.  This has
160 	 * to be the actual buffer size, since the protocol code treats it
161 	 * as an array.
162 	 *
163 	 * These have to be set to user addresses in the user code via mmap.
164 	 * These values are used on return to user code for the mmap target
165 	 * addresses only.  For 32 bit, same 44 bit address problem, so use
166 	 * the physical address, not virtual.  Before 2.6.11, using the
167 	 * page_address() macro worked, but in 2.6.11, even that returns the
168 	 * full 64 bit address (upper bits all 1's).  So far, using the
169 	 * physical addresses (or chip offsets, for chip mapping) works, but
170 	 * no doubt some future kernel release will change that, and we'll be
171 	 * on to yet another method of dealing with this.
172 	 */
173 	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
174 	kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
175 	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
176 	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
177 	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
178 		(void *) dd->ipath_statusp -
179 		(void *) dd->ipath_pioavailregs_dma;
180 	if (!shared) {
181 		kinfo->spi_piocnt = pd->port_piocnt;
182 		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
183 		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
184 			dd->ipath_ureg_align * pd->port_port;
185 	} else if (master) {
186 		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
187 				    (pd->port_piocnt % subport_cnt);
188 		/* Master's PIO buffers are after all the slaves' */
189 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
190 			dd->ipath_palign *
191 			(pd->port_piocnt - kinfo->spi_piocnt);
192 	} else {
193 		unsigned slave = subport_fp(fp) - 1;
194 
195 		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
196 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
197 			dd->ipath_palign * kinfo->spi_piocnt * slave;
198 	}
199 
200 	if (shared) {
201 		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
202 			dd->ipath_ureg_align * pd->port_port;
203 		kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
204 		kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
205 		kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;
206 
207 		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
208 			PAGE_SIZE * subport_fp(fp));
209 
210 		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
211 			pd->port_rcvhdrq_size * subport_fp(fp));
212 		kinfo->spi_rcvhdr_tailaddr = 0;
213 		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
214 			pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
215 			subport_fp(fp));
216 
217 		kinfo->spi_subport_uregbase =
218 			cvt_kvaddr(pd->subport_uregbase);
219 		kinfo->spi_subport_rcvegrbuf =
220 			cvt_kvaddr(pd->subport_rcvegrbuf);
221 		kinfo->spi_subport_rcvhdr_base =
222 			cvt_kvaddr(pd->subport_rcvhdr_base);
223 		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
224 			kinfo->spi_port, kinfo->spi_runtime_flags,
225 			(unsigned long long) kinfo->spi_subport_uregbase,
226 			(unsigned long long) kinfo->spi_subport_rcvegrbuf,
227 			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
228 	}
229 
230 	/*
231 	 * All user buffers are 2KB buffers.  If we ever support
232 	 * giving 4KB buffers to user processes, this will need some
233 	 * work.
234 	 */
235 	kinfo->spi_pioindex = (kinfo->spi_piobufbase -
236 		(dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
237 	kinfo->spi_pioalign = dd->ipath_palign;
238 
239 	kinfo->spi_qpair = IPATH_KD_QP;
240 	/*
241 	 * user mode PIO buffers are always 2KB, even when 4KB can
242 	 * be received, and sent via the kernel; this is ibmaxlen
243 	 * for 2K MTU.
244 	 */
245 	kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
246 	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
247 	kinfo->spi_port = pd->port_port;
248 	kinfo->spi_subport = subport_fp(fp);
249 	kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
250 	kinfo->spi_hw_version = dd->ipath_revision;
251 
252 	if (master) {
253 		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
254 	}
255 
256 	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
257 	if (copy_to_user(ubase, kinfo, sz))
258 		ret = -EFAULT;
259 
260 bail:
261 	kfree(kinfo);
262 	return ret;
263 }
264 
265 /**
266  * ipath_tid_update - update a port TID
267  * @pd: the port
268  * @fp: the ipath device file
269  * @ti: the TID information
270  *
271  * The new implementation as of Oct 2004 is that the driver assigns
272  * the tid and returns it to the caller.   To make it easier to
273  * catch bugs, and to reduce search time, we keep a cursor for
274  * each port, walking the shadow tid array to find one that's not
275  * in use.
276  *
277  * For now, if we can't allocate the full list, we fail, although
278  * in the long run, we'll allocate as many as we can, and the
279  * caller will deal with that by trying the remaining pages later.
280  * That means that when we fail, we have to mark the tids as not in
281  * use again, in our shadow copy.
282  *
283  * It's up to the caller to free the tids when they are done.
284  * We'll unlock the pages as they free them.
285  *
286  * Also, right now we are locking one page at a time, but since
287  * the intended use of this routine is for a single group of
288  * virtually contiguous pages, that should change to improve
289  * performance.
290  */
291 static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
292 			    const struct ipath_tid_info *ti)
293 {
294 	int ret = 0, ntids;
295 	u32 tid, porttid, cnt, i, tidcnt, tidoff;
296 	u16 *tidlist;
297 	struct ipath_devdata *dd = pd->port_dd;
298 	u64 physaddr;
299 	unsigned long vaddr;
300 	u64 __iomem *tidbase;
301 	unsigned long tidmap[8];
302 	struct page **pagep = NULL;
303 	unsigned subport = subport_fp(fp);
304 
305 	if (!dd->ipath_pageshadow) {
306 		ret = -ENOMEM;
307 		goto done;
308 	}
309 
310 	cnt = ti->tidcnt;
311 	if (!cnt) {
312 		ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
313 			  (unsigned long long) ti->tidlist);
314 		/*
315 		 * Should we treat this as success?  Likely a bug.
316 		 */
317 		ret = -EFAULT;
318 		goto done;
319 	}
320 	porttid = pd->port_port * dd->ipath_rcvtidcnt;
321 	if (!pd->port_subport_cnt) {
322 		tidcnt = dd->ipath_rcvtidcnt;
323 		tid = pd->port_tidcursor;
324 		tidoff = 0;
325 	} else if (!subport) {
326 		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
327 			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
328 		tidoff = dd->ipath_rcvtidcnt - tidcnt;
329 		porttid += tidoff;
330 		tid = tidcursor_fp(fp);
331 	} else {
332 		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
333 		tidoff = tidcnt * (subport - 1);
334 		porttid += tidoff;
335 		tid = tidcursor_fp(fp);
336 	}
337 	if (cnt > tidcnt) {
338 		/* make sure it all fits in port_tid_pg_list */
339 		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
340 			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
341 		cnt = tidcnt;
342 	}
343 	pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
344 	tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
345 
346 	memset(tidmap, 0, sizeof(tidmap));
347 	/* before decrement; chip actual # */
348 	ntids = tidcnt;
349 	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
350 				   dd->ipath_rcvtidbase +
351 				   porttid * sizeof(*tidbase));
352 
353 	ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
354 		   pd->port_port, cnt, tid, tidbase);
355 
356 	/* virtual address of first page in transfer */
357 	vaddr = ti->tidvaddr;
358 	if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
359 		       cnt * PAGE_SIZE)) {
360 		ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
361 			  (void *)vaddr, cnt);
362 		ret = -EFAULT;
363 		goto done;
364 	}
365 	ret = ipath_get_user_pages(vaddr, cnt, pagep);
366 	if (ret) {
367 		if (ret == -EBUSY) {
368 			ipath_dbg("Failed to lock addr %p, %u pages "
369 				  "(already locked)\n",
370 				  (void *) vaddr, cnt);
371 			/*
372 			 * for now, continue and see what happens; with the
373 			 * new implementation, this should never happen,
374 			 * unless perhaps the user has mpin'ed the pages
375 			 * themselves (something we need to test)
376 			 */
377 			ret = 0;
378 		} else {
379 			dev_info(&dd->pcidev->dev,
380 				 "Failed to lock addr %p, %u pages: "
381 				 "errno %d\n", (void *) vaddr, cnt, -ret);
382 			goto done;
383 		}
384 	}
385 	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
386 		for (; ntids--; tid++) {
387 			if (tid == tidcnt)
388 				tid = 0;
389 			if (!dd->ipath_pageshadow[porttid + tid])
390 				break;
391 		}
392 		if (ntids < 0) {
393 			/*
394 			 * oops, wrapped all the way through their TIDs,
395 			 * and didn't have enough free; see comments at
396 			 * start of routine
397 			 */
398 			ipath_dbg("Not enough free TIDs for %u pages "
399 				  "(index %d), failing\n", cnt, i);
400 			i--;	/* last tidlist[i] not filled in */
401 			ret = -ENOMEM;
402 			break;
403 		}
404 		tidlist[i] = tid + tidoff;
405 		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
406 			   "vaddr %lx\n", i, tid + tidoff, vaddr);
407 		/* we "know" system pages and TID pages are same size */
408 		dd->ipath_pageshadow[porttid + tid] = pagep[i];
409 		dd->ipath_physshadow[porttid + tid] = ipath_map_page(
410 			dd->pcidev, pagep[i], 0, PAGE_SIZE,
411 			PCI_DMA_FROMDEVICE);
412 		/*
413 		 * don't need atomic or it's overhead
414 		 */
415 		__set_bit(tid, tidmap);
416 		physaddr = dd->ipath_physshadow[porttid + tid];
417 		ipath_stats.sps_pagelocks++;
418 		ipath_cdbg(VERBOSE,
419 			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
420 			   tid, vaddr, (unsigned long long) physaddr,
421 			   pagep[i]);
422 		dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
423 				    physaddr);
424 		/*
425 		 * don't check this tid in ipath_portshadow, since we
426 		 * just filled it in; start with the next one.
427 		 */
428 		tid++;
429 	}
430 
431 	if (ret) {
432 		u32 limit;
433 	cleanup:
434 		/* jump here if copy out of updated info failed... */
435 		ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
436 			  -ret, i, cnt);
437 		/* same code that's in ipath_tid_free() */
438 		limit = sizeof(tidmap) * BITS_PER_BYTE;
439 		if (limit > tidcnt)
440 			/* just in case size changes in future */
441 			limit = tidcnt;
442 		tid = find_first_bit((const unsigned long *)tidmap, limit);
443 		for (; tid < limit; tid++) {
444 			if (!test_bit(tid, tidmap))
445 				continue;
446 			if (dd->ipath_pageshadow[porttid + tid]) {
447 				ipath_cdbg(VERBOSE, "Freeing TID %u\n",
448 					   tid);
449 				dd->ipath_f_put_tid(dd, &tidbase[tid],
450 						    RCVHQ_RCV_TYPE_EXPECTED,
451 						    dd->ipath_tidinvalid);
452 				pci_unmap_page(dd->pcidev,
453 					dd->ipath_physshadow[porttid + tid],
454 					PAGE_SIZE, PCI_DMA_FROMDEVICE);
455 				dd->ipath_pageshadow[porttid + tid] = NULL;
456 				ipath_stats.sps_pageunlocks++;
457 			}
458 		}
459 		ipath_release_user_pages(pagep, cnt);
460 	} else {
461 		/*
462 		 * Copy the updated array, with ipath_tid's filled in, back
463 		 * to user.  Since we did the copy in already, this "should
464 		 * never fail".  If it does, we have to clean up...
465 		 */
466 		if (copy_to_user((void __user *)
467 				 (unsigned long) ti->tidlist,
468 				 tidlist, cnt * sizeof(*tidlist))) {
469 			ret = -EFAULT;
470 			goto cleanup;
471 		}
472 		if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
473 				 tidmap, sizeof tidmap)) {
474 			ret = -EFAULT;
475 			goto cleanup;
476 		}
477 		if (tid == tidcnt)
478 			tid = 0;
479 		if (!pd->port_subport_cnt)
480 			pd->port_tidcursor = tid;
481 		else
482 			tidcursor_fp(fp) = tid;
483 	}
484 
485 done:
486 	if (ret)
487 		ipath_dbg("Failed to map %u TID pages, failing with %d\n",
488 			  ti->tidcnt, -ret);
489 	return ret;
490 }
491 
492 /**
493  * ipath_tid_free - free a port TID
494  * @pd: the port
495  * @subport: the subport
496  * @ti: the TID info
497  *
498  * right now we are unlocking one page at a time, but since
499  * the intended use of this routine is for a single group of
500  * virtually contiguous pages, that should change to improve
501  * performance.  We check that the TID is in range for this port
502  * but otherwise don't check validity; if user has an error and
503  * frees the wrong tid, it's only their own data that can thereby
504  * be corrupted.  We do check that the TID was in use, for sanity.
505  * We always use our idea of the saved address, not the address that
506  * they pass in to us.
507  */
508 
509 static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
510 			  const struct ipath_tid_info *ti)
511 {
512 	int ret = 0;
513 	u32 tid, porttid, cnt, limit, tidcnt;
514 	struct ipath_devdata *dd = pd->port_dd;
515 	u64 __iomem *tidbase;
516 	unsigned long tidmap[8];
517 
518 	if (!dd->ipath_pageshadow) {
519 		ret = -ENOMEM;
520 		goto done;
521 	}
522 
523 	if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
524 			   sizeof tidmap)) {
525 		ret = -EFAULT;
526 		goto done;
527 	}
528 
529 	porttid = pd->port_port * dd->ipath_rcvtidcnt;
530 	if (!pd->port_subport_cnt)
531 		tidcnt = dd->ipath_rcvtidcnt;
532 	else if (!subport) {
533 		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
534 			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
535 		porttid += dd->ipath_rcvtidcnt - tidcnt;
536 	} else {
537 		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
538 		porttid += tidcnt * (subport - 1);
539 	}
540 	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
541 				   dd->ipath_rcvtidbase +
542 				   porttid * sizeof(*tidbase));
543 
544 	limit = sizeof(tidmap) * BITS_PER_BYTE;
545 	if (limit > tidcnt)
546 		/* just in case size changes in future */
547 		limit = tidcnt;
548 	tid = find_first_bit(tidmap, limit);
549 	ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
550 		   "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
551 		   limit, tid, porttid);
552 	for (cnt = 0; tid < limit; tid++) {
553 		/*
554 		 * small optimization; if we detect a run of 3 or so without
555 		 * any set, use find_first_bit again.  That's mainly to
556 		 * accelerate the case where we wrapped, so we have some at
557 		 * the beginning, and some at the end, and a big gap
558 		 * in the middle.
559 		 */
560 		if (!test_bit(tid, tidmap))
561 			continue;
562 		cnt++;
563 		if (dd->ipath_pageshadow[porttid + tid]) {
564 			struct page *p;
565 			p = dd->ipath_pageshadow[porttid + tid];
566 			dd->ipath_pageshadow[porttid + tid] = NULL;
567 			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
568 				   pid_nr(pd->port_pid), tid);
569 			dd->ipath_f_put_tid(dd, &tidbase[tid],
570 					    RCVHQ_RCV_TYPE_EXPECTED,
571 					    dd->ipath_tidinvalid);
572 			pci_unmap_page(dd->pcidev,
573 				dd->ipath_physshadow[porttid + tid],
574 				PAGE_SIZE, PCI_DMA_FROMDEVICE);
575 			ipath_release_user_pages(&p, 1);
576 			ipath_stats.sps_pageunlocks++;
577 		} else
578 			ipath_dbg("Unused tid %u, ignoring\n", tid);
579 	}
580 	if (cnt != ti->tidcnt)
581 		ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
582 			  ti->tidcnt, cnt);
583 done:
584 	if (ret)
585 		ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
586 			  ti->tidcnt, -ret);
587 	return ret;
588 }
589 
590 /**
591  * ipath_set_part_key - set a partition key
592  * @pd: the port
593  * @key: the key
594  *
595  * We can have up to 4 active at a time (other than the default, which is
596  * always allowed).  This is somewhat tricky, since multiple ports may set
597  * the same key, so we reference count them, and clean up at exit.  All 4
598  * partition keys are packed into a single infinipath register.  It's an
599  * error for a process to set the same pkey multiple times.  We provide no
600  * mechanism to de-allocate a pkey at this time; we may eventually need to
601  * do that.  I've used the atomic operations, and no locking, and only make
602  * a single pass through what's available.  This should be more than
603  * adequate for some time. I'll think about spinlocks or the like if and as
604  * it's necessary.
605  */
606 static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
607 {
608 	struct ipath_devdata *dd = pd->port_dd;
609 	int i, any = 0, pidx = -1;
610 	u16 lkey = key & 0x7FFF;
611 	int ret;
612 
613 	if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
614 		/* nothing to do; this key always valid */
615 		ret = 0;
616 		goto bail;
617 	}
618 
619 	ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
620 		   "%hx:%x %hx:%x %hx:%x %hx:%x\n",
621 		   pd->port_port, key, dd->ipath_pkeys[0],
622 		   atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
623 		   atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
624 		   atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
625 		   atomic_read(&dd->ipath_pkeyrefs[3]));
626 
627 	if (!lkey) {
628 		ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
629 			   pd->port_port);
630 		ret = -EINVAL;
631 		goto bail;
632 	}
633 
634 	/*
635 	 * Set the full membership bit, because it has to be
636 	 * set in the register or the packet, and it seems
637 	 * cleaner to set in the register than to force all
638 	 * callers to set it. (see bug 4331)
639 	 */
640 	key |= 0x8000;
641 
642 	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
643 		if (!pd->port_pkeys[i] && pidx == -1)
644 			pidx = i;
645 		if (pd->port_pkeys[i] == key) {
646 			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
647 				   "(%x) more than once\n",
648 				   pd->port_port, key);
649 			ret = -EEXIST;
650 			goto bail;
651 		}
652 	}
653 	if (pidx == -1) {
654 		ipath_dbg("All pkeys for port %u already in use, "
655 			  "can't set %x\n", pd->port_port, key);
656 		ret = -EBUSY;
657 		goto bail;
658 	}
659 	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
660 		if (!dd->ipath_pkeys[i]) {
661 			any++;
662 			continue;
663 		}
664 		if (dd->ipath_pkeys[i] == key) {
665 			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
666 
667 			if (atomic_inc_return(pkrefs) > 1) {
668 				pd->port_pkeys[pidx] = key;
669 				ipath_cdbg(VERBOSE, "p%u set key %x "
670 					   "matches #%d, count now %d\n",
671 					   pd->port_port, key, i,
672 					   atomic_read(pkrefs));
673 				ret = 0;
674 				goto bail;
675 			} else {
676 				/*
677 				 * lost race, decrement count, catch below
678 				 */
679 				atomic_dec(pkrefs);
680 				ipath_cdbg(VERBOSE, "Lost race, count was "
681 					   "0, after dec, it's %d\n",
682 					   atomic_read(pkrefs));
683 				any++;
684 			}
685 		}
686 		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
687 			/*
688 			 * It makes no sense to have both the limited and
689 			 * full membership PKEY set at the same time since
690 			 * the unlimited one will disable the limited one.
691 			 */
692 			ret = -EEXIST;
693 			goto bail;
694 		}
695 	}
696 	if (!any) {
697 		ipath_dbg("port %u, all pkeys already in use, "
698 			  "can't set %x\n", pd->port_port, key);
699 		ret = -EBUSY;
700 		goto bail;
701 	}
702 	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
703 		if (!dd->ipath_pkeys[i] &&
704 		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
705 			u64 pkey;
706 
707 			/* for ipathstats, etc. */
708 			ipath_stats.sps_pkeys[i] = lkey;
709 			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
710 			pkey =
711 				(u64) dd->ipath_pkeys[0] |
712 				((u64) dd->ipath_pkeys[1] << 16) |
713 				((u64) dd->ipath_pkeys[2] << 32) |
714 				((u64) dd->ipath_pkeys[3] << 48);
715 			ipath_cdbg(PROC, "p%u set key %x in #%d, "
716 				   "portidx %d, new pkey reg %llx\n",
717 				   pd->port_port, key, i, pidx,
718 				   (unsigned long long) pkey);
719 			ipath_write_kreg(
720 				dd, dd->ipath_kregs->kr_partitionkey, pkey);
721 
722 			ret = 0;
723 			goto bail;
724 		}
725 	}
726 	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
727 		  "can't set %x\n", pd->port_port, key);
728 	ret = -EBUSY;
729 
730 bail:
731 	return ret;
732 }
733 
734 /**
735  * ipath_manage_rcvq - manage a port's receive queue
736  * @pd: the port
737  * @subport: the subport
738  * @start_stop: action to carry out
739  *
740  * start_stop == 0 disables receive on the port, for use in queue
741  * overflow conditions.  start_stop == 1 re-enables, to be used to
742  * re-init the software copy of the head register.
743  */
744 static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
745 			     int start_stop)
746 {
747 	struct ipath_devdata *dd = pd->port_dd;
748 
749 	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
750 		   start_stop ? "en" : "dis", dd->ipath_unit,
751 		   pd->port_port, subport);
752 	if (subport)
753 		goto bail;
754 	/* atomically clear receive enable port. */
755 	if (start_stop) {
756 		/*
757 		 * On enable, force in-memory copy of the tail register to
758 		 * 0, so that protocol code doesn't have to worry about
759 		 * whether or not the chip has yet updated the in-memory
760 		 * copy or not on return from the system call. The chip
761 		 * always resets its tail register back to 0 on a
762 		 * transition from disabled to enabled.  This could cause a
763 		 * problem if software was broken, and did the enable w/o
764 		 * the disable, but eventually the in-memory copy will be
765 		 * updated and correct itself, even in the face of software
766 		 * bugs.
767 		 */
768 		if (pd->port_rcvhdrtail_kvaddr)
769 			ipath_clear_rcvhdrtail(pd);
770 		set_bit(dd->ipath_r_portenable_shift + pd->port_port,
771 			&dd->ipath_rcvctrl);
772 	} else
773 		clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
774 			  &dd->ipath_rcvctrl);
775 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
776 			 dd->ipath_rcvctrl);
777 	/* now be sure chip saw it before we return */
778 	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
779 	if (start_stop) {
780 		/*
781 		 * And try to be sure that tail reg update has happened too.
782 		 * This should in theory interlock with the RXE changes to
783 		 * the tail register.  Don't assign it to the tail register
784 		 * in memory copy, since we could overwrite an update by the
785 		 * chip if we did.
786 		 */
787 		ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
788 	}
789 	/* always; new head should be equal to new tail; see above */
790 bail:
791 	return 0;
792 }
793 
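/*
 * Drop this port's references to its partition keys; if any key's
 * reference count reaches zero, clear it and rewrite the partition
 * key register.
 */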
794 static void ipath_clean_part_key(struct ipath_portdata *pd,
795 				 struct ipath_devdata *dd)
796 {
797 	int i, j, pchanged = 0;
798 	u64 oldpkey;
799 
800 	/* for debugging only */
801 	oldpkey = (u64) dd->ipath_pkeys[0] |
802 		((u64) dd->ipath_pkeys[1] << 16) |
803 		((u64) dd->ipath_pkeys[2] << 32) |
804 		((u64) dd->ipath_pkeys[3] << 48);
805 
806 	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
807 		if (!pd->port_pkeys[i])
808 			continue;
809 		ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
810 			   pd->port_pkeys[i]);
811 		for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
812 			/* check for match independent of the global bit */
813 			if ((dd->ipath_pkeys[j] & 0x7fff) !=
814 			    (pd->port_pkeys[i] & 0x7fff))
815 				continue;
816 			if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
817 				ipath_cdbg(VERBOSE, "p%u clear key "
818 					   "%x matches #%d\n",
819 					   pd->port_port,
820 					   pd->port_pkeys[i], j);
821 				ipath_stats.sps_pkeys[j] =
822 					dd->ipath_pkeys[j] = 0;
823 				pchanged++;
824 			}
825 			else
826 				ipath_cdbg(VERBOSE, "p%u key %x matches #%d, "
827 					   "but ref still %d\n", pd->port_port,
828 					   pd->port_pkeys[i], j,
829 					   atomic_read(&dd->ipath_pkeyrefs[j]));
830 			break;
831 		}
832 		pd->port_pkeys[i] = 0;
833 	}
834 	if (pchanged) {
835 		u64 pkey = (u64) dd->ipath_pkeys[0] |
836 			((u64) dd->ipath_pkeys[1] << 16) |
837 			((u64) dd->ipath_pkeys[2] << 32) |
838 			((u64) dd->ipath_pkeys[3] << 48);
839 		ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
840 			   "new pkey reg %llx\n", pd->port_port,
841 			   (unsigned long long) oldpkey,
842 			   (unsigned long long) pkey);
843 		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
844 				 pkey);
845 	}
846 }
847 
848 /*
849  * Initialize the port data with the receive buffer sizes
850  * so this can be done while the master port is locked.
851  * Otherwise, there is a race with a slave opening the port
852  * and seeing these fields uninitialized.
853  */
854 static void init_user_egr_sizes(struct ipath_portdata *pd)
855 {
856 	struct ipath_devdata *dd = pd->port_dd;
857 	unsigned egrperchunk, egrcnt, size;
858 
859 	/*
860 	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
861 	 * physically contiguous memory, advance through it until used up
862 	 * and then allocate more.  Of course, we need memory to store those
863 	 * extra pointers, now.  Started out with 256KB, but under heavy
864 	 * memory pressure (creating large files and then copying them over
865 	 * NFS while doing lots of MPI jobs), we hit some allocation
866 	 * failures, even though we can sleep...  (2.6.10) Still get
867 	 * failures at 64K.  32K is the lowest we can go without wasting
868 	 * additional memory.
869 	 */
870 	size = 0x8000;
871 	egrperchunk = size / dd->ipath_rcvegrbufsize;
872 	egrcnt = dd->ipath_rcvegrcnt;
873 	pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
874 	pd->port_rcvegrbufs_perchunk = egrperchunk;
875 	pd->port_rcvegrbuf_size = size;
876 }
877 
878 /**
879  * ipath_create_user_egr - allocate eager TID buffers
880  * @pd: the port to allocate TID buffers for
881  *
882  * This routine is now quite different for user and kernel, because
883  * the kernel uses skb's, for the accelerated network performance.
884  * This is the user port version.
885  *
886  * Allocate the eager TID buffers and program them into infinipath.
887  * They are no longer completely contiguous; we do multiple allocation
888  * calls.
889  */
890 static int ipath_create_user_egr(struct ipath_portdata *pd)
891 {
892 	struct ipath_devdata *dd = pd->port_dd;
893 	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
894 	size_t size;
895 	int ret;
896 	gfp_t gfp_flags;
897 
898 	/*
899 	 * GFP_USER, but without GFP_FS, so buffer cache can be
900 	 * coalesced (we hope); otherwise, even at order 4,
901 	 * heavy filesystem activity makes these fail, and we can
902 	 * use compound pages.
903 	 */
904 	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
905 
906 	egrcnt = dd->ipath_rcvegrcnt;
907 	/* TID number offset for this port */
908 	egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
909 	egrsize = dd->ipath_rcvegrbufsize;
910 	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
911 		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
912 
913 	chunk = pd->port_rcvegrbuf_chunks;
914 	egrperchunk = pd->port_rcvegrbufs_perchunk;
915 	size = pd->port_rcvegrbuf_size;
916 	pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
917 				     GFP_KERNEL);
918 	if (!pd->port_rcvegrbuf) {
919 		ret = -ENOMEM;
920 		goto bail;
921 	}
922 	pd->port_rcvegrbuf_phys =
923 		kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
924 			GFP_KERNEL);
925 	if (!pd->port_rcvegrbuf_phys) {
926 		ret = -ENOMEM;
927 		goto bail_rcvegrbuf;
928 	}
929 	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
930 
931 		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
932 			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
933 			gfp_flags);
934 
935 		if (!pd->port_rcvegrbuf[e]) {
936 			ret = -ENOMEM;
937 			goto bail_rcvegrbuf_phys;
938 		}
939 	}
940 
941 	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
942 
943 	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
944 		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
945 		unsigned i;
946 
947 		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
948 			dd->ipath_f_put_tid(dd, e + egroff +
949 					    (u64 __iomem *)
950 					    ((char __iomem *)
951 					     dd->ipath_kregbase +
952 					     dd->ipath_rcvegrbase),
953 					    RCVHQ_RCV_TYPE_EAGER, pa);
954 			pa += egrsize;
955 		}
956 		cond_resched();	/* don't hog the cpu */
957 	}
958 
959 	ret = 0;
960 	goto bail;
961 
962 bail_rcvegrbuf_phys:
963 	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
964 		pd->port_rcvegrbuf[e]; e++) {
965 		dma_free_coherent(&dd->pcidev->dev, size,
966 				  pd->port_rcvegrbuf[e],
967 				  pd->port_rcvegrbuf_phys[e]);
968 
969 	}
970 	kfree(pd->port_rcvegrbuf_phys);
971 	pd->port_rcvegrbuf_phys = NULL;
972 bail_rcvegrbuf:
973 	kfree(pd->port_rcvegrbuf);
974 	pd->port_rcvegrbuf = NULL;
975 bail:
976 	return ret;
977 }
978 
979 
980 /* common code for the mappings on dma_alloc_coherent mem */
981 static int ipath_mmap_mem(struct vm_area_struct *vma,
982 	struct ipath_portdata *pd, unsigned len, int write_ok,
983 	void *kvaddr, char *what)
984 {
985 	struct ipath_devdata *dd = pd->port_dd;
986 	unsigned long pfn;
987 	int ret;
988 
989 	if ((vma->vm_end - vma->vm_start) > len) {
990 		dev_info(&dd->pcidev->dev,
991 		         "FAIL on %s: len %lx > %x\n", what,
992 			 vma->vm_end - vma->vm_start, len);
993 		ret = -EFAULT;
994 		goto bail;
995 	}
996 
997 	if (!write_ok) {
998 		if (vma->vm_flags & VM_WRITE) {
999 			dev_info(&dd->pcidev->dev,
1000 				 "%s must be mapped readonly\n", what);
1001 			ret = -EPERM;
1002 			goto bail;
1003 		}
1004 
1005 		/* don't allow them to later change with mprotect */
1006 		vma->vm_flags &= ~VM_MAYWRITE;
1007 	}
1008 
1009 	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
1010 	ret = remap_pfn_range(vma, vma->vm_start, pfn,
1011 			      len, vma->vm_page_prot);
1012 	if (ret)
1013 		dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
1014 			 "bytes r%c failed: %d\n", what, pd->port_port,
1015 			 pfn, len, write_ok?'w':'o', ret);
1016 	else
1017 		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
1018 			   "r%c\n", what, pd->port_port, pfn, len,
1019 			   write_ok?'w':'o');
1020 bail:
1021 	return ret;
1022 }
1023 
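/*
 * Map the chip's per-port user registers (at most one page) into the
 * process, as an uncached I/O mapping.
 */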
1024 static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
1025 		     u64 ureg)
1026 {
1027 	unsigned long phys;
1028 	int ret;
1029 
1030 	/*
1031 	 * This is real hardware, so use io_remap.  This is the mechanism
1032 	 * for the user process to update the head registers for their port
1033 	 * in the chip.
1034 	 */
1035 	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
1036 		dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
1037 			 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
1038 		ret = -EFAULT;
1039 	} else {
1040 		phys = dd->ipath_physaddr + ureg;
1041 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1042 
1043 		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
1044 		ret = io_remap_pfn_range(vma, vma->vm_start,
1045 					 phys >> PAGE_SHIFT,
1046 					 vma->vm_end - vma->vm_start,
1047 					 vma->vm_page_prot);
1048 	}
1049 	return ret;
1050 }
1051 
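/*
 * Map the port's PIO send buffers in the chip into the process,
 * write-only, as an I/O mapping that is never copied on fork.
 */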
1052 static int mmap_piobufs(struct vm_area_struct *vma,
1053 			struct ipath_devdata *dd,
1054 			struct ipath_portdata *pd,
1055 			unsigned piobufs, unsigned piocnt)
1056 {
1057 	unsigned long phys;
1058 	int ret;
1059 
1060 	/*
1061 	 * When we map the PIO buffers in the chip, we want to map them as
1062 	 * writeonly, no read possible.   This prevents access to previous
1063 	 * process data, and catches users who might try to read the i/o
1064 	 * space due to a bug.
1065 	 */
1066 	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
1067 		dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
1068 			 "reqlen %lx > PAGE\n",
1069 			 vma->vm_end - vma->vm_start);
1070 		ret = -EINVAL;
1071 		goto bail;
1072 	}
1073 
1074 	phys = dd->ipath_physaddr + piobufs;
1075 
1076 #if defined(__powerpc__)
1077 	/* There isn't a generic way to specify writethrough mappings */
1078 	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
1079 	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
1080 	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
1081 #endif
1082 
1083 	/*
1084 	 * don't allow them to later change to readable with mprotect (for when
1085 	 * not initially mapped readable, as is normally the case)
1086 	 */
1087 	vma->vm_flags &= ~VM_MAYREAD;
1088 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
1089 
1090 	ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
1091 				 vma->vm_end - vma->vm_start,
1092 				 vma->vm_page_prot);
1093 bail:
1094 	return ret;
1095 }
1096 
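/*
 * Map the port's eager receive buffers read-only into the process,
 * one remap_pfn_range() call per allocated chunk.
 */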
1097 static int mmap_rcvegrbufs(struct vm_area_struct *vma,
1098 			   struct ipath_portdata *pd)
1099 {
1100 	struct ipath_devdata *dd = pd->port_dd;
1101 	unsigned long start, size;
1102 	size_t total_size, i;
1103 	unsigned long pfn;
1104 	int ret;
1105 
1106 	size = pd->port_rcvegrbuf_size;
1107 	total_size = pd->port_rcvegrbuf_chunks * size;
1108 	if ((vma->vm_end - vma->vm_start) > total_size) {
1109 		dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
1110 			 "reqlen %lx > actual %lx\n",
1111 			 vma->vm_end - vma->vm_start,
1112 			 (unsigned long) total_size);
1113 		ret = -EINVAL;
1114 		goto bail;
1115 	}
1116 
1117 	if (vma->vm_flags & VM_WRITE) {
1118 		dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
1119 			 "writable (flags=%lx)\n", vma->vm_flags);
1120 		ret = -EPERM;
1121 		goto bail;
1122 	}
1123 	/* don't allow them to later change to writeable with mprotect */
1124 	vma->vm_flags &= ~VM_MAYWRITE;
1125 
1126 	start = vma->vm_start;
1127 
1128 	for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
1129 		pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
1130 		ret = remap_pfn_range(vma, start, pfn, size,
1131 				      vma->vm_page_prot);
1132 		if (ret < 0)
1133 			goto bail;
1134 	}
1135 	ret = 0;
1136 
1137 bail:
1138 	return ret;
1139 }
1140 
1141 /*
1142  * ipath_file_vma_fault - handle a VMA page fault.
1143  */
1144 static int ipath_file_vma_fault(struct vm_area_struct *vma,
1145 					struct vm_fault *vmf)
1146 {
1147 	struct page *page;
1148 
1149 	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
1150 	if (!page)
1151 		return VM_FAULT_SIGBUS;
1152 	get_page(page);
1153 	vmf->page = page;
1154 
1155 	return 0;
1156 }
1157 
1158 static const struct vm_operations_struct ipath_file_vm_ops = {
1159 	.fault = ipath_file_vma_fault,
1160 };
1161 
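/*
 * Handle mmap of the vmalloc'ed regions shared between master and
 * slave processes on a shared port (uregbase, rcvhdr queue, eager
 * buffers).  Returns 1 if the address was handled here, 0 if it should
 * be treated as a physical/chip address, or a negative errno.
 */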
1162 static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1163 		       struct ipath_portdata *pd, unsigned subport)
1164 {
1165 	unsigned long len;
1166 	struct ipath_devdata *dd;
1167 	void *addr;
1168 	size_t size;
1169 	int ret = 0;
1170 
1171 	/* If the port is not shared, all addresses should be physical */
1172 	if (!pd->port_subport_cnt)
1173 		goto bail;
1174 
1175 	dd = pd->port_dd;
1176 	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
1177 
1178 	/*
1179 	 * Each process has all the subport uregbase, rcvhdrq, and
1180 	 * rcvegrbufs mmapped - as an array for all the processes,
1181 	 * and also separately for this process.
1182 	 */
1183 	if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
1184 		addr = pd->subport_uregbase;
1185 		size = PAGE_SIZE * pd->port_subport_cnt;
1186 	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
1187 		addr = pd->subport_rcvhdr_base;
1188 		size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
1189 	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
1190 		addr = pd->subport_rcvegrbuf;
1191 		size *= pd->port_subport_cnt;
1192 	} else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
1193 					PAGE_SIZE * subport)) {
1194 		addr = pd->subport_uregbase + PAGE_SIZE * subport;
1195 		size = PAGE_SIZE;
1196 	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
1197 					pd->port_rcvhdrq_size * subport)) {
1198 		addr = pd->subport_rcvhdr_base +
1199 			pd->port_rcvhdrq_size * subport;
1200 		size = pd->port_rcvhdrq_size;
1201 	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
1202 					size * subport)) {
1203 		addr = pd->subport_rcvegrbuf + size * subport;
1204 		/* rcvegrbufs are read-only on the slave */
1205 		if (vma->vm_flags & VM_WRITE) {
1206 			dev_info(&dd->pcidev->dev,
1207 				 "Can't map eager buffers as "
1208 				 "writable (flags=%lx)\n", vma->vm_flags);
1209 			ret = -EPERM;
1210 			goto bail;
1211 		}
1212 		/*
1213 		 * Don't allow permission to later change to writeable
1214 		 * with mprotect.
1215 		 */
1216 		vma->vm_flags &= ~VM_MAYWRITE;
1217 	} else {
1218 		goto bail;
1219 	}
1220 	len = vma->vm_end - vma->vm_start;
1221 	if (len > size) {
1222 		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
1223 		ret = -EINVAL;
1224 		goto bail;
1225 	}
1226 
1227 	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
1228 	vma->vm_ops = &ipath_file_vm_ops;
1229 	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
1230 	ret = 1;
1231 
1232 bail:
1233 	return ret;
1234 }
1235 
1236 /**
1237  * ipath_mmap - mmap various structures into user space
1238  * @fp: the file pointer
1239  * @vma: the VM area
1240  *
1241  * We use this to have a shared buffer between the kernel and the user code
1242  * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
1243  * buffers in the chip.  We have the open and close entries so we can bump
1244  * the ref count and keep the driver from being unloaded while still mapped.
1245  */
1246 static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1247 {
1248 	struct ipath_portdata *pd;
1249 	struct ipath_devdata *dd;
1250 	u64 pgaddr, ureg;
1251 	unsigned piobufs, piocnt;
1252 	int ret;
1253 
1254 	pd = port_fp(fp);
1255 	if (!pd) {
1256 		ret = -EINVAL;
1257 		goto bail;
1258 	}
1259 	dd = pd->port_dd;
1260 
1261 	/*
1262 	 * This is the ipath_do_user_init() code, mapping the shared buffers
1263 	 * into the user process. The address referred to by vm_pgoff is the
1264 	 * file offset passed via mmap().  For shared ports, this is the
1265 	 * kernel vmalloc() address of the pages to share with the master.
1266 	 * For non-shared or master ports, this is a physical address.
1267 	 * We only do one mmap for each space mapped.
1268 	 */
1269 	pgaddr = vma->vm_pgoff << PAGE_SHIFT;
1270 
1271 	/*
1272 	 * Check for 0 in case one of the allocations failed, but user
1273 	 * called mmap anyway.
1274 	 */
1275 	if (!pgaddr)  {
1276 		ret = -EINVAL;
1277 		goto bail;
1278 	}
1279 
1280 	ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
1281 		   (unsigned long long) pgaddr, vma->vm_start,
1282 		   vma->vm_end - vma->vm_start, dd->ipath_unit,
1283 		   pd->port_port, subport_fp(fp));
1284 
1285 	/*
1286 	 * Physical addresses must fit in 40 bits for our hardware.
1287 	 * Check for kernel virtual addresses first, anything else must
1288 	 * match a HW or memory address.
1289 	 */
1290 	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
1291 	if (ret) {
1292 		if (ret > 0)
1293 			ret = 0;
1294 		goto bail;
1295 	}
1296 
1297 	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
1298 	if (!pd->port_subport_cnt) {
1299 		/* port is not shared */
1300 		piocnt = pd->port_piocnt;
1301 		piobufs = pd->port_piobufs;
1302 	} else if (!subport_fp(fp)) {
1303 		/* caller is the master */
1304 		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
1305 			 (pd->port_piocnt % pd->port_subport_cnt);
1306 		piobufs = pd->port_piobufs +
1307 			dd->ipath_palign * (pd->port_piocnt - piocnt);
1308 	} else {
1309 		unsigned slave = subport_fp(fp) - 1;
1310 
1311 		/* caller is a slave */
1312 		piocnt = pd->port_piocnt / pd->port_subport_cnt;
1313 		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
1314 	}
1315 
1316 	if (pgaddr == ureg)
1317 		ret = mmap_ureg(vma, dd, ureg);
1318 	else if (pgaddr == piobufs)
1319 		ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
1320 	else if (pgaddr == dd->ipath_pioavailregs_phys)
1321 		/* in-memory copy of pioavail registers */
1322 		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1323 				     (void *) dd->ipath_pioavailregs_dma,
1324 				     "pioavail registers");
1325 	else if (pgaddr == pd->port_rcvegr_phys)
1326 		ret = mmap_rcvegrbufs(vma, pd);
1327 	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
1328 		/*
1329 		 * The rcvhdrq itself; readonly except on HT (so have
1330 		 * to allow writable mapping), multiple pages, contiguous
1331 		 * from an i/o perspective.
1332 		 */
1333 		ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
1334 				     pd->port_rcvhdrq,
1335 				     "rcvhdrq");
1336 	else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
1337 		/* in-memory copy of rcvhdrq tail register */
1338 		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1339 				     pd->port_rcvhdrtail_kvaddr,
1340 				     "rcvhdrq tail");
1341 	else
1342 		ret = -EINVAL;
1343 
1344 	vma->vm_private_data = NULL;
1345 
1346 	if (ret < 0)
1347 		dev_info(&dd->pcidev->dev,
1348 			 "Failure %d on off %llx len %lx\n",
1349 			 -ret, (unsigned long long)pgaddr,
1350 			 vma->vm_end - vma->vm_start);
1351 bail:
1352 	return ret;
1353 }
1354 
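/*
 * Report POLLIN if the header queue overflow count has changed since
 * the last poll, when overflow polling was requested.
 */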
1355 static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
1356 {
1357 	unsigned pollflag = 0;
1358 
1359 	if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
1360 	    pd->port_hdrqfull != pd->port_hdrqfull_poll) {
1361 		pollflag |= POLLIN | POLLRDNORM;
1362 		pd->port_hdrqfull_poll = pd->port_hdrqfull;
1363 	}
1364 
1365 	return pollflag;
1366 }
1367 
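/*
 * Poll for urgent packets (and header queue overflow); if nothing is
 * pending, set the waiting-for-urgent flag and wait on the port's
 * wait queue.
 */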
1368 static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
1369 				      struct file *fp,
1370 				      struct poll_table_struct *pt)
1371 {
1372 	unsigned pollflag = 0;
1373 	struct ipath_devdata *dd;
1374 
1375 	dd = pd->port_dd;
1376 
1377 	/* variable access in ipath_poll_hdrqfull() needs this */
1378 	rmb();
1379 	pollflag = ipath_poll_hdrqfull(pd);
1380 
1381 	if (pd->port_urgent != pd->port_urgent_poll) {
1382 		pollflag |= POLLIN | POLLRDNORM;
1383 		pd->port_urgent_poll = pd->port_urgent;
1384 	}
1385 
1386 	if (!pollflag) {
1387 		/* this saves a spin_lock/unlock in interrupt handler... */
1388 		set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
1389 		/* flush waiting flag so don't miss an event... */
1390 		wmb();
1391 		poll_wait(fp, &pd->port_wait, pt);
1392 	}
1393 
1394 	return pollflag;
1395 }
1396 
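/*
 * Poll for newly arrived packets by comparing the receive header queue
 * head and tail; if the queue is empty, enable the receive-available
 * interrupt for this port and wait on the port's wait queue.
 */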
1397 static unsigned int ipath_poll_next(struct ipath_portdata *pd,
1398 				    struct file *fp,
1399 				    struct poll_table_struct *pt)
1400 {
1401 	u32 head;
1402 	u32 tail;
1403 	unsigned pollflag = 0;
1404 	struct ipath_devdata *dd;
1405 
1406 	dd = pd->port_dd;
1407 
1408 	/* variable access in ipath_poll_hdrqfull() needs this */
1409 	rmb();
1410 	pollflag = ipath_poll_hdrqfull(pd);
1411 
1412 	head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
1413 	if (pd->port_rcvhdrtail_kvaddr)
1414 		tail = ipath_get_rcvhdrtail(pd);
1415 	else
1416 		tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
1417 
1418 	if (head != tail)
1419 		pollflag |= POLLIN | POLLRDNORM;
1420 	else {
1421 		/* this saves a spin_lock/unlock in interrupt handler */
1422 		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1423 		/* flush waiting flag so we don't miss an event */
1424 		wmb();
1425 
1426 		set_bit(pd->port_port + dd->ipath_r_intravail_shift,
1427 			&dd->ipath_rcvctrl);
1428 
1429 		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1430 				 dd->ipath_rcvctrl);
1431 
1432 		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
1433 			ipath_write_ureg(dd, ur_rcvhdrhead,
1434 					 dd->ipath_rhdrhead_intr_off | head,
1435 					 pd->port_port);
1436 
1437 		poll_wait(fp, &pd->port_wait, pt);
1438 	}
1439 
1440 	return pollflag;
1441 }
1442 
1443 static unsigned int ipath_poll(struct file *fp,
1444 			       struct poll_table_struct *pt)
1445 {
1446 	struct ipath_portdata *pd;
1447 	unsigned pollflag;
1448 
1449 	pd = port_fp(fp);
1450 	if (!pd)
1451 		pollflag = 0;
1452 	else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
1453 		pollflag = ipath_poll_urgent(pd, fp, pt);
1454 	else
1455 		pollflag = ipath_poll_next(pd, fp, pt);
1456 
1457 	return pollflag;
1458 }
1459 
1460 static int ipath_supports_subports(int user_swmajor, int user_swminor)
1461 {
1462 	/* no subport implementation prior to software version 1.3 */
1463 	return (user_swmajor > 1) || (user_swminor >= 3);
1464 }
1465 
1466 static int ipath_compatible_subports(int user_swmajor, int user_swminor)
1467 {
1468 	/* this code is written long-hand for clarity */
1469 	if (IPATH_USER_SWMAJOR != user_swmajor) {
1470 		/* no promise of compatibility if major mismatch */
1471 		return 0;
1472 	}
1473 	if (IPATH_USER_SWMAJOR == 1) {
1474 		switch (IPATH_USER_SWMINOR) {
1475 		case 0:
1476 		case 1:
1477 		case 2:
1478 			/* no subport implementation so cannot be compatible */
1479 			return 0;
1480 		case 3:
1481 			/* 3 is only compatible with itself */
1482 			return user_swminor == 3;
1483 		default:
1484 			/* >= 4 are compatible (or are expected to be) */
1485 			return user_swminor >= 4;
1486 		}
1487 	}
1488 	/* make no promises yet for future major versions */
1489 	return 0;
1490 }
1491 
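/*
 * If the user requested port sharing, verify user/driver version
 * compatibility and allocate the shared subport regions (uregbase,
 * rcvhdr queue, and eager buffer copies) for all subports.
 */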
1492 static int init_subports(struct ipath_devdata *dd,
1493 			 struct ipath_portdata *pd,
1494 			 const struct ipath_user_info *uinfo)
1495 {
1496 	int ret = 0;
1497 	unsigned num_subports;
1498 	size_t size;
1499 
1500 	/*
1501 	 * If the user is requesting zero subports,
1502 	 * skip the subport allocation.
1503 	 */
1504 	if (uinfo->spu_subport_cnt <= 0)
1505 		goto bail;
1506 
1507 	/* Self-consistency check for ipath_compatible_subports() */
1508 	if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
1509 	    !ipath_compatible_subports(IPATH_USER_SWMAJOR,
1510 				       IPATH_USER_SWMINOR)) {
1511 		dev_info(&dd->pcidev->dev,
1512 			 "Inconsistent ipath_compatible_subports()\n");
1513 		goto bail;
1514 	}
1515 
1516 	/* Check for subport compatibility */
1517 	if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
1518 				       uinfo->spu_userversion & 0xffff)) {
1519 		dev_info(&dd->pcidev->dev,
1520 			 "Mismatched user version (%d.%d) and driver "
1521 			 "version (%d.%d) while port sharing. Ensure "
1522                          "that driver and library are from the same "
1523                          "release.\n",
1524 			 (int) (uinfo->spu_userversion >> 16),
1525                          (int) (uinfo->spu_userversion & 0xffff),
1526 			 IPATH_USER_SWMAJOR,
1527 	                 IPATH_USER_SWMINOR);
1528 		goto bail;
1529 	}
1530 	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
1531 		ret = -EINVAL;
1532 		goto bail;
1533 	}
1534 
1535 	num_subports = uinfo->spu_subport_cnt;
1536 	pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports);
1537 	if (!pd->subport_uregbase) {
1538 		ret = -ENOMEM;
1539 		goto bail;
1540 	}
1541 	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
1542 	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1543 		     sizeof(u32), PAGE_SIZE) * num_subports;
1544 	pd->subport_rcvhdr_base = vzalloc(size);
1545 	if (!pd->subport_rcvhdr_base) {
1546 		ret = -ENOMEM;
1547 		goto bail_ureg;
1548 	}
1549 
1550 	pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks *
1551 					pd->port_rcvegrbuf_size *
1552 					num_subports);
1553 	if (!pd->subport_rcvegrbuf) {
1554 		ret = -ENOMEM;
1555 		goto bail_rhdr;
1556 	}
1557 
1558 	pd->port_subport_cnt = uinfo->spu_subport_cnt;
1559 	pd->port_subport_id = uinfo->spu_subport_id;
1560 	pd->active_slaves = 1;
1561 	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1562 	goto bail;
1563 
1564 bail_rhdr:
1565 	vfree(pd->subport_rcvhdr_base);
1566 bail_ureg:
1567 	vfree(pd->subport_uregbase);
1568 	pd->subport_uregbase = NULL;
1569 bail:
1570 	return ret;
1571 }
1572 
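/*
 * Allocate (or reuse) the port data for the given port number and
 * attach it to this file; returns -EBUSY if the port is already open.
 */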
1573 static int try_alloc_port(struct ipath_devdata *dd, int port,
1574 			  struct file *fp,
1575 			  const struct ipath_user_info *uinfo)
1576 {
1577 	struct ipath_portdata *pd;
1578 	int ret;
1579 
1580 	if (!(pd = dd->ipath_pd[port])) {
1581 		void *ptmp;
1582 
1583 		pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
1584 
1585 		/*
1586 		 * Allocate memory for use in ipath_tid_update() just once
1587 		 * at open, not per call.  Reduces cost of expected send
1588 		 * setup.
1589 		 */
1590 		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
1591 			       dd->ipath_rcvtidcnt * sizeof(struct page **),
1592 			       GFP_KERNEL);
1593 		if (!pd || !ptmp) {
1594 			ipath_dev_err(dd, "Unable to allocate portdata "
1595 				      "memory, failing open\n");
1596 			ret = -ENOMEM;
1597 			kfree(pd);
1598 			kfree(ptmp);
1599 			goto bail;
1600 		}
1601 		dd->ipath_pd[port] = pd;
1602 		dd->ipath_pd[port]->port_port = port;
1603 		dd->ipath_pd[port]->port_dd = dd;
1604 		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
1605 		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
1606 	}
1607 	if (!pd->port_cnt) {
1608 		pd->userversion = uinfo->spu_userversion;
1609 		init_user_egr_sizes(pd);
1610 		if ((ret = init_subports(dd, pd, uinfo)) != 0)
1611 			goto bail;
1612 		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
1613 			   current->comm, current->pid, dd->ipath_unit,
1614 			   port);
1615 		pd->port_cnt = 1;
1616 		port_fp(fp) = pd;
1617 		pd->port_pid = get_pid(task_pid(current));
1618 		strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
1619 		ipath_stats.sps_ports++;
1620 		ret = 0;
1621 	} else
1622 		ret = -EBUSY;
1623 
1624 bail:
1625 	return ret;
1626 }
1627 
1628 static inline int usable(struct ipath_devdata *dd)
1629 {
1630 	return dd &&
1631 		(dd->ipath_flags & IPATH_PRESENT) &&
1632 		dd->ipath_kregbase &&
1633 		dd->ipath_lid &&
1634 		!(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
1635 				     | IPATH_LINKUNK));
1636 }
1637 
1638 static int find_free_port(int unit, struct file *fp,
1639 			  const struct ipath_user_info *uinfo)
1640 {
1641 	struct ipath_devdata *dd = ipath_lookup(unit);
1642 	int ret, i;
1643 
1644 	if (!dd) {
1645 		ret = -ENODEV;
1646 		goto bail;
1647 	}
1648 
1649 	if (!usable(dd)) {
1650 		ret = -ENETDOWN;
1651 		goto bail;
1652 	}
1653 
1654 	for (i = 1; i < dd->ipath_cfgports; i++) {
1655 		ret = try_alloc_port(dd, i, fp, uinfo);
1656 		if (ret != -EBUSY)
1657 			goto bail;
1658 	}
1659 	ret = -EBUSY;
1660 
1661 bail:
1662 	return ret;
1663 }
1664 
1665 static int find_best_unit(struct file *fp,
1666 			  const struct ipath_user_info *uinfo)
1667 {
1668 	int ret = 0, i, prefunit = -1, devmax;
1669 	int maxofallports, npresent, nup;
1670 	int ndev;
1671 
1672 	devmax = ipath_count_units(&npresent, &nup, &maxofallports);
1673 
1674 	/*
1675 	 * This code is present to allow a knowledgeable person to
1676 	 * specify the layout of processes to processors before opening
1677 	 * this driver, and then we'll assign the process to the "closest"
1678 	 * InfiniPath chip to that processor (we assume reasonable connectivity,
1679 	 * for now).  This code assumes that if affinity has been set
1680 	 * before this point, that at most one cpu is set; for now this
1681 	 * is reasonable.  I check for both cpumask_empty() and cpumask_full(),
1682 	 * in case some kernel variant sets none of the bits when no
1683 	 * affinity is set.  2.6.11 and 12 kernels have all present
1684 	 * cpus set.  Some day we'll have to fix it up further to handle
1685 	 * a cpu subset.  This algorithm fails for two HT chips connected
1686 	 * in tunnel fashion.  Eventually this needs real topology
1687 	 * information.  There may be some issues with dual core numbering
1688 	 * as well.  This needs more work prior to release.
1689 	 */
1690 	if (!cpumask_empty(tsk_cpus_allowed(current)) &&
1691 	    !cpumask_full(tsk_cpus_allowed(current))) {
1692 		int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
1693 		get_online_cpus();
1694 		for_each_online_cpu(i)
1695 			if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) {
1696 				ipath_cdbg(PROC, "%s[%u] affinity set for "
1697 					   "cpu %d/%d\n", current->comm,
1698 					   current->pid, i, ncpus);
1699 				curcpu = i;
1700 				nset++;
1701 			}
1702 		put_online_cpus();
1703 		if (curcpu != -1 && nset != ncpus) {
1704 			if (npresent) {
1705 				prefunit = curcpu / (ncpus / npresent);
1706 				ipath_cdbg(PROC, "%s[%u] %d chips, %d cpus, "
1707 					  "%d cpus/chip, select unit %d\n",
1708 					  current->comm, current->pid,
1709 					  npresent, ncpus, ncpus / npresent,
1710 					  prefunit);
1711 			}
1712 		}
1713 	}
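	/*
	 * Worked example of the mapping above, with illustrative numbers:
	 * ncpus = 8 online cpus, npresent = 2 chips, and affinity pinned
	 * to cpu 5 gives prefunit = 5 / (8 / 2) = 1, steering the process
	 * to the second unit; affinity on cpu 2 would pick unit 0.
	 */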
1714 
1715 	/*
1716 	 * user ports start at 1, kernel port is 0
1717 	 * For now, we do round-robin access across all chips
1718 	 */
1719 
1720 	if (prefunit != -1)
1721 		devmax = prefunit + 1;
1722 recheck:
1723 	for (i = 1; i < maxofallports; i++) {
1724 		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
1725 		     ndev++) {
1726 			struct ipath_devdata *dd = ipath_lookup(ndev);
1727 
1728 			if (!usable(dd))
1729 				continue; /* can't use this unit */
1730 			if (i >= dd->ipath_cfgports)
1731 				/*
1732 				 * Maxed out on users of this unit. Try
1733 				 * next.
1734 				 */
1735 				continue;
1736 			ret = try_alloc_port(dd, i, fp, uinfo);
1737 			if (!ret)
1738 				goto done;
1739 		}
1740 	}
1741 
1742 	if (npresent) {
1743 		if (nup == 0) {
1744 			ret = -ENETDOWN;
1745 			ipath_dbg("No ports available (none initialized "
1746 				  "and ready)\n");
1747 		} else {
1748 			if (prefunit > 0) {
1749 				/* if started above 0, retry from 0 */
1750 				ipath_cdbg(PROC,
1751 					   "%s[%u] no ports on prefunit "
1752 					   "%d, clear and re-check\n",
1753 					   current->comm, current->pid,
1754 					   prefunit);
1755 				devmax = ipath_count_units(NULL, NULL,
1756 							   NULL);
1757 				prefunit = -1;
1758 				goto recheck;
1759 			}
1760 			ret = -EBUSY;
1761 			ipath_dbg("No ports available\n");
1762 		}
1763 	} else {
1764 		ret = -ENXIO;
1765 		ipath_dbg("No boards found\n");
1766 	}
1767 
1768 done:
1769 	return ret;
1770 }
1771 
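/*
 * Attach this open as a slave to an already-open master port whose
 * port_subport_id matches the id requested in uinfo.  Returns 1 when a
 * matching master is found and the attach succeeds, 0 when no master
 * with that subport id is open yet, and -EINVAL when a master is found
 * but its subport count, user version, or number of free slots does
 * not match the request.
 */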
1772 static int find_shared_port(struct file *fp,
1773 			    const struct ipath_user_info *uinfo)
1774 {
1775 	int devmax, ndev, i;
1776 	int ret = 0;
1777 
1778 	devmax = ipath_count_units(NULL, NULL, NULL);
1779 
1780 	for (ndev = 0; ndev < devmax; ndev++) {
1781 		struct ipath_devdata *dd = ipath_lookup(ndev);
1782 
1783 		if (!usable(dd))
1784 			continue;
1785 		for (i = 1; i < dd->ipath_cfgports; i++) {
1786 			struct ipath_portdata *pd = dd->ipath_pd[i];
1787 
1788 			/* Skip ports which are not yet open */
1789 			if (!pd || !pd->port_cnt)
1790 				continue;
1791 			/* Skip port if it doesn't match the requested one */
1792 			if (pd->port_subport_id != uinfo->spu_subport_id)
1793 				continue;
1794 			/* Verify the sharing process matches the master */
1795 			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
1796 			    pd->userversion != uinfo->spu_userversion ||
1797 			    pd->port_cnt >= pd->port_subport_cnt) {
1798 				ret = -EINVAL;
1799 				goto done;
1800 			}
1801 			port_fp(fp) = pd;
1802 			subport_fp(fp) = pd->port_cnt++;
1803 			pd->port_subpid[subport_fp(fp)] =
1804 				get_pid(task_pid(current));
1805 			tidcursor_fp(fp) = 0;
1806 			pd->active_slaves |= 1 << subport_fp(fp);
1807 			ipath_cdbg(PROC,
1808 				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
1809 				   current->comm, current->pid,
1810 				   subport_fp(fp),
1811 				   pd->port_comm, pid_nr(pd->port_pid),
1812 				   dd->ipath_unit, pd->port_port);
1813 			ret = 1;
1814 			goto done;
1815 		}
1816 	}
1817 
1818 done:
1819 	return ret;
1820 }
1821 
1822 static int ipath_open(struct inode *in, struct file *fp)
1823 {
1824 	/* The real work is performed later in ipath_assign_port() */
1825 	fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
1826 	return fp->private_data ? 0 : -ENOMEM;
1827 }
1828 
1829 /* Get port early, so can set affinity prior to memory allocation */
1830 static int ipath_assign_port(struct file *fp,
1831 			      const struct ipath_user_info *uinfo)
1832 {
1833 	int ret;
1834 	int i_minor;
1835 	unsigned swmajor, swminor;
1836 
1837 	/* Check to be sure we haven't already initialized this file */
1838 	if (port_fp(fp)) {
1839 		ret = -EINVAL;
1840 		goto done;
1841 	}
1842 
1843 	/* for now, if major version is different, bail */
1844 	swmajor = uinfo->spu_userversion >> 16;
1845 	if (swmajor != IPATH_USER_SWMAJOR) {
1846 		ipath_dbg("User major version %d not same as driver "
1847 			  "major %d\n", uinfo->spu_userversion >> 16,
1848 			  IPATH_USER_SWMAJOR);
1849 		ret = -ENODEV;
1850 		goto done;
1851 	}
1852 
1853 	swminor = uinfo->spu_userversion & 0xffff;
1854 	if (swminor != IPATH_USER_SWMINOR)
1855 		ipath_dbg("User minor version %d not same as driver "
1856 			  "minor %d\n", swminor, IPATH_USER_SWMINOR);
1857 
1858 	mutex_lock(&ipath_mutex);
1859 
1860 	if (ipath_compatible_subports(swmajor, swminor) &&
1861 	    uinfo->spu_subport_cnt &&
1862 	    (ret = find_shared_port(fp, uinfo))) {
1863 		if (ret > 0)
1864 			ret = 0;
1865 		goto done_chk_sdma;
1866 	}
1867 
1868 	i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE;
1869 	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
1870 		   (long)file_inode(fp)->i_rdev, i_minor);
1871 
1872 	if (i_minor)
1873 		ret = find_free_port(i_minor - 1, fp, uinfo);
1874 	else
1875 		ret = find_best_unit(fp, uinfo);
1876 
1877 done_chk_sdma:
1878 	if (!ret) {
1879 		struct ipath_filedata *fd = fp->private_data;
1880 		const struct ipath_portdata *pd = fd->pd;
1881 		const struct ipath_devdata *dd = pd->port_dd;
1882 
1883 		fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
1884 						      dd->ipath_unit,
1885 						      pd->port_port,
1886 						      fd->subport);
1887 
1888 		if (!fd->pq)
1889 			ret = -ENOMEM;
1890 	}
1891 
1892 	mutex_unlock(&ipath_mutex);
1893 
1894 done:
1895 	return ret;
1896 }
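/*
 * Assignment order used above: if the library asked for subport sharing
 * (and the versions allow it), try to attach to an existing master
 * first; otherwise an open on a per-unit node ("ipath<unit>") is bound
 * to that unit via find_free_port(), and an open on the wildcard node
 * falls back to find_best_unit().  Only once a port is assigned is the
 * per-fd user SDMA queue created.
 */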
1897 
1898 
1899 static int ipath_do_user_init(struct file *fp,
1900 			      const struct ipath_user_info *uinfo)
1901 {
1902 	int ret;
1903 	struct ipath_portdata *pd = port_fp(fp);
1904 	struct ipath_devdata *dd;
1905 	u32 head32;
1906 
1907 	/* Subports don't need to initialize anything since master did it. */
1908 	if (subport_fp(fp)) {
1909 		ret = wait_event_interruptible(pd->port_wait,
1910 			!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
1911 		goto done;
1912 	}
1913 
1914 	dd = pd->port_dd;
1915 
1916 	if (uinfo->spu_rcvhdrsize) {
1917 		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
1918 		if (ret)
1919 			goto done;
1920 	}
1921 
1922 	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
1923 
1924 	/* some ports may get extra buffers, calculate that here */
1925 	if (pd->port_port <= dd->ipath_ports_extrabuf)
1926 		pd->port_piocnt = dd->ipath_pbufsport + 1;
1927 	else
1928 		pd->port_piocnt = dd->ipath_pbufsport;
1929 
1930 	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
1931 	if (pd->port_port <= dd->ipath_ports_extrabuf)
1932 		pd->port_pio_base = (dd->ipath_pbufsport + 1)
1933 			* (pd->port_port - 1);
1934 	else
1935 		pd->port_pio_base = dd->ipath_ports_extrabuf +
1936 			dd->ipath_pbufsport * (pd->port_port - 1);
1937 	pd->port_piobufs = dd->ipath_piobufbase +
1938 		pd->port_pio_base * dd->ipath_palign;
1939 	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
1940 		" first pio %u\n", pd->port_port, pd->port_piobufs,
1941 		pd->port_piocnt, pd->port_pio_base);
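	/*
	 * Worked example of the split above, with illustrative numbers:
	 * ipath_pbufsport = 32 and ipath_ports_extrabuf = 2 gives port 1
	 * buffers 0..32 (33 of them), port 2 buffers 33..65, and port 3,
	 * which gets no extra buffer, 32 buffers starting at
	 * 2 + 32 * 2 = 66.
	 */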
1942 	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
1943 
1944 	/*
1945 	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
1946 	 * array for the time being.  If pd->port_port exceeds what the
1947 	 * chip supports, we will someday need extra logic here to handle
1948 	 * the overflow through port 0.
1949 	 */
1950 	ret = ipath_create_rcvhdrq(dd, pd);
1951 	if (!ret)
1952 		ret = ipath_create_user_egr(pd);
1953 	if (ret)
1954 		goto done;
1955 
1956 	/*
1957 	 * set the eager head register for this port to the current values
1958 	 * of the tail pointers, since we don't know if they were
1959 	 * updated on last use of the port.
1960 	 */
1961 	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
1962 	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
1963 	pd->port_lastrcvhdrqtail = -1;
1964 	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
1965 		pd->port_port, head32);
1966 	pd->port_tidcursor = 0;	/* start at beginning after open */
1967 
1968 	/* initialize poll variables... */
1969 	pd->port_urgent = 0;
1970 	pd->port_urgent_poll = 0;
1971 	pd->port_hdrqfull_poll = pd->port_hdrqfull;
1972 
1973 	/*
1974 	 * Now enable the port for receive.
1975 	 * Chips that DMA the tail register to memory do so when it
1976 	 * changes (and when the update bit transitions from 0 to 1),
1977 	 * so for those chips we turn the update off and then back on.
1978 	 * This briefly affects any other open ports, but the window is
1979 	 * short enough not to be an issue.  We
1980 	 * explicitly set the in-memory tail copy to 0 beforehand, so we
1981 	 * don't have to wait to be sure the DMA update has happened
1982 	 * (chip resets head/tail to 0 on transition to enable).
1983 	 */
1984 	set_bit(dd->ipath_r_portenable_shift + pd->port_port,
1985 		&dd->ipath_rcvctrl);
1986 	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
1987 		if (pd->port_rcvhdrtail_kvaddr)
1988 			ipath_clear_rcvhdrtail(pd);
1989 		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1990 			dd->ipath_rcvctrl &
1991 			~(1ULL << dd->ipath_r_tailupd_shift));
1992 	}
1993 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1994 			 dd->ipath_rcvctrl);
1995 	/* Notify any waiting slaves */
1996 	if (pd->port_subport_cnt) {
1997 		clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
1998 		wake_up(&pd->port_wait);
1999 	}
2000 done:
2001 	return ret;
2002 }
2003 
2004 /**
2005  * unlock_expected_tids - unlock any expected TID entries the port still had in use
2006  * @pd: port
2007  *
2008  * We don't actually update the chip here; the caller does a bulk
2009  * update afterwards using ipath_f_clear_tids.
2010  */
2011 static void unlock_expected_tids(struct ipath_portdata *pd)
2012 {
2013 	struct ipath_devdata *dd = pd->port_dd;
2014 	int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
2015 	int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
2016 
2017 	ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
2018 		   pd->port_port);
2019 	for (i = port_tidbase; i < maxtid; i++) {
2020 		struct page *ps = dd->ipath_pageshadow[i];
2021 
2022 		if (!ps)
2023 			continue;
2024 
2025 		dd->ipath_pageshadow[i] = NULL;
2026 		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
2027 			PAGE_SIZE, PCI_DMA_FROMDEVICE);
2028 		ipath_release_user_pages_on_close(&ps, 1);
2029 		cnt++;
2030 		ipath_stats.sps_pageunlocks++;
2031 	}
2032 	if (cnt)
2033 		ipath_cdbg(VERBOSE, "Port %u unlocked %u expTID entries\n",
2034 			   pd->port_port, cnt);
2035 
2036 	if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
2037 		ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
2038 			   (unsigned long long) ipath_stats.sps_pagelocks,
2039 			   (unsigned long long)
2040 			   ipath_stats.sps_pageunlocks);
2041 }
2042 
2043 static int ipath_close(struct inode *in, struct file *fp)
2044 {
2045 	int ret = 0;
2046 	struct ipath_filedata *fd;
2047 	struct ipath_portdata *pd;
2048 	struct ipath_devdata *dd;
2049 	unsigned long flags;
2050 	unsigned port;
2051 	struct pid *pid;
2052 
2053 	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
2054 		   (long)in->i_rdev, fp->private_data);
2055 
2056 	mutex_lock(&ipath_mutex);
2057 
2058 	fd = fp->private_data;
2059 	fp->private_data = NULL;
2060 	pd = fd->pd;
2061 	if (!pd) {
2062 		mutex_unlock(&ipath_mutex);
2063 		goto bail;
2064 	}
2065 
2066 	dd = pd->port_dd;
2067 
2068 	/* drain user sdma queue */
2069 	ipath_user_sdma_queue_drain(dd, fd->pq);
2070 	ipath_user_sdma_queue_destroy(fd->pq);
2071 
2072 	if (--pd->port_cnt) {
2073 		/*
2074 		 * XXX If the master closes the port before the slave(s),
2075 		 * revoke the mmap for the eager receive queue so
2076 		 * the slave(s) don't wait for receive data forever.
2077 		 */
2078 		pd->active_slaves &= ~(1 << fd->subport);
2079 		put_pid(pd->port_subpid[fd->subport]);
2080 		pd->port_subpid[fd->subport] = NULL;
2081 		mutex_unlock(&ipath_mutex);
2082 		goto bail;
2083 	}
2084 	/* early; no interrupt users after this */
2085 	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
2086 	port = pd->port_port;
2087 	dd->ipath_pd[port] = NULL;
2088 	pid = pd->port_pid;
2089 	pd->port_pid = NULL;
2090 	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
2091 
2092 	if (pd->port_rcvwait_to || pd->port_piowait_to
2093 	    || pd->port_rcvnowait || pd->port_pionowait) {
2094 		ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
2095 			   "%u rcv, %u pio already\n",
2096 			   pd->port_port, pd->port_rcvwait_to,
2097 			   pd->port_piowait_to, pd->port_rcvnowait,
2098 			   pd->port_pionowait);
2099 		pd->port_rcvwait_to = pd->port_piowait_to =
2100 			pd->port_rcvnowait = pd->port_pionowait = 0;
2101 	}
2102 	if (pd->port_flag) {
2103 		ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
2104 			  pd->port_port, pd->port_flag);
2105 		pd->port_flag = 0;
2106 	}
2107 
2108 	if (dd->ipath_kregbase) {
2109 		/* atomically clear receive enable port and intr avail. */
2110 		clear_bit(dd->ipath_r_portenable_shift + port,
2111 			  &dd->ipath_rcvctrl);
2112 		clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
2113 			  &dd->ipath_rcvctrl);
2114 		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
2115 			dd->ipath_rcvctrl);
2116 		/* and read back from chip to be sure that nothing
2117 		 * else is in flight when we do the rest */
2118 		(void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2119 
2120 		/* clean up the pkeys for this port user */
2121 		ipath_clean_part_key(pd, dd);
2122 		/*
2123 		 * be paranoid, and never write 0's to these, just use an
2124 		 * unused part of the port 0 tail page.  Of course,
2125 		 * rcvhdraddr points to a large chunk of memory, so this
2126 		 * could still trash things, but at least it won't trash
2127 		 * page 0, and by disabling the port, it should stop "soon",
2128 		 * even if a packet or two is already in flight after we
2129 		 * disabled the port.
2130 		 */
2131 		ipath_write_kreg_port(dd,
2132 			dd->ipath_kregs->kr_rcvhdrtailaddr, port,
2133 			dd->ipath_dummy_hdrq_phys);
2134 		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
2135 			pd->port_port, dd->ipath_dummy_hdrq_phys);
2136 
2137 		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
2138 		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
2139 			pd->port_piocnt, 1);
2140 
2141 		dd->ipath_f_clear_tids(dd, pd->port_port);
2142 
2143 		if (dd->ipath_pageshadow)
2144 			unlock_expected_tids(pd);
2145 		ipath_stats.sps_ports--;
2146 		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
2147 			   pd->port_comm, pid_nr(pid),
2148 			   dd->ipath_unit, port);
2149 	}
2150 
2151 	put_pid(pid);
2152 	mutex_unlock(&ipath_mutex);
2153 	ipath_free_pddata(dd, pd); /* after releasing the mutex */
2154 
2155 bail:
2156 	kfree(fd);
2157 	return ret;
2158 }
2159 
2160 static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
2161 			   struct ipath_port_info __user *uinfo)
2162 {
2163 	struct ipath_port_info info;
2164 	int nup;
2165 	int ret;
2166 	size_t sz;
2167 
2168 	(void) ipath_count_units(NULL, &nup, NULL);
2169 	info.num_active = nup;
2170 	info.unit = pd->port_dd->ipath_unit;
2171 	info.port = pd->port_port;
2172 	info.subport = subport;
2173 	/* Don't return new fields if old library opened the port. */
2174 	if (ipath_supports_subports(pd->userversion >> 16,
2175 				    pd->userversion & 0xffff)) {
2176 		/* Number of user ports available for this device. */
2177 		info.num_ports = pd->port_dd->ipath_cfgports - 1;
2178 		info.num_subports = pd->port_subport_cnt;
2179 		sz = sizeof(info);
2180 	} else
2181 		sz = sizeof(info) - 2 * sizeof(u16);
2182 
2183 	if (copy_to_user(uinfo, &info, sz)) {
2184 		ret = -EFAULT;
2185 		goto bail;
2186 	}
2187 	ret = 0;
2188 
2189 bail:
2190 	return ret;
2191 }
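/*
 * The size trick above assumes num_ports and num_subports are the two
 * trailing u16 fields of struct ipath_port_info, so copying
 * sizeof(info) - 2 * sizeof(u16) bytes hands an old (pre-subport)
 * library exactly the layout it expects.
 */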
2192 
2193 static int ipath_get_slave_info(struct ipath_portdata *pd,
2194 				void __user *slave_mask_addr)
2195 {
2196 	int ret = 0;
2197 
2198 	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
2199 		ret = -EFAULT;
2200 	return ret;
2201 }
2202 
2203 static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
2204 				   u32 __user *inflightp)
2205 {
2206 	const u32 val = ipath_user_sdma_inflight_counter(pq);
2207 
2208 	if (put_user(val, inflightp))
2209 		return -EFAULT;
2210 
2211 	return 0;
2212 }
2213 
2214 static int ipath_sdma_get_complete(struct ipath_devdata *dd,
2215 				   struct ipath_user_sdma_queue *pq,
2216 				   u32 __user *completep)
2217 {
2218 	u32 val;
2219 	int err;
2220 
2221 	err = ipath_user_sdma_make_progress(dd, pq);
2222 	if (err < 0)
2223 		return err;
2224 
2225 	val = ipath_user_sdma_complete_counter(pq);
2226 	if (put_user(val, completep))
2227 		return -EFAULT;
2228 
2229 	return 0;
2230 }
2231 
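/*
 * The user library drives the driver through plain write()s of a
 * struct ipath_cmd: a type word followed by the payload for that type.
 * A minimal sketch of the usual startup sequence, assuming fd is an
 * open descriptor on one of the ipath device nodes (illustrative only,
 * error handling omitted):
 *
 *	struct ipath_cmd c = { .type = IPATH_CMD_ASSIGN_PORT };
 *
 *	c.cmd.user_info.spu_userversion =
 *		(IPATH_USER_SWMAJOR << 16) | IPATH_USER_SWMINOR;
 *	write(fd, &c, sizeof(c));
 *
 *	c.type = IPATH_CMD_USER_INIT;
 *	c.cmd.user_info.spu_base_info = (__u64) (unsigned long) &binfo;
 *	c.cmd.user_info.spu_base_info_size = sizeof(binfo);
 *	write(fd, &c, sizeof(c));
 *
 * The first write assigns a unit/port, the second initializes the port
 * and asks the driver to fill binfo, the base-info structure handled by
 * ipath_get_base_info(); on success each write returns the number of
 * bytes it consumed.
 */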
2232 static ssize_t ipath_write(struct file *fp, const char __user *data,
2233 			   size_t count, loff_t *off)
2234 {
2235 	const struct ipath_cmd __user *ucmd;
2236 	struct ipath_portdata *pd;
2237 	const void __user *src;
2238 	size_t consumed, copy;
2239 	struct ipath_cmd cmd;
2240 	ssize_t ret = 0;
2241 	void *dest;
2242 
2243 	if (count < sizeof(cmd.type)) {
2244 		ret = -EINVAL;
2245 		goto bail;
2246 	}
2247 
2248 	ucmd = (const struct ipath_cmd __user *) data;
2249 
2250 	if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
2251 		ret = -EFAULT;
2252 		goto bail;
2253 	}
2254 
2255 	consumed = sizeof(cmd.type);
2256 
2257 	switch (cmd.type) {
2258 	case IPATH_CMD_ASSIGN_PORT:
2259 	case __IPATH_CMD_USER_INIT:
2260 	case IPATH_CMD_USER_INIT:
2261 		copy = sizeof(cmd.cmd.user_info);
2262 		dest = &cmd.cmd.user_info;
2263 		src = &ucmd->cmd.user_info;
2264 		break;
2265 	case IPATH_CMD_RECV_CTRL:
2266 		copy = sizeof(cmd.cmd.recv_ctrl);
2267 		dest = &cmd.cmd.recv_ctrl;
2268 		src = &ucmd->cmd.recv_ctrl;
2269 		break;
2270 	case IPATH_CMD_PORT_INFO:
2271 		copy = sizeof(cmd.cmd.port_info);
2272 		dest = &cmd.cmd.port_info;
2273 		src = &ucmd->cmd.port_info;
2274 		break;
2275 	case IPATH_CMD_TID_UPDATE:
2276 	case IPATH_CMD_TID_FREE:
2277 		copy = sizeof(cmd.cmd.tid_info);
2278 		dest = &cmd.cmd.tid_info;
2279 		src = &ucmd->cmd.tid_info;
2280 		break;
2281 	case IPATH_CMD_SET_PART_KEY:
2282 		copy = sizeof(cmd.cmd.part_key);
2283 		dest = &cmd.cmd.part_key;
2284 		src = &ucmd->cmd.part_key;
2285 		break;
2286 	case __IPATH_CMD_SLAVE_INFO:
2287 		copy = sizeof(cmd.cmd.slave_mask_addr);
2288 		dest = &cmd.cmd.slave_mask_addr;
2289 		src = &ucmd->cmd.slave_mask_addr;
2290 		break;
2291 	case IPATH_CMD_PIOAVAILUPD:	/* force an update of PIOAvail reg */
2292 		copy = 0;
2293 		src = NULL;
2294 		dest = NULL;
2295 		break;
2296 	case IPATH_CMD_POLL_TYPE:
2297 		copy = sizeof(cmd.cmd.poll_type);
2298 		dest = &cmd.cmd.poll_type;
2299 		src = &ucmd->cmd.poll_type;
2300 		break;
2301 	case IPATH_CMD_ARMLAUNCH_CTRL:
2302 		copy = sizeof(cmd.cmd.armlaunch_ctrl);
2303 		dest = &cmd.cmd.armlaunch_ctrl;
2304 		src = &ucmd->cmd.armlaunch_ctrl;
2305 		break;
2306 	case IPATH_CMD_SDMA_INFLIGHT:
2307 		copy = sizeof(cmd.cmd.sdma_inflight);
2308 		dest = &cmd.cmd.sdma_inflight;
2309 		src = &ucmd->cmd.sdma_inflight;
2310 		break;
2311 	case IPATH_CMD_SDMA_COMPLETE:
2312 		copy = sizeof(cmd.cmd.sdma_complete);
2313 		dest = &cmd.cmd.sdma_complete;
2314 		src = &ucmd->cmd.sdma_complete;
2315 		break;
2316 	default:
2317 		ret = -EINVAL;
2318 		goto bail;
2319 	}
2320 
2321 	if (copy) {
2322 		if ((count - consumed) < copy) {
2323 			ret = -EINVAL;
2324 			goto bail;
2325 		}
2326 
2327 		if (copy_from_user(dest, src, copy)) {
2328 			ret = -EFAULT;
2329 			goto bail;
2330 		}
2331 
2332 		consumed += copy;
2333 	}
2334 
2335 	pd = port_fp(fp);
2336 	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
2337 		cmd.type != IPATH_CMD_ASSIGN_PORT) {
2338 		ret = -EINVAL;
2339 		goto bail;
2340 	}
2341 
2342 	switch (cmd.type) {
2343 	case IPATH_CMD_ASSIGN_PORT:
2344 		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
2345 		if (ret)
2346 			goto bail;
2347 		break;
2348 	case __IPATH_CMD_USER_INIT:
2349 		/* backwards compatibility, get port first */
2350 		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
2351 		if (ret)
2352 			goto bail;
2353 		/* and fall through to current version. */
2354 	case IPATH_CMD_USER_INIT:
2355 		ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
2356 		if (ret)
2357 			goto bail;
2358 		ret = ipath_get_base_info(
2359 			fp, (void __user *) (unsigned long)
2360 			cmd.cmd.user_info.spu_base_info,
2361 			cmd.cmd.user_info.spu_base_info_size);
2362 		break;
2363 	case IPATH_CMD_RECV_CTRL:
2364 		ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
2365 		break;
2366 	case IPATH_CMD_PORT_INFO:
2367 		ret = ipath_port_info(pd, subport_fp(fp),
2368 				      (struct ipath_port_info __user *)
2369 				      (unsigned long) cmd.cmd.port_info);
2370 		break;
2371 	case IPATH_CMD_TID_UPDATE:
2372 		ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
2373 		break;
2374 	case IPATH_CMD_TID_FREE:
2375 		ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
2376 		break;
2377 	case IPATH_CMD_SET_PART_KEY:
2378 		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
2379 		break;
2380 	case __IPATH_CMD_SLAVE_INFO:
2381 		ret = ipath_get_slave_info(pd,
2382 					   (void __user *) (unsigned long)
2383 					   cmd.cmd.slave_mask_addr);
2384 		break;
2385 	case IPATH_CMD_PIOAVAILUPD:
2386 		ipath_force_pio_avail_update(pd->port_dd);
2387 		break;
2388 	case IPATH_CMD_POLL_TYPE:
2389 		pd->poll_type = cmd.cmd.poll_type;
2390 		break;
2391 	case IPATH_CMD_ARMLAUNCH_CTRL:
2392 		if (cmd.cmd.armlaunch_ctrl)
2393 			ipath_enable_armlaunch(pd->port_dd);
2394 		else
2395 			ipath_disable_armlaunch(pd->port_dd);
2396 		break;
2397 	case IPATH_CMD_SDMA_INFLIGHT:
2398 		ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
2399 					      (u32 __user *) (unsigned long)
2400 					      cmd.cmd.sdma_inflight);
2401 		break;
2402 	case IPATH_CMD_SDMA_COMPLETE:
2403 		ret = ipath_sdma_get_complete(pd->port_dd,
2404 					      user_sdma_queue_fp(fp),
2405 					      (u32 __user *) (unsigned long)
2406 					      cmd.cmd.sdma_complete);
2407 		break;
2408 	}
2409 
2410 	if (ret >= 0)
2411 		ret = consumed;
2412 
2413 bail:
2414 	return ret;
2415 }
2416 
2417 static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
2418 			    unsigned long dim, loff_t off)
2419 {
2420 	struct file *filp = iocb->ki_filp;
2421 	struct ipath_filedata *fp = filp->private_data;
2422 	struct ipath_portdata *pd = port_fp(filp);
2423 	struct ipath_user_sdma_queue *pq = fp->pq;
2424 
2425 	if (!dim)
2426 		return -EINVAL;
2427 
2428 	return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
2429 }
2430 
2431 static struct class *ipath_class;
2432 
2433 static int init_cdev(int minor, char *name, const struct file_operations *fops,
2434 		     struct cdev **cdevp, struct device **devp)
2435 {
2436 	const dev_t dev = MKDEV(IPATH_MAJOR, minor);
2437 	struct cdev *cdev = NULL;
2438 	struct device *device = NULL;
2439 	int ret;
2440 
2441 	cdev = cdev_alloc();
2442 	if (!cdev) {
2443 		printk(KERN_ERR IPATH_DRV_NAME
2444 		       ": Could not allocate cdev for minor %d, %s\n",
2445 		       minor, name);
2446 		ret = -ENOMEM;
2447 		goto done;
2448 	}
2449 
2450 	cdev->owner = THIS_MODULE;
2451 	cdev->ops = fops;
2452 	kobject_set_name(&cdev->kobj, name);
2453 
2454 	ret = cdev_add(cdev, dev, 1);
2455 	if (ret < 0) {
2456 		printk(KERN_ERR IPATH_DRV_NAME
2457 		       ": Could not add cdev for minor %d, %s (err %d)\n",
2458 		       minor, name, -ret);
2459 		goto err_cdev;
2460 	}
2461 
2462 	device = device_create(ipath_class, NULL, dev, NULL, name);
2463 
2464 	if (IS_ERR(device)) {
2465 		ret = PTR_ERR(device);
2466 		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
2467 		       "device for minor %d, %s (err %d)\n",
2468 		       minor, name, -ret);
2469 		goto err_cdev;
2470 	}
2471 
2472 	goto done;
2473 
2474 err_cdev:
2475 	cdev_del(cdev);
2476 	cdev = NULL;
2477 
2478 done:
2479 	if (ret >= 0) {
2480 		*cdevp = cdev;
2481 		*devp = device;
2482 	} else {
2483 		*cdevp = NULL;
2484 		*devp = NULL;
2485 	}
2486 
2487 	return ret;
2488 }
2489 
2490 int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
2491 		    struct cdev **cdevp, struct device **devp)
2492 {
2493 	return init_cdev(minor, name, fops, cdevp, devp);
2494 }
2495 
2496 static void cleanup_cdev(struct cdev **cdevp,
2497 			 struct device **devp)
2498 {
2499 	struct device *dev = *devp;
2500 
2501 	if (dev) {
2502 		device_unregister(dev);
2503 		*devp = NULL;
2504 	}
2505 
2506 	if (*cdevp) {
2507 		cdev_del(*cdevp);
2508 		*cdevp = NULL;
2509 	}
2510 }
2511 
2512 void ipath_cdev_cleanup(struct cdev **cdevp,
2513 			struct device **devp)
2514 {
2515 	cleanup_cdev(cdevp, devp);
2516 }
2517 
2518 static struct cdev *wildcard_cdev;
2519 static struct device *wildcard_dev;
2520 
2521 static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
2522 
2523 static int user_init(void)
2524 {
2525 	int ret;
2526 
2527 	ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
2528 	if (ret < 0) {
2529 		printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
2530 		       "chrdev region (err %d)\n", -ret);
2531 		goto done;
2532 	}
2533 
2534 	ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
2535 
2536 	if (IS_ERR(ipath_class)) {
2537 		ret = PTR_ERR(ipath_class);
2538 		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
2539 		       "device class (err %d)\n", -ret);
2540 		goto bail;
2541 	}
2542 
2543 	goto done;
2544 bail:
2545 	unregister_chrdev_region(dev, IPATH_NMINORS);
2546 done:
2547 	return ret;
2548 }
2549 
2550 static void user_cleanup(void)
2551 {
2552 	if (ipath_class) {
2553 		class_destroy(ipath_class);
2554 		ipath_class = NULL;
2555 	}
2556 
2557 	unregister_chrdev_region(dev, IPATH_NMINORS);
2558 }
2559 
2560 static atomic_t user_count = ATOMIC_INIT(0);
2561 static atomic_t user_setup = ATOMIC_INIT(0);
2562 
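/*
 * Device node layout: the first unit to register also creates the
 * wildcard "ipath" node at minor 0, whose opens pick a unit via
 * find_best_unit(); every unit additionally gets its own
 * "ipath<unit>" node at minor unit + 1, which ipath_assign_port()
 * maps back to that specific unit.
 */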
2563 int ipath_user_add(struct ipath_devdata *dd)
2564 {
2565 	char name[10];
2566 	int ret;
2567 
2568 	if (atomic_inc_return(&user_count) == 1) {
2569 		ret = user_init();
2570 		if (ret < 0) {
2571 			ipath_dev_err(dd, "Unable to set up user support: "
2572 				      "error %d\n", -ret);
2573 			goto bail;
2574 		}
2575 		ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
2576 				&wildcard_dev);
2577 		if (ret < 0) {
2578 			ipath_dev_err(dd, "Could not create wildcard "
2579 				      "minor: error %d\n", -ret);
2580 			goto bail_user;
2581 		}
2582 
2583 		atomic_set(&user_setup, 1);
2584 	}
2585 
2586 	snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
2587 
2588 	ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
2589 			&dd->user_cdev, &dd->user_dev);
2590 	if (ret < 0)
2591 		ipath_dev_err(dd, "Could not create user minor %d, %s\n",
2592 			      dd->ipath_unit + 1, name);
2593 
2594 	goto bail;
2595 
2596 bail_user:
2597 	user_cleanup();
2598 bail:
2599 	return ret;
2600 }
2601 
2602 void ipath_user_remove(struct ipath_devdata *dd)
2603 {
2604 	cleanup_cdev(&dd->user_cdev, &dd->user_dev);
2605 
2606 	if (atomic_dec_return(&user_count) == 0) {
2607 		if (atomic_read(&user_setup) == 0)
2608 			goto bail;
2609 
2610 		cleanup_cdev(&wildcard_cdev, &wildcard_dev);
2611 		user_cleanup();
2612 
2613 		atomic_set(&user_setup, 0);
2614 	}
2615 bail:
2616 	return;
2617 }
2618