1 /*
2  *
3  * This file is provided under a dual BSD/GPLv2 license.  When using or
4  * redistributing this file, you may do so under either license.
5  *
6  * GPL LICENSE SUMMARY
7  *
8  * Copyright(c) 2015 Intel Corporation.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of version 2 of the GNU General Public License as
12  * published by the Free Software Foundation.
13  *
14  * This program is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18  *
19  * BSD LICENSE
20  *
21  * Copyright(c) 2015 Intel Corporation.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  *
27  *  - Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  *  - Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in
31  *    the documentation and/or other materials provided with the
32  *    distribution.
33  *  - Neither the name of Intel Corporation nor the names of its
34  *    contributors may be used to endorse or promote products derived
35  *    from this software without specific prior written permission.
36  *
37  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48  *
49  */
50 
51 #include <linux/delay.h>
52 #include "hfi.h"
53 #include "qp.h"
54 #include "trace.h"
55 
56 #define SC_CTXT_PACKET_EGRESS_TIMEOUT 350 /* in chip cycles */
57 
58 #define SC(name) SEND_CTXT_##name
59 /*
60  * Send Context functions
61  */
62 static void sc_wait_for_packet_egress(struct send_context *sc, int pause);
63 
64 /*
65  * Set the CM reset bit and wait for it to clear.  Use the provided
66  * sendctrl register.  This routine has no locking.
67  */
68 void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
69 {
70 	write_csr(dd, SEND_CTRL, sendctrl | SEND_CTRL_CM_RESET_SMASK);
71 	while (1) {
72 		udelay(1);
73 		sendctrl = read_csr(dd, SEND_CTRL);
74 		if ((sendctrl & SEND_CTRL_CM_RESET_SMASK) == 0)
75 			break;
76 	}
77 }
78 
79 /* defined in header release 48 and higher */
80 #ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
81 #define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
82 #define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
83 #define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
84 		<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
85 #endif
86 
87 /* global control of PIO send */
88 void pio_send_control(struct hfi1_devdata *dd, int op)
89 {
90 	u64 reg, mask;
91 	unsigned long flags;
92 	int write = 1;	/* write sendctrl back */
93 	int flush = 0;	/* re-read sendctrl to make sure it is flushed */
94 
95 	spin_lock_irqsave(&dd->sendctrl_lock, flags);
96 
97 	reg = read_csr(dd, SEND_CTRL);
98 	switch (op) {
99 	case PSC_GLOBAL_ENABLE:
100 		reg |= SEND_CTRL_SEND_ENABLE_SMASK;
101 	/* Fall through */
102 	case PSC_DATA_VL_ENABLE:
103 		/* Disallow sending on VLs not enabled */
104 		mask = (((~0ull)<<num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK)<<
105 				SEND_CTRL_UNSUPPORTED_VL_SHIFT;
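		/*
		 * Worked example (assuming num_vls == 4 and the 8-bit VL
		 * mask above): (~0ull << 4) & 0xff == 0xf0, so VLs 4-7 are
		 * marked unsupported while VLs 0-3 remain enabled for send.
		 */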
106 		reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
107 		break;
108 	case PSC_GLOBAL_DISABLE:
109 		reg &= ~SEND_CTRL_SEND_ENABLE_SMASK;
110 		break;
111 	case PSC_GLOBAL_VLARB_ENABLE:
112 		reg |= SEND_CTRL_VL_ARBITER_ENABLE_SMASK;
113 		break;
114 	case PSC_GLOBAL_VLARB_DISABLE:
115 		reg &= ~SEND_CTRL_VL_ARBITER_ENABLE_SMASK;
116 		break;
117 	case PSC_CM_RESET:
118 		__cm_reset(dd, reg);
119 		write = 0; /* CSR already written (and flushed) */
120 		break;
121 	case PSC_DATA_VL_DISABLE:
122 		reg |= SEND_CTRL_UNSUPPORTED_VL_SMASK;
123 		flush = 1;
124 		break;
125 	default:
126 		dd_dev_err(dd, "%s: invalid control %d\n", __func__, op);
127 		break;
128 	}
129 
130 	if (write) {
131 		write_csr(dd, SEND_CTRL, reg);
132 		if (flush)
133 			(void) read_csr(dd, SEND_CTRL); /* flush write */
134 	}
135 
136 	spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
137 }
138 
139 /* number of send context memory pools */
140 #define NUM_SC_POOLS 2
141 
142 /* Send Context Size (SCS) wildcards */
143 #define SCS_POOL_0 -1
144 #define SCS_POOL_1 -2
145 /* Send Context Count (SCC) wildcards */
146 #define SCC_PER_VL -1
147 #define SCC_PER_CPU  -2
148 
149 #define SCC_PER_KRCVQ  -3
150 #define SCC_ACK_CREDITS  32
151 
152 #define PIO_WAIT_BATCH_SIZE 5
153 
154 /* default send context sizes */
155 static struct sc_config_sizes sc_config_sizes[SC_MAX] = {
156 	[SC_KERNEL] = { .size  = SCS_POOL_0,	/* even divide, pool 0 */
157 			.count = SCC_PER_VL },/* one per NUMA */
158 	[SC_ACK]    = { .size  = SCC_ACK_CREDITS,
159 			.count = SCC_PER_KRCVQ },
160 	[SC_USER]   = { .size  = SCS_POOL_0,	/* even divide, pool 0 */
161 			.count = SCC_PER_CPU },	/* one per CPU */
162 
163 };
164 
165 /* send context memory pool configuration */
166 struct mem_pool_config {
167 	int centipercent;	/* % of memory, in 100ths of 1% */
168 	int absolute_blocks;	/* absolute block count */
169 };
170 
171 /* default memory pool configuration: 100% in pool 0 */
172 static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = {
173 	/* centi%, abs blocks */
174 	{  10000,     -1 },		/* pool 0 */
175 	{      0,     -1 },		/* pool 1 */
176 };
177 
178 /* memory pool information, used when calculating final sizes */
179 struct mem_pool_info {
180 	int centipercent;	/* 100th of 1% of memory to use, -1 if blocks
181 				   already set */
182 	int count;		/* count of contexts in the pool */
183 	int blocks;		/* block size of the pool */
184 	int size;		/* context size, in blocks */
185 };
186 
187 /*
188  * Convert a pool wildcard to a valid pool index.  The wildcards
189  * start at -1 and increase negatively.  Map them as:
190  *	-1 => 0
191  *	-2 => 1
192  *	etc.
193  *
194  * Return -1 on non-wildcard input, otherwise convert to a pool number.
195  */
196 static int wildcard_to_pool(int wc)
197 {
198 	if (wc >= 0)
199 		return -1;	/* non-wildcard */
200 	return -wc - 1;
201 }
202 
203 static const char *sc_type_names[SC_MAX] = {
204 	"kernel",
205 	"ack",
206 	"user"
207 };
208 
209 static const char *sc_type_name(int index)
210 {
211 	if (index < 0 || index >= SC_MAX)
212 		return "unknown";
213 	return sc_type_names[index];
214 }
215 
216 /*
217  * Read the send context memory pool configuration and send context
218  * size configuration.  Replace any wildcards and come up with final
219  * counts and sizes for the send context types.
220  */
221 int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
222 {
223 	struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } };
224 	int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1;
225 	int total_contexts = 0;
226 	int fixed_blocks;
227 	int pool_blocks;
228 	int used_blocks;
229 	int cp_total;		/* centipercent total */
230 	int ab_total;		/* absolute block total */
231 	int extra;
232 	int i;
233 
234 	/*
235 	 * Step 0:
236 	 *	- copy the centipercents/absolute sizes from the pool config
237 	 *	- sanity check these values
238 	 *	- add up centipercents, then later check for full value
239 	 *	- add up absolute blocks, then later check for over-commit
240 	 */
241 	cp_total = 0;
242 	ab_total = 0;
243 	for (i = 0; i < NUM_SC_POOLS; i++) {
244 		int cp = sc_mem_pool_config[i].centipercent;
245 		int ab = sc_mem_pool_config[i].absolute_blocks;
246 
247 		/*
248 		 * A negative value is "unused" or "invalid".  Both *can*
249 		 * be valid, but centipercent wins, so check that first
250 		 */
251 		if (cp >= 0) {			/* centipercent valid */
252 			cp_total += cp;
253 		} else if (ab >= 0) {		/* absolute blocks valid */
254 			ab_total += ab;
255 		} else {			/* neither valid */
256 			dd_dev_err(
257 				dd,
258 				"Send context memory pool %d: both the block count and centipercent are invalid\n",
259 				i);
260 			return -EINVAL;
261 		}
262 
263 		mem_pool_info[i].centipercent = cp;
264 		mem_pool_info[i].blocks = ab;
265 	}
266 
267 	/* do not use both % and absolute blocks for different pools */
268 	if (cp_total != 0 && ab_total != 0) {
269 		dd_dev_err(
270 			dd,
271 			"All send context memory pools must be described as either centipercent or blocks, no mixing between pools\n");
272 		return -EINVAL;
273 	}
274 
275 	/* if any percentages are present, they must add up to 100% x 100 */
276 	if (cp_total != 0 && cp_total != 10000) {
277 		dd_dev_err(
278 			dd,
279 			"Send context memory pool centipercent is %d, expecting 10000\n",
280 			cp_total);
281 		return -EINVAL;
282 	}
283 
284 	/* the absolute pool total cannot be more than the mem total */
285 	if (ab_total > total_blocks) {
286 		dd_dev_err(
287 			dd,
288 			"Send context memory pool absolute block count %d is larger than the memory size %d\n",
289 			ab_total, total_blocks);
290 		return -EINVAL;
291 	}
292 
293 	/*
294 	 * Step 2:
295 	 *	- copy from the context size config
296 	 *	- replace context type wildcard counts with real values
297 	 *	- add up non-memory pool block sizes
298 	 *	- add up memory pool user counts
299 	 */
300 	fixed_blocks = 0;
301 	for (i = 0; i < SC_MAX; i++) {
302 		int count = sc_config_sizes[i].count;
303 		int size = sc_config_sizes[i].size;
304 		int pool;
305 
306 		/*
307 		 * Sanity check count: Either a positive value or
308 		 * one of the expected wildcards is valid.  The positive
309 		 * value is checked later when we compare against total
310 		 * memory available.
311 		 */
312 		if (i == SC_ACK) {
313 			count = dd->n_krcv_queues;
314 		} else if (i == SC_KERNEL) {
315 			count = num_vls + 1 /* VL15 */;
316 		} else if (count == SCC_PER_CPU) {
317 			count = dd->num_rcv_contexts - dd->n_krcv_queues;
318 		} else if (count < 0) {
319 			dd_dev_err(
320 				dd,
321 				"%s send context invalid count wildcard %d\n",
322 				sc_type_name(i), count);
323 			return -EINVAL;
324 		}
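		/*
		 * Clamp the count so the running total never exceeds the
		 * number of send contexts the chip actually implements;
		 * later context types simply get whatever is left.
		 */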
325 		if (total_contexts + count > dd->chip_send_contexts)
326 			count = dd->chip_send_contexts - total_contexts;
327 
328 		total_contexts += count;
329 
330 		/*
331 		 * Sanity check pool: The conversion will return a pool
332 		 * number or -1 if a fixed (non-negative) value.  The fixed
333 		 * value is checked later when we compare against
334 		 * total memory available.
335 		 */
336 		pool = wildcard_to_pool(size);
337 		if (pool == -1) {			/* non-wildcard */
338 			fixed_blocks += size * count;
339 		} else if (pool < NUM_SC_POOLS) {	/* valid wildcard */
340 			mem_pool_info[pool].count += count;
341 		} else {				/* invalid wildcard */
342 			dd_dev_err(
343 				dd,
344 				"%s send context invalid pool wildcard %d\n",
345 				sc_type_name(i), size);
346 			return -EINVAL;
347 		}
348 
349 		dd->sc_sizes[i].count = count;
350 		dd->sc_sizes[i].size = size;
351 	}
352 	if (fixed_blocks > total_blocks) {
353 		dd_dev_err(
354 			dd,
355 			"Send context fixed block count, %u, larger than total block count %u\n",
356 			fixed_blocks, total_blocks);
357 		return -EINVAL;
358 	}
359 
360 	/* step 3: calculate the blocks in the pools, and pool context sizes */
361 	pool_blocks = total_blocks - fixed_blocks;
362 	if (ab_total > pool_blocks) {
363 		dd_dev_err(
364 			dd,
365 			"Send context fixed pool sizes, %u, larger than pool block count %u\n",
366 			ab_total, pool_blocks);
367 		return -EINVAL;
368 	}
369 	/* subtract off the fixed pool blocks */
370 	pool_blocks -= ab_total;
371 
372 	for (i = 0; i < NUM_SC_POOLS; i++) {
373 		struct mem_pool_info *pi = &mem_pool_info[i];
374 
375 		/* % beats absolute blocks */
376 		if (pi->centipercent >= 0)
377 			pi->blocks = (pool_blocks * pi->centipercent) / 10000;
378 
379 		if (pi->blocks == 0 && pi->count != 0) {
380 			dd_dev_err(
381 				dd,
382 				"Send context memory pool %d has %u contexts, but no blocks\n",
383 				i, pi->count);
384 			return -EINVAL;
385 		}
386 		if (pi->count == 0) {
387 			/* warn about wasted blocks */
388 			if (pi->blocks != 0)
389 				dd_dev_err(
390 					dd,
391 					"Send context memory pool %d has %u blocks, but zero contexts\n",
392 					i, pi->blocks);
393 			pi->size = 0;
394 		} else {
395 			pi->size = pi->blocks / pi->count;
396 		}
397 	}
398 
399 	/* step 4: fill in the context type sizes from the pool sizes */
400 	used_blocks = 0;
401 	for (i = 0; i < SC_MAX; i++) {
402 		if (dd->sc_sizes[i].size < 0) {
403 			unsigned pool = wildcard_to_pool(dd->sc_sizes[i].size);
404 
405 			WARN_ON_ONCE(pool >= NUM_SC_POOLS);
406 			dd->sc_sizes[i].size = mem_pool_info[pool].size;
407 		}
408 		/* make sure we are not larger than what is allowed by the HW */
409 #define PIO_MAX_BLOCKS 1024
410 		if (dd->sc_sizes[i].size > PIO_MAX_BLOCKS)
411 			dd->sc_sizes[i].size = PIO_MAX_BLOCKS;
412 
413 		/* calculate our total usage */
414 		used_blocks += dd->sc_sizes[i].size * dd->sc_sizes[i].count;
415 	}
416 	extra = total_blocks - used_blocks;
417 	if (extra != 0)
418 		dd_dev_info(dd, "unused send context blocks: %d\n", extra);
419 
420 	return total_contexts;
421 }
422 
423 int init_send_contexts(struct hfi1_devdata *dd)
424 {
425 	u16 base;
426 	int ret, i, j, context;
427 
428 	ret = init_credit_return(dd);
429 	if (ret)
430 		return ret;
431 
432 	dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8),
433 					GFP_KERNEL);
434 	dd->send_contexts = kcalloc(dd->num_send_contexts,
435 					sizeof(struct send_context_info),
436 					GFP_KERNEL);
437 	if (!dd->send_contexts || !dd->hw_to_sw) {
438 		kfree(dd->hw_to_sw);
439 		kfree(dd->send_contexts);
440 		free_credit_return(dd);
441 		return -ENOMEM;
442 	}
443 
444 	/* hardware context map starts with invalid send context indices */
445 	for (i = 0; i < TXE_NUM_CONTEXTS; i++)
446 		dd->hw_to_sw[i] = INVALID_SCI;
447 
448 	/*
449 	 * All send contexts have their credit sizes.  Allocate credits
450 	 * for each context one after another from the global space.
451 	 */
452 	context = 0;
453 	base = 1;
454 	for (i = 0; i < SC_MAX; i++) {
455 		struct sc_config_sizes *scs = &dd->sc_sizes[i];
456 
457 		for (j = 0; j < scs->count; j++) {
458 			struct send_context_info *sci =
459 						&dd->send_contexts[context];
460 			sci->type = i;
461 			sci->base = base;
462 			sci->credits = scs->size;
463 
464 			context++;
465 			base += scs->size;
466 		}
467 	}
468 
469 	return 0;
470 }
471 
472 /*
473  * Allocate a software index and hardware context of the given type.
474  *
475  * Must be called with dd->sc_lock held.
476  */
477 static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index,
478 		       u32 *hw_context)
479 {
480 	struct send_context_info *sci;
481 	u32 index;
482 	u32 context;
483 
484 	for (index = 0, sci = &dd->send_contexts[0];
485 			index < dd->num_send_contexts; index++, sci++) {
486 		if (sci->type == type && sci->allocated == 0) {
487 			sci->allocated = 1;
488 			/* use a 1:1 mapping, but make them non-equal */
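			/* e.g. sw index 0 pairs with the highest hw context number */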
489 			context = dd->chip_send_contexts - index - 1;
490 			dd->hw_to_sw[context] = index;
491 			*sw_index = index;
492 			*hw_context = context;
493 			return 0; /* success */
494 		}
495 	}
496 	dd_dev_err(dd, "Unable to locate a free type %d send context\n", type);
497 	return -ENOSPC;
498 }
499 
500 /*
501  * Free the send context given by its software index.
502  *
503  * Must be called with dd->sc_lock held.
504  */
505 static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context)
506 {
507 	struct send_context_info *sci;
508 
509 	sci = &dd->send_contexts[sw_index];
510 	if (!sci->allocated) {
511 		dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n",
512 			__func__, sw_index, hw_context);
513 	}
514 	sci->allocated = 0;
515 	dd->hw_to_sw[hw_context] = INVALID_SCI;
516 }
517 
518 /* return the base context of a context in a group */
519 static inline u32 group_context(u32 context, u32 group)
520 {
521 	return (context >> group) << group;
522 }
523 
524 /* return the size of a group */
525 static inline u32 group_size(u32 group)
526 {
527 	return 1 << group;
528 }
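/*
 * Example: with group == 2 a group spans group_size(2) == 4 contexts,
 * and group_context(13, 2) == 12, the base context of that group.
 */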
529 
530 /*
531  * Obtain the credit return addresses, kernel virtual and physical, for the
532  * given sc.
533  *
534  * To understand this routine:
535  * o va and pa are arrays of struct credit_return.  One for each physical
536  *   send context, per NUMA.
537  * o Each send context always looks in its relative location in a struct
538  *   credit_return for its credit return.
539  * o Each send context in a group must have its return address CSR programmed
540  *   with the same value.  Use the address of the first send context in the
541  *   group.
542  */
543 static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa)
544 {
545 	u32 gc = group_context(sc->hw_context, sc->group);
546 	u32 index = sc->hw_context & 0x7;
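	/*
	 * gc is the first hw context of this context's group; index (the
	 * low three bits of the hw context) selects this context's slot
	 * within the credit_return entry shared by the group.
	 */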
547 
548 	sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index];
549 	*pa = (unsigned long)
550 	       &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc];
551 }
552 
553 /*
554  * Work queue function triggered in error interrupt routine for
555  * kernel contexts.
556  */
557 static void sc_halted(struct work_struct *work)
558 {
559 	struct send_context *sc;
560 
561 	sc = container_of(work, struct send_context, halt_work);
562 	sc_restart(sc);
563 }
564 
565 /*
566  * Calculate PIO block threshold for this send context using the given MTU.
567  * Trigger a return when one MTU plus optional header of credits remain.
568  *
569  * Parameter mtu is in bytes.
570  * Parameter hdrqentsize is in DWORDs.
571  *
572  * Return value is what to write into the CSR: trigger return when
573  * unreturned credits pass this count.
574  */
575 u32 sc_mtu_to_threshold(struct send_context *sc, u32 mtu, u32 hdrqentsize)
576 {
577 	u32 release_credits;
578 	u32 threshold;
579 
580 	/* add in the header size, then divide by the PIO block size */
581 	mtu += hdrqentsize << 2;
582 	release_credits = DIV_ROUND_UP(mtu, PIO_BLOCK_SIZE);
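	/*
	 * Worked example (assuming an 8192-byte MTU, a 32-DWORD header
	 * queue entry and 64-byte PIO blocks): mtu becomes 8192 + 128 =
	 * 8320 bytes, so release_credits = DIV_ROUND_UP(8320, 64) = 130.
	 */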
583 
584 	/* check against this context's credits */
585 	if (sc->credits <= release_credits)
586 		threshold = 1;
587 	else
588 		threshold = sc->credits - release_credits;
589 
590 	return threshold;
591 }
592 
593 /*
594  * Calculate credit threshold in terms of percent of the allocated credits.
595  * Trigger when unreturned credits equal or exceed the percentage of the whole.
596  *
597  * Return value is what to write into the CSR: trigger return when
598  * unreturned credits pass this count.
599  */
600 static u32 sc_percent_to_threshold(struct send_context *sc, u32 percent)
601 {
602 	return (sc->credits * percent) / 100;
603 }
604 
605 /*
606  * Set the credit return threshold.
607  */
608 void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
609 {
610 	unsigned long flags;
611 	u32 old_threshold;
612 	int force_return = 0;
613 
614 	spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
615 
616 	old_threshold = (sc->credit_ctrl >>
617 				SC(CREDIT_CTRL_THRESHOLD_SHIFT))
618 			 & SC(CREDIT_CTRL_THRESHOLD_MASK);
619 
620 	if (new_threshold != old_threshold) {
621 		sc->credit_ctrl =
622 			(sc->credit_ctrl
623 				& ~SC(CREDIT_CTRL_THRESHOLD_SMASK))
624 			| ((new_threshold
625 				& SC(CREDIT_CTRL_THRESHOLD_MASK))
626 			   << SC(CREDIT_CTRL_THRESHOLD_SHIFT));
627 		write_kctxt_csr(sc->dd, sc->hw_context,
628 			SC(CREDIT_CTRL), sc->credit_ctrl);
629 
630 		/* force a credit return on change to avoid a possible stall */
631 		force_return = 1;
632 	}
633 
634 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
635 
636 	if (force_return)
637 		sc_return_credits(sc);
638 }
639 
640 /*
641  * set_pio_integrity
642  *
643  * Set the CHECK_ENABLE register for the send context 'sc'.
644  */
645 void set_pio_integrity(struct send_context *sc)
646 {
647 	struct hfi1_devdata *dd = sc->dd;
648 	u64 reg = 0;
649 	u32 hw_context = sc->hw_context;
650 	int type = sc->type;
651 
652 	/*
653 	 * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if
654 	 * we're snooping.
655 	 */
656 	if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
657 	    dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE)
658 		reg = hfi1_pkt_default_send_ctxt_mask(dd, type);
659 
660 	write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg);
661 }
662 
663 /*
664  * Allocate a NUMA relative send context structure of the given type along
665  * with a HW context.
666  */
667 struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
668 			      uint hdrqentsize, int numa)
669 {
670 	struct send_context_info *sci;
671 	struct send_context *sc;
672 	dma_addr_t pa;
673 	unsigned long flags;
674 	u64 reg;
675 	u32 thresh;
676 	u32 sw_index;
677 	u32 hw_context;
678 	int ret;
679 	u8 opval, opmask;
680 
681 	/* do not allocate while frozen */
682 	if (dd->flags & HFI1_FROZEN)
683 		return NULL;
684 
685 	sc = kzalloc_node(sizeof(struct send_context), GFP_KERNEL, numa);
686 	if (!sc)
687 		return NULL;
688 
689 	spin_lock_irqsave(&dd->sc_lock, flags);
690 	ret = sc_hw_alloc(dd, type, &sw_index, &hw_context);
691 	if (ret) {
692 		spin_unlock_irqrestore(&dd->sc_lock, flags);
693 		kfree(sc);
694 		return NULL;
695 	}
696 
697 	sci = &dd->send_contexts[sw_index];
698 	sci->sc = sc;
699 
700 	sc->dd = dd;
701 	sc->node = numa;
702 	sc->type = type;
703 	spin_lock_init(&sc->alloc_lock);
704 	spin_lock_init(&sc->release_lock);
705 	spin_lock_init(&sc->credit_ctrl_lock);
706 	INIT_LIST_HEAD(&sc->piowait);
707 	INIT_WORK(&sc->halt_work, sc_halted);
708 	atomic_set(&sc->buffers_allocated, 0);
709 	init_waitqueue_head(&sc->halt_wait);
710 
711 	/* grouping is always single context for now */
712 	sc->group = 0;
713 
714 	sc->sw_index = sw_index;
715 	sc->hw_context = hw_context;
716 	cr_group_addresses(sc, &pa);
717 	sc->credits = sci->credits;
718 
719 /* PIO Send Memory Address details */
720 #define PIO_ADDR_CONTEXT_MASK 0xfful
721 #define PIO_ADDR_CONTEXT_SHIFT 16
722 	sc->base_addr = dd->piobase + ((hw_context & PIO_ADDR_CONTEXT_MASK)
723 					<< PIO_ADDR_CONTEXT_SHIFT);
724 
725 	/* set base and credits */
726 	reg = ((sci->credits & SC(CTRL_CTXT_DEPTH_MASK))
727 					<< SC(CTRL_CTXT_DEPTH_SHIFT))
728 		| ((sci->base & SC(CTRL_CTXT_BASE_MASK))
729 					<< SC(CTRL_CTXT_BASE_SHIFT));
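	/*
	 * SC(CTRL) carries both the context's first PIO block (base) and
	 * its depth in blocks (credits), i.e. the slice of send memory
	 * carved out for it in init_send_contexts().
	 */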
730 	write_kctxt_csr(dd, hw_context, SC(CTRL), reg);
731 
732 	set_pio_integrity(sc);
733 
734 	/* unmask all errors */
735 	write_kctxt_csr(dd, hw_context, SC(ERR_MASK), (u64)-1);
736 
737 	/* set the default partition key */
738 	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY),
739 		(DEFAULT_PKEY &
740 			SC(CHECK_PARTITION_KEY_VALUE_MASK))
741 		    << SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
742 
743 	/* per context type checks */
744 	if (type == SC_USER) {
745 		opval = USER_OPCODE_CHECK_VAL;
746 		opmask = USER_OPCODE_CHECK_MASK;
747 	} else {
748 		opval = OPCODE_CHECK_VAL_DISABLED;
749 		opmask = OPCODE_CHECK_MASK_DISABLED;
750 	}
751 
752 	/* set the send context check opcode mask and value */
753 	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE),
754 		((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
755 		((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
756 
757 	/* set up credit return */
758 	reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
759 	write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);
760 
761 	/*
762 	 * Calculate the initial credit return threshold.
763 	 *
764 	 * For Ack contexts, set a threshold for half the credits.
765 	 * For User contexts use the given percentage.  This has been
766 	 * sanitized on driver start-up.
767 	 * For Kernel contexts, use the default MTU plus a header.
768 	 */
769 	if (type == SC_ACK) {
770 		thresh = sc_percent_to_threshold(sc, 50);
771 	} else if (type == SC_USER) {
772 		thresh = sc_percent_to_threshold(sc,
773 				user_credit_return_threshold);
774 	} else { /* kernel */
775 		thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
776 	}
777 	reg = thresh << SC(CREDIT_CTRL_THRESHOLD_SHIFT);
778 	/* add in early return */
779 	if (type == SC_USER && HFI1_CAP_IS_USET(EARLY_CREDIT_RETURN))
780 		reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK);
781 	else if (HFI1_CAP_IS_KSET(EARLY_CREDIT_RETURN)) /* kernel, ack */
782 		reg |= SC(CREDIT_CTRL_EARLY_RETURN_SMASK);
783 
784 	/* set up write-through credit_ctrl */
785 	sc->credit_ctrl = reg;
786 	write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), reg);
787 
788 	/* User send contexts should not allow sending on VL15 */
789 	if (type == SC_USER) {
790 		reg = 1ULL << 15;
791 		write_kctxt_csr(dd, hw_context, SC(CHECK_VL), reg);
792 	}
793 
794 	spin_unlock_irqrestore(&dd->sc_lock, flags);
795 
796 	/*
797 	 * Allocate shadow ring to track outstanding PIO buffers _after_
798 	 * unlocking.  We don't know the size until the lock is held and
799 	 * we can't allocate while the lock is held.  No one is using
800 	 * the context yet, so allocate it now.
801 	 *
802 	 * User contexts do not get a shadow ring.
803 	 */
804 	if (type != SC_USER) {
805 		/*
806 		 * Size the shadow ring 1 larger than the number of credits
807 		 * so head == tail can mean empty.
808 		 */
809 		sc->sr_size = sci->credits + 1;
810 		sc->sr = kzalloc_node(sizeof(union pio_shadow_ring) *
811 				sc->sr_size, GFP_KERNEL, numa);
812 		if (!sc->sr) {
813 			sc_free(sc);
814 			return NULL;
815 		}
816 	}
817 
818 	dd_dev_info(dd,
819 		"Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u\n",
820 		sw_index,
821 		hw_context,
822 		sc_type_name(type),
823 		sc->group,
824 		sc->credits,
825 		sc->credit_ctrl,
826 		thresh);
827 
828 	return sc;
829 }
830 
831 /* free a per-NUMA send context structure */
832 void sc_free(struct send_context *sc)
833 {
834 	struct hfi1_devdata *dd;
835 	unsigned long flags;
836 	u32 sw_index;
837 	u32 hw_context;
838 
839 	if (!sc)
840 		return;
841 
842 	sc->flags |= SCF_IN_FREE;	/* ensure no restarts */
843 	dd = sc->dd;
844 	if (!list_empty(&sc->piowait))
845 		dd_dev_err(dd, "piowait list not empty!\n");
846 	sw_index = sc->sw_index;
847 	hw_context = sc->hw_context;
848 	sc_disable(sc);	/* make sure the HW is disabled */
849 	flush_work(&sc->halt_work);
850 
851 	spin_lock_irqsave(&dd->sc_lock, flags);
852 	dd->send_contexts[sw_index].sc = NULL;
853 
854 	/* clear/disable all registers set in sc_alloc */
855 	write_kctxt_csr(dd, hw_context, SC(CTRL), 0);
856 	write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), 0);
857 	write_kctxt_csr(dd, hw_context, SC(ERR_MASK), 0);
858 	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY), 0);
859 	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE), 0);
860 	write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), 0);
861 	write_kctxt_csr(dd, hw_context, SC(CREDIT_CTRL), 0);
862 
863 	/* release the index and context for re-use */
864 	sc_hw_free(dd, sw_index, hw_context);
865 	spin_unlock_irqrestore(&dd->sc_lock, flags);
866 
867 	kfree(sc->sr);
868 	kfree(sc);
869 }
870 
871 /* disable the context */
872 void sc_disable(struct send_context *sc)
873 {
874 	u64 reg;
875 	unsigned long flags;
876 	struct pio_buf *pbuf;
877 
878 	if (!sc)
879 		return;
880 
881 	/* do all steps, even if already disabled */
882 	spin_lock_irqsave(&sc->alloc_lock, flags);
883 	reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
884 	reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
885 	sc->flags &= ~SCF_ENABLED;
886 	sc_wait_for_packet_egress(sc, 1);
887 	write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);
888 	spin_unlock_irqrestore(&sc->alloc_lock, flags);
889 
890 	/*
891 	 * Flush any waiters.  Once the context is disabled,
892 	 * credit return interrupts are stopped (although there
893 	 * could be one in-process when the context is disabled).
894 	 * Wait one microsecond for any lingering interrupts, then
895 	 * proceed with the flush.
896 	 */
897 	udelay(1);
898 	spin_lock_irqsave(&sc->release_lock, flags);
899 	if (sc->sr) {	/* this context has a shadow ring */
900 		while (sc->sr_tail != sc->sr_head) {
901 			pbuf = &sc->sr[sc->sr_tail].pbuf;
902 			if (pbuf->cb)
903 				(*pbuf->cb)(pbuf->arg, PRC_SC_DISABLE);
904 			sc->sr_tail++;
905 			if (sc->sr_tail >= sc->sr_size)
906 				sc->sr_tail = 0;
907 		}
908 	}
909 	spin_unlock_irqrestore(&sc->release_lock, flags);
910 }
911 
912 /* return SendEgressCtxtStatus.PacketOccupancy */
913 #define packet_occupancy(r) \
914 	(((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)\
915 	>> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT)
916 
917 /* is egress halted on the context? */
918 #define egress_halted(r) \
919 	((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK)
920 
921 /* wait for packet egress, optionally pause for credit return  */
922 static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
923 {
924 	struct hfi1_devdata *dd = sc->dd;
925 	u64 reg = 0;
926 	u64 reg_prev;
927 	u32 loop = 0;
928 
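	/*
	 * Poll the egress status roughly once a microsecond.  The loop
	 * counter is reset whenever the packet occupancy changes, so the
	 * 500-iteration limit below only trips after ~500us with no
	 * forward progress, at which point the link is bounced.
	 */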
929 	while (1) {
930 		reg_prev = reg;
931 		reg = read_csr(dd, sc->hw_context * 8 +
932 			       SEND_EGRESS_CTXT_STATUS);
933 		/* done if egress is stopped */
934 		if (egress_halted(reg))
935 			break;
936 		reg = packet_occupancy(reg);
937 		if (reg == 0)
938 			break;
939 		/* counter is reset if occupancy count changes */
940 		if (reg != reg_prev)
941 			loop = 0;
942 		if (loop > 500) {
943 			/* timed out - bounce the link */
944 			dd_dev_err(dd,
945 				"%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
946 				__func__, sc->sw_index,
947 				sc->hw_context, (u32)reg);
948 			queue_work(dd->pport->hfi1_wq,
949 				&dd->pport->link_bounce_work);
950 			break;
951 		}
952 		loop++;
953 		udelay(1);
954 	}
955 
956 	if (pause)
957 		/* Add additional delay to ensure chip returns all credits */
958 		pause_for_credit_return(dd);
959 }
960 
961 void sc_wait(struct hfi1_devdata *dd)
962 {
963 	int i;
964 
965 	for (i = 0; i < dd->num_send_contexts; i++) {
966 		struct send_context *sc = dd->send_contexts[i].sc;
967 
968 		if (!sc)
969 			continue;
970 		sc_wait_for_packet_egress(sc, 0);
971 	}
972 }
973 
974 /*
975  * Restart a context after it has been halted due to error.
976  *
977  * If the first step fails - wait for the halt to be asserted, return early.
978  * Otherwise complain about timeouts but keep going.
979  *
980  * It is expected that allocations (enabled flag bit) have been shut off
981  * already (only applies to kernel contexts).
982  */
983 int sc_restart(struct send_context *sc)
984 {
985 	struct hfi1_devdata *dd = sc->dd;
986 	u64 reg;
987 	u32 loop;
988 	int count;
989 
990 	/* bounce off if not halted, or being free'd */
991 	if (!(sc->flags & SCF_HALTED) || (sc->flags & SCF_IN_FREE))
992 		return -EINVAL;
993 
994 	dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index,
995 		sc->hw_context);
996 
997 	/*
998 	 * Step 1: Wait for the context to actually halt.
999 	 *
1000 	 * The error interrupt is asynchronous to actually setting halt
1001 	 * on the context.
1002 	 */
1003 	loop = 0;
1004 	while (1) {
1005 		reg = read_kctxt_csr(dd, sc->hw_context, SC(STATUS));
1006 		if (reg & SC(STATUS_CTXT_HALTED_SMASK))
1007 			break;
1008 		if (loop > 100) {
1009 			dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n",
1010 				__func__, sc->sw_index, sc->hw_context);
1011 			return -ETIME;
1012 		}
1013 		loop++;
1014 		udelay(1);
1015 	}
1016 
1017 	/*
1018 	 * Step 2: Ensure no users are still trying to write to PIO.
1019 	 *
1020 	 * For kernel contexts, we have already turned off buffer allocation.
1021 	 * Now wait for the buffer count to go to zero.
1022 	 *
1023 	 * For user contexts, the user handling code has cut off write access
1024 	 * to the context's PIO pages before calling this routine and will
1025 	 * restore write access after this routine returns.
1026 	 */
1027 	if (sc->type != SC_USER) {
1028 		/* kernel context */
1029 		loop = 0;
1030 		while (1) {
1031 			count = atomic_read(&sc->buffers_allocated);
1032 			if (count == 0)
1033 				break;
1034 			if (loop > 100) {
1035 				dd_dev_err(dd,
1036 					"%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
1037 					__func__, sc->sw_index,
1038 					sc->hw_context, count);
1039 			}
1040 			loop++;
1041 			udelay(1);
1042 		}
1043 	}
1044 
1045 	/*
1046 	 * Step 3: Wait for all packets to egress.
1047 	 * This is done while disabling the send context
1048 	 *
1049 	 * Step 4: Disable the context
1050 	 *
1051 	 * This is a superset of the halt.  After the disable, the
1052 	 * errors can be cleared.
1053 	 */
1054 	sc_disable(sc);
1055 
1056 	/*
1057 	 * Step 5: Enable the context
1058 	 *
1059 	 * This enable will clear the halted flag and per-send context
1060 	 * error flags.
1061 	 */
1062 	return sc_enable(sc);
1063 }
1064 
1065 /*
1066  * PIO freeze processing.  To be called after the TXE block is fully frozen.
1067  * Go through all frozen send contexts and disable them.  The contexts are
1068  * already stopped by the freeze.
1069  */
1070 void pio_freeze(struct hfi1_devdata *dd)
1071 {
1072 	struct send_context *sc;
1073 	int i;
1074 
1075 	for (i = 0; i < dd->num_send_contexts; i++) {
1076 		sc = dd->send_contexts[i].sc;
1077 		/*
1078 		 * Don't disable unallocated, unfrozen, or user send contexts.
1079 		 * User send contexts will be disabled when the process
1080 		 * calls into the driver to reset its context.
1081 		 */
1082 		if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
1083 			continue;
1084 
1085 		/* only need to disable, the context is already stopped */
1086 		sc_disable(sc);
1087 	}
1088 }
1089 
1090 /*
1091  * Unfreeze PIO for kernel send contexts.  The precondition for calling this
1092  * is that all PIO send contexts have been disabled and the SPC freeze has
1093  * been cleared.  Now perform the last step and re-enable each kernel context.
1094  * User (PSM) processing will occur when PSM calls into the kernel to
1095  * acknowledge the freeze.
1096  */
1097 void pio_kernel_unfreeze(struct hfi1_devdata *dd)
1098 {
1099 	struct send_context *sc;
1100 	int i;
1101 
1102 	for (i = 0; i < dd->num_send_contexts; i++) {
1103 		sc = dd->send_contexts[i].sc;
1104 		if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
1105 			continue;
1106 
1107 		sc_enable(sc);	/* will clear the sc frozen flag */
1108 	}
1109 }
1110 
1111 /*
1112  * Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
1113  * Returns:
1114  *	-ETIMEDOUT - if we wait too long
1115  *	-EIO	   - if there was an error
1116  */
1117 static int pio_init_wait_progress(struct hfi1_devdata *dd)
1118 {
1119 	u64 reg;
1120 	int max, count = 0;
1121 
1122 	/* max is the longest possible HW init time / delay */
1123 	max = (dd->icode == ICODE_FPGA_EMULATION) ? 120 : 5;
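	/*
	 * With the 5us poll below this allows roughly 600us on FPGA
	 * emulation and 25us on real hardware before giving up.
	 */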
1124 	while (1) {
1125 		reg = read_csr(dd, SEND_PIO_INIT_CTXT);
1126 		if (!(reg & SEND_PIO_INIT_CTXT_PIO_INIT_IN_PROGRESS_SMASK))
1127 			break;
1128 		if (count >= max)
1129 			return -ETIMEDOUT;
1130 		udelay(5);
1131 		count++;
1132 	}
1133 
1134 	return reg & SEND_PIO_INIT_CTXT_PIO_INIT_ERR_SMASK ? -EIO : 0;
1135 }
1136 
1137 /*
1138  * Reset all of the send contexts to their power-on state.  Used
1139  * only during manual init - no lock against sc_enable needed.
1140  */
1141 void pio_reset_all(struct hfi1_devdata *dd)
1142 {
1143 	int ret;
1144 
1145 	/* make sure the init engine is not busy */
1146 	ret = pio_init_wait_progress(dd);
1147 	/* ignore any timeout */
1148 	if (ret == -EIO) {
1149 		/* clear the error */
1150 		write_csr(dd, SEND_PIO_ERR_CLEAR,
1151 			SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
1152 	}
1153 
1154 	/* reset init all */
1155 	write_csr(dd, SEND_PIO_INIT_CTXT,
1156 			SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
1157 	udelay(2);
1158 	ret = pio_init_wait_progress(dd);
1159 	if (ret < 0) {
1160 		dd_dev_err(dd,
1161 			"PIO send context init %s while initializing all PIO blocks\n",
1162 			ret == -ETIMEDOUT ? "is stuck" : "had an error");
1163 	}
1164 }
1165 
1166 /* enable the context */
1167 int sc_enable(struct send_context *sc)
1168 {
1169 	u64 sc_ctrl, reg, pio;
1170 	struct hfi1_devdata *dd;
1171 	unsigned long flags;
1172 	int ret = 0;
1173 
1174 	if (!sc)
1175 		return -EINVAL;
1176 	dd = sc->dd;
1177 
1178 	/*
1179 	 * Obtain the allocator lock to guard against any allocation
1180 	 * attempts (which should not happen prior to context being
1181 	 * enabled). On the release/disable side we don't need to
1182 	 * worry about locking since the releaser will not do anything
1183 	 * if the context accounting values have not changed.
1184 	 */
1185 	spin_lock_irqsave(&sc->alloc_lock, flags);
1186 	sc_ctrl = read_kctxt_csr(dd, sc->hw_context, SC(CTRL));
1187 	if ((sc_ctrl & SC(CTRL_CTXT_ENABLE_SMASK)))
1188 		goto unlock; /* already enabled */
1189 
1190 	/* IMPORTANT: only clear free and fill if transitioning 0 -> 1 */
1191 
1192 	*sc->hw_free = 0;
1193 	sc->free = 0;
1194 	sc->alloc_free = 0;
1195 	sc->fill = 0;
1196 	sc->sr_head = 0;
1197 	sc->sr_tail = 0;
1198 	sc->flags = 0;
1199 	atomic_set(&sc->buffers_allocated, 0);
1200 
1201 	/*
1202 	 * Clear all per-context errors.  Some of these will be set when
1203 	 * we are re-enabling after a context halt.  Now that the context
1204 	 * is disabled, the halt will not clear until after the PIO init
1205 	 * engine runs below.
1206 	 */
1207 	reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS));
1208 	if (reg)
1209 		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR),
1210 			reg);
1211 
1212 	/*
1213 	 * The HW PIO initialization engine can handle only one init
1214 	 * request at a time. Serialize access to each device's engine.
1215 	 */
1216 	spin_lock(&dd->sc_init_lock);
1217 	/*
1218 	 * Since access to this code block is serialized and
1219 	 * each access waits for the initialization to complete
1220 	 * before releasing the lock, the PIO initialization engine
1221 	 * should not be in use, so we don't have to wait for the
1222 	 * InProgress bit to go down.
1223 	 */
1224 	pio = ((sc->hw_context & SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_MASK) <<
1225 	       SEND_PIO_INIT_CTXT_PIO_CTXT_NUM_SHIFT) |
1226 		SEND_PIO_INIT_CTXT_PIO_SINGLE_CTXT_INIT_SMASK;
1227 	write_csr(dd, SEND_PIO_INIT_CTXT, pio);
1228 	/*
1229 	 * Wait until the engine is done.  Give the chip the required time
1230 	 * so, hopefully, we read the register just once.
1231 	 */
1232 	udelay(2);
1233 	ret = pio_init_wait_progress(dd);
1234 	spin_unlock(&dd->sc_init_lock);
1235 	if (ret) {
1236 		dd_dev_err(dd,
1237 			   "sctxt%u(%u): Context not enabled due to init failure %d\n",
1238 			   sc->sw_index, sc->hw_context, ret);
1239 		goto unlock;
1240 	}
1241 
1242 	/*
1243 	 * All is well. Enable the context.
1244 	 */
1245 	sc_ctrl |= SC(CTRL_CTXT_ENABLE_SMASK);
1246 	write_kctxt_csr(dd, sc->hw_context, SC(CTRL), sc_ctrl);
1247 	/*
1248 	 * Read SendCtxtCtrl to force the write out and prevent a timing
1249 	 * hazard where a PIO write may reach the context before the enable.
1250 	 */
1251 	read_kctxt_csr(dd, sc->hw_context, SC(CTRL));
1252 	sc->flags |= SCF_ENABLED;
1253 
1254 unlock:
1255 	spin_unlock_irqrestore(&sc->alloc_lock, flags);
1256 
1257 	return ret;
1258 }
1259 
1260 /* force a credit return on the context */
1261 void sc_return_credits(struct send_context *sc)
1262 {
1263 	if (!sc)
1264 		return;
1265 
1266 	/* a 0->1 transition schedules a credit return */
1267 	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE),
1268 		SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
1269 	/*
1270 	 * Ensure that the write is flushed and the credit return is
1271 	 * scheduled. We care more about the 0 -> 1 transition.
1272 	 */
1273 	read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE));
1274 	/* set back to 0 for next time */
1275 	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE), 0);
1276 }
1277 
1278 /* allow all in-flight packets to drain on the context */
1279 void sc_flush(struct send_context *sc)
1280 {
1281 	if (!sc)
1282 		return;
1283 
1284 	sc_wait_for_packet_egress(sc, 1);
1285 }
1286 
1287 /* drop all packets on the context, no waiting until they are sent */
1288 void sc_drop(struct send_context *sc)
1289 {
1290 	if (!sc)
1291 		return;
1292 
1293 	dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
1294 			__func__, sc->sw_index, sc->hw_context);
1295 }
1296 
1297 /*
1298  * Start the software reaction to a context halt or SPC freeze:
1299  *	- mark the context as halted or frozen
1300  *	- stop buffer allocations
1301  *
1302  * Called from the error interrupt.  Other work is deferred until
1303  * out of the interrupt.
1304  */
1305 void sc_stop(struct send_context *sc, int flag)
1306 {
1307 	unsigned long flags;
1308 
1309 	/* mark the context */
1310 	sc->flags |= flag;
1311 
1312 	/* stop buffer allocations */
1313 	spin_lock_irqsave(&sc->alloc_lock, flags);
1314 	sc->flags &= ~SCF_ENABLED;
1315 	spin_unlock_irqrestore(&sc->alloc_lock, flags);
1316 	wake_up(&sc->halt_wait);
1317 }
1318 
1319 #define BLOCK_DWORDS (PIO_BLOCK_SIZE/sizeof(u32))
1320 #define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS)
1321 
1322 /*
1323  * The send context buffer "allocator".
1324  *
1325  * @sc: the PIO send context we are allocating from
1326  * @len: length of whole packet - including PBC - in dwords
1327  * @cb: optional callback to call when the buffer is finished sending
1328  * @arg: argument for cb
1329  *
1330  * Return a pointer to a PIO buffer if successful, NULL if not enough room.
1331  */
1332 struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
1333 				pio_release_cb cb, void *arg)
1334 {
1335 	struct pio_buf *pbuf = NULL;
1336 	unsigned long flags;
1337 	unsigned long avail;
1338 	unsigned long blocks = dwords_to_blocks(dw_len);
1339 	unsigned long start_fill;
1340 	int trycount = 0;
1341 	u32 head, next;
1342 
1343 	spin_lock_irqsave(&sc->alloc_lock, flags);
1344 	if (!(sc->flags & SCF_ENABLED)) {
1345 		spin_unlock_irqrestore(&sc->alloc_lock, flags);
1346 		goto done;
1347 	}
1348 
1349 retry:
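	/*
	 * Credit accounting: sc->fill counts blocks ever handed out and
	 * sc->alloc_free is the allocator's cached copy of sc->free (blocks
	 * already returned by the chip), so the difference is the number of
	 * blocks still outstanding.
	 */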
1350 	avail = (unsigned long)sc->credits - (sc->fill - sc->alloc_free);
1351 	if (blocks > avail) {
1352 		/* not enough room */
1353 		if (unlikely(trycount))	{ /* already tried to get more room */
1354 			spin_unlock_irqrestore(&sc->alloc_lock, flags);
1355 			goto done;
1356 		}
1357 		/* copy from receiver cache line and recalculate */
1358 		sc->alloc_free = ACCESS_ONCE(sc->free);
1359 		avail =
1360 			(unsigned long)sc->credits -
1361 			(sc->fill - sc->alloc_free);
1362 		if (blocks > avail) {
1363 			/* still no room, actively update */
1364 			spin_unlock_irqrestore(&sc->alloc_lock, flags);
1365 			sc_release_update(sc);
1366 			spin_lock_irqsave(&sc->alloc_lock, flags);
1367 			sc->alloc_free = ACCESS_ONCE(sc->free);
1368 			trycount++;
1369 			goto retry;
1370 		}
1371 	}
1372 
1373 	/* there is enough room */
1374 
1375 	atomic_inc(&sc->buffers_allocated);
1376 
1377 	/* read this once */
1378 	head = sc->sr_head;
1379 
1380 	/* "allocate" the buffer */
1381 	start_fill = sc->fill;
1382 	sc->fill += blocks;
1383 
1384 	/*
1385 	 * Fill the parts that the releaser looks at before moving the head.
1386 	 * The only necessary piece is the sent_at field.  The credits
1387 	 * we have just allocated cannot have been returned yet, so the
1388 	 * cb and arg will not be looked at for a "while".  Put them
1389 	 * on this side of the memory barrier anyway.
1390 	 */
1391 	pbuf = &sc->sr[head].pbuf;
1392 	pbuf->sent_at = sc->fill;
1393 	pbuf->cb = cb;
1394 	pbuf->arg = arg;
1395 	pbuf->sc = sc;	/* could be filled in at sc->sr init time */
1396 	/* make sure this is in memory before updating the head */
1397 
1398 	/* calculate next head index, do not store */
1399 	next = head + 1;
1400 	if (next >= sc->sr_size)
1401 		next = 0;
1402 	/* update the head - must be last! - the releaser can look at fields
1403 	   in pbuf once we move the head */
1404 	smp_wmb();
1405 	sc->sr_head = next;
1406 	spin_unlock_irqrestore(&sc->alloc_lock, flags);
1407 
1408 	/* finish filling in the buffer outside the lock */
1409 	pbuf->start = sc->base_addr + ((start_fill % sc->credits)
1410 							* PIO_BLOCK_SIZE);
1411 	pbuf->size = sc->credits * PIO_BLOCK_SIZE;
1412 	pbuf->end = sc->base_addr + pbuf->size;
1413 	pbuf->block_count = blocks;
1414 	pbuf->qw_written = 0;
1415 	pbuf->carry_bytes = 0;
1416 	pbuf->carry.val64 = 0;
1417 done:
1418 	return pbuf;
1419 }
1420 
1421 /*
1422  * There are at least two entities that can turn on credit return
1423  * interrupts and they can overlap.  Avoid problems by implementing
1424  * a count scheme that is enforced by a lock.  The lock is needed because
1425  * the count and CSR write must be paired.
1426  */
1427 
1428 /*
1429  * Start credit return interrupts.  This is managed by a count.  If already
1430  * on, just increment the count.
1431  */
1432 void sc_add_credit_return_intr(struct send_context *sc)
1433 {
1434 	unsigned long flags;
1435 
1436 	/* lock must surround both the count change and the CSR update */
1437 	spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
1438 	if (sc->credit_intr_count == 0) {
1439 		sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
1440 		write_kctxt_csr(sc->dd, sc->hw_context,
1441 			SC(CREDIT_CTRL), sc->credit_ctrl);
1442 	}
1443 	sc->credit_intr_count++;
1444 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
1445 }
1446 
1447 /*
1448  * Stop credit return interrupts.  This is managed by a count.  Decrement the
1449  * count, if the last user, then turn the credit interrupts off.
1450  */
1451 void sc_del_credit_return_intr(struct send_context *sc)
1452 {
1453 	unsigned long flags;
1454 
1455 	WARN_ON(sc->credit_intr_count == 0);
1456 
1457 	/* lock must surround both the count change and the CSR update */
1458 	spin_lock_irqsave(&sc->credit_ctrl_lock, flags);
1459 	sc->credit_intr_count--;
1460 	if (sc->credit_intr_count == 0) {
1461 		sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
1462 		write_kctxt_csr(sc->dd, sc->hw_context,
1463 			SC(CREDIT_CTRL), sc->credit_ctrl);
1464 	}
1465 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
1466 }
1467 
1468 /*
1469  * The caller must be careful when calling this.  All needint calls
1470  * must be paired with !needint.
1471  */
1472 void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
1473 {
1474 	if (needint)
1475 		sc_add_credit_return_intr(sc);
1476 	else
1477 		sc_del_credit_return_intr(sc);
1478 	trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl);
1479 	if (needint) {
1480 		mmiowb();
1481 		sc_return_credits(sc);
1482 	}
1483 }
1484 
1485 /**
1486  * sc_piobufavail - callback when a PIO buffer is available
1487  * @sc: the send context
1488  *
1489  * This is called from the interrupt handler when a PIO buffer is
1490  * available after hfi1_verbs_send() returned an error that no buffers were
1491  * available. Disable the interrupt if there are no more QPs waiting.
1492  */
1493 static void sc_piobufavail(struct send_context *sc)
1494 {
1495 	struct hfi1_devdata *dd = sc->dd;
1496 	struct hfi1_ibdev *dev = &dd->verbs_dev;
1497 	struct list_head *list;
1498 	struct hfi1_qp *qps[PIO_WAIT_BATCH_SIZE];
1499 	struct hfi1_qp *qp;
1500 	unsigned long flags;
1501 	unsigned i, n = 0;
1502 
1503 	if (dd->send_contexts[sc->sw_index].type != SC_KERNEL)
1504 		return;
1505 	list = &sc->piowait;
1506 	/*
1507 	 * Note: checking that the piowait list is empty and clearing
1508 	 * the buffer available interrupt needs to be atomic or we
1509 	 * could end up with QPs on the wait list with the interrupt
1510 	 * disabled.
1511 	 */
1512 	write_seqlock_irqsave(&dev->iowait_lock, flags);
1513 	while (!list_empty(list)) {
1514 		struct iowait *wait;
1515 
1516 		if (n == ARRAY_SIZE(qps))
1517 			goto full;
1518 		wait = list_first_entry(list, struct iowait, list);
1519 		qp = container_of(wait, struct hfi1_qp, s_iowait);
1520 		list_del_init(&qp->s_iowait.list);
1521 		/* refcount held until actual wake up */
1522 		qps[n++] = qp;
1523 	}
1524 	/*
1525 	 * Counting: only call wantpiobuf_intr() if there were waiters and they
1526 	 * are now all gone.
1527 	 */
1528 	if (n)
1529 		hfi1_sc_wantpiobuf_intr(sc, 0);
1530 full:
1531 	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
1532 
1533 	for (i = 0; i < n; i++)
1534 		hfi1_qp_wakeup(qps[i], HFI1_S_WAIT_PIO);
1535 }
1536 
1537 /* translate a send credit update to a bit code of reasons */
1538 static inline int fill_code(u64 hw_free)
1539 {
1540 	int code = 0;
1541 
1542 	if (hw_free & CR_STATUS_SMASK)
1543 		code |= PRC_STATUS_ERR;
1544 	if (hw_free & CR_CREDIT_RETURN_DUE_TO_PBC_SMASK)
1545 		code |= PRC_PBC;
1546 	if (hw_free & CR_CREDIT_RETURN_DUE_TO_THRESHOLD_SMASK)
1547 		code |= PRC_THRESHOLD;
1548 	if (hw_free & CR_CREDIT_RETURN_DUE_TO_ERR_SMASK)
1549 		code |= PRC_FILL_ERR;
1550 	if (hw_free & CR_CREDIT_RETURN_DUE_TO_FORCE_SMASK)
1551 		code |= PRC_SC_DISABLE;
1552 	return code;
1553 }
1554 
1555 /* use the jiffies compare to get the wrap right */
1556 #define sent_before(a, b) time_before(a, b)	/* a < b */
1557 
1558 /*
1559  * The send context buffer "releaser".
1560  */
1561 void sc_release_update(struct send_context *sc)
1562 {
1563 	struct pio_buf *pbuf;
1564 	u64 hw_free;
1565 	u32 head, tail;
1566 	unsigned long old_free;
1567 	unsigned long extra;
1568 	unsigned long flags;
1569 	int code;
1570 
1571 	if (!sc)
1572 		return;
1573 
1574 	spin_lock_irqsave(&sc->release_lock, flags);
1575 	/* update free */
1576 	hw_free = le64_to_cpu(*sc->hw_free);		/* volatile read */
1577 	old_free = sc->free;
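	/*
	 * The hardware free counter is narrower than the software count, so
	 * take the delta modulo the counter width and add it to sc->free;
	 * this keeps the count correct across counter wrap.
	 */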
1578 	extra = (((hw_free & CR_COUNTER_SMASK) >> CR_COUNTER_SHIFT)
1579 			- (old_free & CR_COUNTER_MASK))
1580 				& CR_COUNTER_MASK;
1581 	sc->free = old_free + extra;
1582 	trace_hfi1_piofree(sc, extra);
1583 
1584 	/* call sent buffer callbacks */
1585 	code = -1;				/* code not yet set */
1586 	head = ACCESS_ONCE(sc->sr_head);	/* snapshot the head */
1587 	tail = sc->sr_tail;
1588 	while (head != tail) {
1589 		pbuf = &sc->sr[tail].pbuf;
1590 
1591 		if (sent_before(sc->free, pbuf->sent_at)) {
1592 			/* not sent yet */
1593 			break;
1594 		}
1595 		if (pbuf->cb) {
1596 			if (code < 0) /* fill in code on first user */
1597 				code = fill_code(hw_free);
1598 			(*pbuf->cb)(pbuf->arg, code);
1599 		}
1600 
1601 		tail++;
1602 		if (tail >= sc->sr_size)
1603 			tail = 0;
1604 	}
1605 	/* update tail, in case we moved it */
1606 	sc->sr_tail = tail;
1607 	spin_unlock_irqrestore(&sc->release_lock, flags);
1608 	sc_piobufavail(sc);
1609 }
1610 
1611 /*
1612  * Send context group releaser.  Argument is the send context that caused
1613  * the interrupt.  Called from the send context interrupt handler.
1614  *
1615  * Call release on all contexts in the group.
1616  *
1617  * This routine takes the sc_lock without an irqsave because it is only
1618  * called from an interrupt handler.  Adjust if that changes.
1619  */
1620 void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
1621 {
1622 	struct send_context *sc;
1623 	u32 sw_index;
1624 	u32 gc, gc_end;
1625 
1626 	spin_lock(&dd->sc_lock);
1627 	sw_index = dd->hw_to_sw[hw_context];
1628 	if (unlikely(sw_index >= dd->num_send_contexts)) {
1629 		dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n",
1630 			__func__, hw_context, sw_index);
1631 		goto done;
1632 	}
1633 	sc = dd->send_contexts[sw_index].sc;
1634 	if (unlikely(!sc))
1635 		goto done;
1636 
1637 	gc = group_context(hw_context, sc->group);
1638 	gc_end = gc + group_size(sc->group);
1639 	for (; gc < gc_end; gc++) {
1640 		sw_index = dd->hw_to_sw[gc];
1641 		if (unlikely(sw_index >= dd->num_send_contexts)) {
1642 			dd_dev_err(dd,
1643 				"%s: invalid hw (%u) to sw (%u) mapping\n",
1644 				__func__, hw_context, sw_index);
1645 			continue;
1646 		}
1647 		sc_release_update(dd->send_contexts[sw_index].sc);
1648 	}
1649 done:
1650 	spin_unlock(&dd->sc_lock);
1651 }
1652 
1653 int init_pervl_scs(struct hfi1_devdata *dd)
1654 {
1655 	int i;
1656 	u64 mask, all_vl_mask = (u64) 0x80ff; /* VLs 0-7, 15 */
1657 	u32 ctxt;
1658 
1659 	dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
1660 				  dd->rcd[0]->rcvhdrqentsize, dd->node);
1661 	if (!dd->vld[15].sc)
1662 		goto nomem;
1663 	hfi1_init_ctxt(dd->vld[15].sc);
1664 	dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
1665 	for (i = 0; i < num_vls; i++) {
1666 		/*
1667 		 * Since this function does not deal with a specific
1668 		 * receive context but we need the RcvHdrQ entry size,
1669 		 * use the size from rcd[0]. It is guaranteed to be
1670 		 * valid at this point and will remain the same for all
1671 		 * receive contexts.
1672 		 */
1673 		dd->vld[i].sc = sc_alloc(dd, SC_KERNEL,
1674 					 dd->rcd[0]->rcvhdrqentsize, dd->node);
1675 		if (!dd->vld[i].sc)
1676 			goto nomem;
1677 
1678 		hfi1_init_ctxt(dd->vld[i].sc);
1679 
1680 		/* non VL15 start with the max MTU */
1681 		dd->vld[i].mtu = hfi1_max_mtu;
1682 	}
1683 	sc_enable(dd->vld[15].sc);
1684 	ctxt = dd->vld[15].sc->hw_context;
1685 	mask = all_vl_mask & ~(1LL << 15);
1686 	write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
1687 	dd_dev_info(dd,
1688 		    "Using send context %u(%u) for VL15\n",
1689 		    dd->vld[15].sc->sw_index, ctxt);
1690 	for (i = 0; i < num_vls; i++) {
1691 		sc_enable(dd->vld[i].sc);
1692 		ctxt = dd->vld[i].sc->hw_context;
1693 		mask = all_vl_mask & ~(1LL << i);
1694 		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
1695 	}
1696 	return 0;
1697 nomem:
1698 	sc_free(dd->vld[15].sc);
1699 	for (i = 0; i < num_vls; i++)
1700 		sc_free(dd->vld[i].sc);
1701 	return -ENOMEM;
1702 }
1703 
1704 int init_credit_return(struct hfi1_devdata *dd)
1705 {
1706 	int ret;
1707 	int num_numa;
1708 	int i;
1709 
1710 	num_numa = num_online_nodes();
1711 	/* enforce the expectation that the numas are compact */
1712 	for (i = 0; i < num_numa; i++) {
1713 		if (!node_online(i)) {
1714 			dd_dev_err(dd, "NUMA nodes are not compact\n");
1715 			ret = -EINVAL;
1716 			goto done;
1717 		}
1718 	}
1719 
1720 	dd->cr_base = kcalloc(
1721 		num_numa,
1722 		sizeof(struct credit_return_base),
1723 		GFP_KERNEL);
1724 	if (!dd->cr_base) {
1725 		dd_dev_err(dd, "Unable to allocate credit return base\n");
1726 		ret = -ENOMEM;
1727 		goto done;
1728 	}
1729 	for (i = 0; i < num_numa; i++) {
1730 		int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);
1731 
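		/*
		 * One credit_return entry per hardware send context is
		 * allocated for each NUMA node; set_dev_node() below steers
		 * the coherent allocation onto that node's memory.
		 */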
1732 		set_dev_node(&dd->pcidev->dev, i);
1733 		dd->cr_base[i].va = dma_zalloc_coherent(
1734 					&dd->pcidev->dev,
1735 					bytes,
1736 					&dd->cr_base[i].pa,
1737 					GFP_KERNEL);
1738 		if (dd->cr_base[i].va == NULL) {
1739 			set_dev_node(&dd->pcidev->dev, dd->node);
1740 			dd_dev_err(dd,
1741 				"Unable to allocate credit return DMA range for NUMA %d\n",
1742 				i);
1743 			ret = -ENOMEM;
1744 			goto done;
1745 		}
1746 	}
1747 	set_dev_node(&dd->pcidev->dev, dd->node);
1748 
1749 	ret = 0;
1750 done:
1751 	return ret;
1752 }
1753 
1754 void free_credit_return(struct hfi1_devdata *dd)
1755 {
1756 	int num_numa;
1757 	int i;
1758 
1759 	if (!dd->cr_base)
1760 		return;
1761 
1762 	num_numa = num_online_nodes();
1763 	for (i = 0; i < num_numa; i++) {
1764 		if (dd->cr_base[i].va) {
1765 			dma_free_coherent(&dd->pcidev->dev,
1766 				TXE_NUM_CONTEXTS
1767 					* sizeof(struct credit_return),
1768 				dd->cr_base[i].va,
1769 				dd->cr_base[i].pa);
1770 		}
1771 	}
1772 	kfree(dd->cr_base);
1773 	dd->cr_base = NULL;
1774 }
1775