// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif

#define MLX5_SFS_PER_CTRL_IRQ 64
#define MLX5_IRQ_CTRL_SF_MAX 8
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
#define MLX5_IRQ_VEC_COMP_BASE 1

#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)

struct mlx5_irq {
	struct atomic_notifier_head nh;
	cpumask_var_t mask;
	char name[MLX5_MAX_IRQ_FORMATTED_NAME];
	struct mlx5_irq_pool *pool;
	int refcount;
	struct msi_map map;
	u32 pool_index;
};

struct mlx5_irq_table {
	struct mlx5_irq_pool *pcif_pool;
	struct mlx5_irq_pool *sf_ctrl_pool;
	struct mlx5_irq_pool *sf_comp_pool;
};

static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev,
				   int func,
				   bool ec_vf_func)
{
	if (!ec_vf_func)
		return func;
	return mlx5_core_ec_vf_vport_base(dev) + func - 1;
}

/**
 * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
 *                                   to be assigned to each VF.
 * @dev: PF to work on
 * @num_vfs: Number of enabled VFs
 */
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
{
	int num_vf_msix, min_msix, max_msix;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	/* Limit maximum number of MSI-X vectors so the default configuration
	 * has some available in the pool. This will allow the user to increase
	 * the number of vectors in a VF without having to first size-down other
	 * VFs.
	 */
	return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
}
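
/* Illustrative example (hypothetical capability values, not from any real
 * device): with num_total_dynamic_vf_msix = 1024,
 * max_dynamic_vf_msix_table_size = 64, min_dynamic_vf_msix_table_size = 2 and
 * 16 enabled VFs, the default is max(min(1024 / 16, 64 / 2), 2) =
 * max(min(64, 32), 2) = 32 vectors per VF, consuming 512 of the 1024 dynamic
 * vectors and leaving the rest in the pool for later per-VF increases.
 */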

/**
 * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
 * @dev: PF to work on
 * @function_id: Internal PCI VF function ID
 * @msix_vec_count: Number of MSI-X vectors to set
 */
int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
			    int msix_vec_count)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *hca_cap = NULL, *query_cap = NULL, *cap;
	int num_vf_msix, min_msix, max_msix;
	bool ec_vf_function;
	int vport;
	int ret;

	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
	if (!num_vf_msix)
		return 0;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
		return -EOPNOTSUPP;

	min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
	max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);

	if (msix_vec_count < min_msix)
		return -EINVAL;

	if (msix_vec_count > max_msix)
		return -EOVERFLOW;

	query_cap = kvzalloc(query_sz, GFP_KERNEL);
	hca_cap = kvzalloc(set_sz, GFP_KERNEL);
	if (!hca_cap || !query_cap) {
		ret = -ENOMEM;
		goto out;
	}

	ec_vf_function = mlx5_core_ec_sriov_enabled(dev);
	vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function);
	ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
	       MLX5_UN_SZ_BYTES(hca_cap_union));
	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);

	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
	MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
	MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function);
	MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);

	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
out:
	kvfree(hca_cap);
	kvfree(query_cap);
	return ret;
}
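
/* Illustrative range check (hypothetical capability values): with
 * min_dynamic_vf_msix_table_size = 2 and max_dynamic_vf_msix_table_size = 32,
 * a request for 1 vector returns -EINVAL, a request for 64 returns
 * -EOVERFLOW, and any value in [2, 32] is written into the VF's copy of
 * cmd_hca_cap.dynamic_msix_table_size and applied via SET_HCA_CAP.
 */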

/* mlx5_system_free_irq - Free an IRQ
 * @irq: IRQ to free
 *
 * Free the IRQ and other resources such as rmap from the system.
 * BUT doesn't free or remove reference from mlx5.
 * This function is very important for the shutdown flow, where we need to
 * clean up system resources but keep mlx5 objects alive,
 * see mlx5_irq_table_free_irqs().
 */
static void mlx5_system_free_irq(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rmap;
#endif

	/* free_irq requires that affinity_hint and rmap are cleared before
	 * calling it. To satisfy this requirement, we call
	 * irq_cpu_rmap_remove() to remove the notifier.
	 */
	irq_update_affinity_hint(irq->map.virq, NULL);
#ifdef CONFIG_RFS_ACCEL
	rmap = mlx5_eq_table_get_rmap(pool->dev);
	if (rmap)
		irq_cpu_rmap_remove(rmap, irq->map.virq);
#endif

	free_irq(irq->map.virq, &irq->nh);
	if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
		pci_msix_free_irq(pool->dev->pdev, irq->map);
}

static void irq_release(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;

	xa_erase(&pool->irqs, irq->pool_index);
	mlx5_system_free_irq(irq);
	free_cpumask_var(irq->mask);
	kfree(irq);
}

int mlx5_irq_put(struct mlx5_irq *irq)
{
	struct mlx5_irq_pool *pool = irq->pool;
	int ret = 0;

	mutex_lock(&pool->lock);
	irq->refcount--;
	if (!irq->refcount) {
		irq_release(irq);
		ret = 1;
	}
	mutex_unlock(&pool->lock);
	return ret;
}

int mlx5_irq_read_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	return irq->refcount;
}

int mlx5_irq_get_locked(struct mlx5_irq *irq)
{
	lockdep_assert_held(&irq->pool->lock);
	if (WARN_ON_ONCE(!irq->refcount))
		return 0;
	irq->refcount++;
	return 1;
}

static int irq_get(struct mlx5_irq *irq)
{
	int err;

	mutex_lock(&irq->pool->lock);
	err = mlx5_irq_get_locked(irq);
	mutex_unlock(&irq->pool->lock);
	return err;
}

static irqreturn_t irq_int_handler(int irq, void *nh)
{
	atomic_notifier_call_chain(nh, 0, NULL);
	return IRQ_HANDLED;
}

static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
}

static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
{
	if (!pool->xa_num_irqs.max) {
		/* in case we only have a single irq for the device */
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
		return;
	}

	if (!vecidx) {
		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
		return;
	}

	vecidx -= MLX5_IRQ_VEC_COMP_BASE;
	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
}
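
/* Naming example (assuming the usual multi-vector PF case): vector 0 is named
 * "mlx5_async0", vector 1 becomes "mlx5_comp0", vector 2 "mlx5_comp1", and so
 * on, since completion vectors are offset by MLX5_IRQ_VEC_COMP_BASE. When the
 * pool has only a single IRQ the name is "mlx5_combined0" instead. The final
 * irq->name additionally appends the PCI device name through
 * MLX5_IRQ_NAME_FORMAT_STR in mlx5_irq_alloc().
 */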

struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
				struct irq_affinity_desc *af_desc,
				struct cpu_rmap **rmap)
{
	struct mlx5_core_dev *dev = pool->dev;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_irq *irq;
	int err;

	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
	if (!irq || !zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
		kfree(irq);
		return ERR_PTR(-ENOMEM);
	}

	if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
		/* The vector at index 0 is always statically allocated. If
		 * dynamic irq is not supported all vectors are statically
		 * allocated. In both cases just get the irq number and set
		 * the index.
		 */
		irq->map.virq = pci_irq_vector(dev->pdev, i);
		irq->map.index = i;
	} else {
		irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
		if (!irq->map.virq) {
			err = irq->map.index;
			goto err_alloc_irq;
		}
	}

	if (i && rmap && *rmap) {
#ifdef CONFIG_RFS_ACCEL
		err = irq_cpu_rmap_add(*rmap, irq->map.virq);
		if (err)
			goto err_irq_rmap;
#endif
	}
	if (!mlx5_irq_pool_is_sf_pool(pool))
		irq_set_name(pool, name, i);
	else
		irq_sf_set_name(pool, name, i);
	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
	snprintf(irq->name, MLX5_MAX_IRQ_FORMATTED_NAME,
		 MLX5_IRQ_NAME_FORMAT_STR, name, pci_name(dev->pdev));
	err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
			  &irq->nh);
	if (err) {
		mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
		goto err_req_irq;
	}

	if (af_desc) {
		cpumask_copy(irq->mask, &af_desc->mask);
		irq_set_affinity_and_hint(irq->map.virq, irq->mask);
	}
	irq->pool = pool;
	irq->refcount = 1;
	irq->pool_index = i;
	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
	if (err) {
		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
			      irq->pool_index, err);
		goto err_xa;
	}
	return irq;
err_xa:
	if (af_desc)
		irq_update_affinity_hint(irq->map.virq, NULL);
	free_irq(irq->map.virq, &irq->nh);
err_req_irq:
#ifdef CONFIG_RFS_ACCEL
	if (i && rmap && *rmap) {
		free_irq_cpu_rmap(*rmap);
		*rmap = NULL;
	}
err_irq_rmap:
#endif
	if (i && pci_msix_can_alloc_dyn(dev->pdev))
		pci_msix_free_irq(dev->pdev, irq->map);
err_alloc_irq:
	free_cpumask_var(irq->mask);
	kfree(irq);
	return ERR_PTR(err);
}

int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int ret;

	ret = irq_get(irq);
	if (!ret)
		/* Something went very wrong here: we are enabling an EQ
		 * on a non-existing IRQ.
		 */
		return -ENOENT;
	ret = atomic_notifier_chain_register(&irq->nh, nb);
	if (ret)
		mlx5_irq_put(irq);
	return ret;
}

int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
{
	int err = 0;

	err = atomic_notifier_chain_unregister(&irq->nh, nb);
	mlx5_irq_put(irq);
	return err;
}
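
/* Usage sketch (illustrative, not taken from a real caller): an EQ embeds a
 * struct notifier_block whose callback is invoked from irq_int_handler() via
 * atomic_notifier_call_chain(), i.e. in hard-IRQ context with action 0 and a
 * NULL data pointer. A caller registers it with mlx5_irq_attach_nb(irq, &nb),
 * which also takes an IRQ reference, and later balances it with
 * mlx5_irq_detach_nb(irq, &nb), which drops that reference.
 */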

struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
{
	return irq->mask;
}

int mlx5_irq_get_index(struct mlx5_irq *irq)
{
	return irq->map.index;
}

/* irq_pool API */

/* requesting an irq from a given pool according to given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
			struct irq_affinity_desc *af_desc,
			struct cpu_rmap **rmap)
{
	struct mlx5_irq *irq;

	mutex_lock(&pool->lock);
	irq = xa_load(&pool->irqs, vecidx);
	if (irq) {
		mlx5_irq_get_locked(irq);
		goto unlock;
	}
	irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
	mutex_unlock(&pool->lock);
	return irq;
}

static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_ctrl_pool;
}

static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
{
	return irq_table->sf_comp_pool;
}

struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_irq_pool_get(irq_table);

	/* In some configs, there is no SF IRQ pool. Hence, return the PF IRQ
	 * pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = NULL;

	if (mlx5_core_is_sf(dev))
		pool = sf_ctrl_irq_pool_get(irq_table);

	/* In some configs, there is no SF IRQ pool. Hence, return the PF IRQ
	 * pool in case the SF pool doesn't exist.
	 */
	return pool ? pool : irq_table->pcif_pool;
}

static void _mlx5_irq_release(struct mlx5_irq *irq)
{
	synchronize_irq(irq->map.virq);
	mlx5_irq_put(irq);
}

/**
 * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
 * @ctrl_irq: ctrl IRQ to be released.
 */
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
{
	_mlx5_irq_release(ctrl_irq);
}

/**
 * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
	struct irq_affinity_desc af_desc;
	struct mlx5_irq *irq;

	cpumask_copy(&af_desc.mask, cpu_online_mask);
	af_desc.is_managed = false;
	if (!mlx5_irq_pool_is_sf_pool(pool)) {
		/* We are allocating a control IRQ from a PCI device's pool.
		 * This can also happen for an SF if the SF pool is empty.
		 */
		if (!pool->xa_num_irqs.max) {
			cpumask_clear(&af_desc.mask);
			/* In case we only have a single IRQ for PF/VF */
			cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
		}
		/* Allocate the IRQ at index 0. The vector was already allocated */
		irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
	} else {
		irq = mlx5_irq_affinity_request(pool, &af_desc);
	}

	return irq;
}

/**
 * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
 * provided.
 * @af_desc: affinity descriptor for this IRQ.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * This function returns a pointer to the IRQ, or ERR_PTR in case of error.
 */
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
				  struct irq_affinity_desc *af_desc,
				  struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool;
	struct mlx5_irq *irq;

	pool = irq_table->pcif_pool;
	irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
	if (IS_ERR(irq))
		return irq;
	mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
		      irq->map.virq, cpumask_pr_args(&af_desc->mask),
		      irq->refcount / MLX5_EQ_REFS_PER_IRQ);
	return irq;
}

/**
 * mlx5_msix_alloc - allocate msix interrupt
 * @dev: mlx5 device from which to request
 * @handler: interrupt handler
 * @affdesc: affinity descriptor
 * @name: interrupt name
 *
 * Returns: struct msi_map with result encoded.
 * Note: the caller must make sure to release the irq by calling
 *       mlx5_msix_free() if shutdown was initiated.
 */
struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
			       irqreturn_t (*handler)(int, void *),
			       const struct irq_affinity_desc *affdesc,
			       const char *name)
{
	struct msi_map map;
	int err;

	if (!dev->pdev) {
		map.virq = 0;
		map.index = -EINVAL;
		return map;
	}

	map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
	if (!map.virq)
		return map;

	err = request_irq(map.virq, handler, 0, name, NULL);
	if (err) {
		mlx5_core_warn(dev, "err %d\n", err);
		pci_msix_free_irq(dev->pdev, map);
		map.virq = 0;
		map.index = -ENOMEM;
	}
	return map;
}
EXPORT_SYMBOL(mlx5_msix_alloc);

/**
 * mlx5_msix_free - free a previously allocated msix interrupt
 * @dev: mlx5 device associated with interrupt
 * @map: map previously returned by mlx5_msix_alloc()
 */
void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
{
	free_irq(map.virq, NULL);
	pci_msix_free_irq(dev->pdev, map);
}
EXPORT_SYMBOL(mlx5_msix_free);
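
/* Usage sketch (illustrative only; the caller-side handler and variable names
 * are hypothetical): a consumer built on top of mlx5_core can allocate a
 * dedicated vector and release it on teardown, e.g.
 *
 *	struct msi_map map;
 *
 *	map = mlx5_msix_alloc(mdev, my_handler, &affdesc, "my_irq");
 *	if (!map.virq)
 *		return map.index;	// failure: negative errno in map.index
 *	...
 *	mlx5_msix_free(mdev, map);
 *
 * As the kernel-doc above notes, every successful mlx5_msix_alloc() must be
 * balanced by mlx5_msix_free() before shutdown.
 */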

/**
 * mlx5_irq_release_vector - release one IRQ back to the system.
 * @irq: the irq to release.
 */
void mlx5_irq_release_vector(struct mlx5_irq *irq)
{
	_mlx5_irq_release(irq);
}

/**
 * mlx5_irq_request_vector - request one IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @cpu: CPU to bind the IRQ to.
 * @vecidx: vector index to request an IRQ for.
 * @rmap: pointer to reverse map pointer for completion interrupts
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests one IRQ for the given @vecidx.
 *
 * This function returns a pointer to the irq on success, or an error pointer
 * in case of an error.
 */
struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
					 u16 vecidx, struct cpu_rmap **rmap)
{
	struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
	struct mlx5_irq_pool *pool = table->pcif_pool;
	struct irq_affinity_desc af_desc;
	int offset = MLX5_IRQ_VEC_COMP_BASE;

	if (!pool->xa_num_irqs.max)
		offset = 0;

	af_desc.is_managed = false;
	cpumask_clear(&af_desc.mask);
	cpumask_set_cpu(cpu, &af_desc.mask);
	return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap);
}

static struct mlx5_irq_pool *
irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
	       u32 min_threshold, u32 max_threshold)
{
	struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);

	if (!pool)
		return ERR_PTR(-ENOMEM);
	pool->dev = dev;
	mutex_init(&pool->lock);
	xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
	pool->xa_num_irqs.min = start;
	pool->xa_num_irqs.max = start + size - 1;
	if (name)
		snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
			 "%s", name);
	pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
	pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
	mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
		      name, size, start);
	return pool;
}

static void irq_pool_free(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	/* There are cases in which we are destroying the irq_table before
	 * freeing all the IRQs, fast teardown for example. Hence, free the irqs
	 * which might not have been freed.
	 */
	xa_for_each(&pool->irqs, index, irq)
		irq_release(irq);
	xa_destroy(&pool->irqs);
	mutex_destroy(&pool->lock);
	kfree(pool->irqs_per_cpu);
	kvfree(pool);
}

static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;
	int num_sf_ctrl_by_msix;
	int num_sf_ctrl_by_sfs;
	int num_sf_ctrl;
	int err;

	/* init pcif_pool */
	table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
					  MLX5_EQ_SHARE_IRQ_MIN_COMP,
					  MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->pcif_pool))
		return PTR_ERR(table->pcif_pool);
	if (!mlx5_sf_max_functions(dev))
		return 0;
	if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
		mlx5_core_dbg(dev, "Not enough IRQs for SFs. SFs may run at lower performance\n");
		return 0;
	}

	/* init sf_ctrl_pool */
	num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
	num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
					  MLX5_SFS_PER_CTRL_IRQ);
	num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
	num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
	table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
					     "mlx5_sf_ctrl",
					     MLX5_EQ_SHARE_IRQ_MIN_CTRL,
					     MLX5_EQ_SHARE_IRQ_MAX_CTRL);
	if (IS_ERR(table->sf_ctrl_pool)) {
		err = PTR_ERR(table->sf_ctrl_pool);
		goto err_pf;
	}
	/* init sf_comp_pool */
	table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
					     sf_vec - num_sf_ctrl, "mlx5_sf_comp",
					     MLX5_EQ_SHARE_IRQ_MIN_COMP,
					     MLX5_EQ_SHARE_IRQ_MAX_COMP);
	if (IS_ERR(table->sf_comp_pool)) {
		err = PTR_ERR(table->sf_comp_pool);
		goto err_sf_ctrl;
	}

	table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
	if (!table->sf_comp_pool->irqs_per_cpu) {
		err = -ENOMEM;
		goto err_irqs_per_cpu;
	}

	return 0;

err_irqs_per_cpu:
	irq_pool_free(table->sf_comp_pool);
err_sf_ctrl:
	irq_pool_free(table->sf_ctrl_pool);
err_pf:
	irq_pool_free(table->pcif_pool);
	return err;
}
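
/* Pool layout example (hypothetical sizing, not measured on real hardware):
 * with pcif_vec = 16 and sf_vec = 48, the pcif_pool owns vector indices
 * 0..15. MLX5_COMP_EQS_PER_SF and MLX5_SFS_PER_CTRL_IRQ then bound the number
 * of SF control IRQs, which is further capped at MLX5_IRQ_CTRL_SF_MAX = 8, so
 * the sf_ctrl_pool starts at index 16 and the sf_comp_pool takes the
 * remaining sf_vec - num_sf_ctrl vectors after it.
 */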

static void irq_pools_destroy(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		irq_pool_free(table->sf_comp_pool);
		irq_pool_free(table->sf_ctrl_pool);
	}
	irq_pool_free(table->pcif_pool);
}

static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
{
	struct mlx5_irq *irq;
	unsigned long index;

	xa_for_each(&pool->irqs, index, irq)
		mlx5_system_free_irq(irq);
}

static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
{
	if (table->sf_ctrl_pool) {
		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
	}
	mlx5_irq_pool_free_irqs(table->pcif_pool);
}

/* irq_table API */

int mlx5_irq_table_init(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *irq_table;

	if (mlx5_core_is_sf(dev))
		return 0;

	irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
				  dev->priv.numa_node);
	if (!irq_table)
		return -ENOMEM;

	dev->priv.irq_table = irq_table;
	return 0;
}

void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
{
	if (mlx5_core_is_sf(dev))
		return;

	kvfree(dev->priv.irq_table);
}

int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
	if (!table->pcif_pool->xa_num_irqs.max)
		return 1;
	return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}

int mlx5_irq_table_create(struct mlx5_core_dev *dev)
{
	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
		      MLX5_CAP_GEN(dev, max_num_eqs) :
		      1 << MLX5_CAP_GEN(dev, log_max_eq);
	int total_vec;
	int pcif_vec;
	int req_vec;
	int err;
	int n;

	if (mlx5_core_is_sf(dev))
		return 0;

	pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
	pcif_vec = min_t(int, pcif_vec, num_eqs);

	total_vec = pcif_vec;
	if (mlx5_sf_max_functions(dev))
		total_vec += MLX5_IRQ_CTRL_SF_MAX +
			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
	total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
	pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));

	req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
	n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
	if (n < 0)
		return n;

	err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
	if (err)
		pci_free_irq_vectors(dev->pdev);

	return err;
}
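
/* Sizing example (hypothetical numbers): on a single-port device with 16
 * online CPUs, pcif_vec = 1 * 16 + 1 = 17, clamped by the EQ capability and
 * by pci_msix_vec_count(). If SFs are supported, total_vec additionally
 * reserves MLX5_IRQ_CTRL_SF_MAX plus MLX5_COMP_EQS_PER_SF vectors per SF.
 * When dynamic MSI-X allocation is available only one vector is allocated up
 * front and the rest are allocated on demand in mlx5_irq_alloc().
 */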

void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	/* There are cases where IRQs are still in use when we reach this
	 * point. Hence, make sure all the irqs are released.
	 */
	irq_pools_destroy(table);
	pci_free_irq_vectors(dev->pdev);
}

void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq_table *table = dev->priv.irq_table;

	if (mlx5_core_is_sf(dev))
		return;

	mlx5_irq_pools_free_irqs(table);
	pci_free_irq_vectors(dev->pdev);
}

int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
{
	if (table->sf_comp_pool)
		return min_t(int, num_online_cpus(),
			     table->sf_comp_pool->xa_num_irqs.max -
			     table->sf_comp_pool->xa_num_irqs.min + 1);
	else
		return mlx5_irq_table_get_num_comp(table);
}

struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF
	if (mlx5_core_is_sf(dev))
		return dev->priv.parent_mdev->priv.irq_table;
#endif
	return dev->priv.irq_table;
}