1 /*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34 #include <linux/module.h>
35 #include <linux/init.h>
36 #include <linux/slab.h>
37 #include <linux/errno.h>
38 #include <linux/netdevice.h>
39 #include <linux/inetdevice.h>
40 #include <linux/rtnetlink.h>
41 #include <linux/if_vlan.h>
42 #include <linux/sched/mm.h>
43 #include <linux/sched/task.h>
44
45 #include <net/ipv6.h>
46 #include <net/addrconf.h>
47 #include <net/devlink.h>
48
49 #include <rdma/ib_smi.h>
50 #include <rdma/ib_user_verbs.h>
51 #include <rdma/ib_addr.h>
52 #include <rdma/ib_cache.h>
53
54 #include <net/bonding.h>
55
56 #include <linux/mlx4/driver.h>
57 #include <linux/mlx4/cmd.h>
58 #include <linux/mlx4/qp.h>
59
60 #include "mlx4_ib.h"
61 #include <rdma/mlx4-abi.h>
62
63 #define DRV_NAME MLX4_IB_DRV_NAME
64 #define DRV_VERSION "4.0-0"
65
66 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF
67 #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
68 #define MLX4_IB_CARD_REV_A0 0xA0
69
70 MODULE_AUTHOR("Roland Dreier");
71 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
72 MODULE_LICENSE("Dual BSD/GPL");
73
74 int mlx4_ib_sm_guid_assign = 0;
75 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
76 MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
77
78 static const char mlx4_ib_version[] =
79 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
80 DRV_VERSION "\n";
81
82 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
83 static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
84 u8 port_num);
85
86 static struct workqueue_struct *wq;
87
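/* Prepare a LID-routed SubnGet() SMP: base/class version 1, method Get. */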
88 static void init_query_mad(struct ib_smp *mad)
89 {
90 mad->base_version = 1;
91 mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
92 mad->class_version = 1;
93 mad->method = IB_MGMT_METHOD_GET;
94 }
95
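/*
 * Device-managed flow steering (DMFS) is usable only if every port type
 * that is present has the matching capability: DMFS_IPOIB for IB ports
 * and FS_EN for Ethernet ports.  It is also unavailable for IB ports in
 * a multi-function (SR-IOV) configuration.
 */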
96 static int check_flow_steering_support(struct mlx4_dev *dev)
97 {
98 int eth_num_ports = 0;
99 int ib_num_ports = 0;
100
101 int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
102
103 if (dmfs) {
104 int i;
105 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
106 eth_num_ports++;
107 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
108 ib_num_ports++;
109 dmfs &= (!ib_num_ports ||
110 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
111 (!eth_num_ports ||
112 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
113 if (ib_num_ports && mlx4_is_mfunc(dev)) {
114 pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
115 dmfs = 0;
116 }
117 }
118 return dmfs;
119 }
120
121 static int num_ib_ports(struct mlx4_dev *dev)
122 {
123 int ib_ports = 0;
124 int i;
125
126 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
127 ib_ports++;
128
129 return ib_ports;
130 }
131
132 static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
133 {
134 struct mlx4_ib_dev *ibdev = to_mdev(device);
135 struct net_device *dev;
136
137 rcu_read_lock();
138 dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
139
140 if (dev) {
141 if (mlx4_is_bonded(ibdev->dev)) {
142 struct net_device *upper = NULL;
143
144 upper = netdev_master_upper_dev_get_rcu(dev);
145 if (upper) {
146 struct net_device *active;
147
148 active = bond_option_active_slave_get_rcu(netdev_priv(upper));
149 if (active)
150 dev = active;
151 }
152 }
153 }
154 if (dev)
155 dev_hold(dev);
156
157 rcu_read_unlock();
158 return dev;
159 }
160
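/*
 * Program the whole per-port GID table into firmware with SET_PORT
 * (RoCE v1 entry format).  When the two ports are bonded, the same
 * table is written to port 2 as well.
 */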
161 static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
162 struct mlx4_ib_dev *ibdev,
163 u8 port_num)
164 {
165 struct mlx4_cmd_mailbox *mailbox;
166 int err;
167 struct mlx4_dev *dev = ibdev->dev;
168 int i;
169 union ib_gid *gid_tbl;
170
171 mailbox = mlx4_alloc_cmd_mailbox(dev);
172 if (IS_ERR(mailbox))
173 return -ENOMEM;
174
175 gid_tbl = mailbox->buf;
176
177 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
178 memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
179
180 err = mlx4_cmd(dev, mailbox->dma,
181 MLX4_SET_PORT_GID_TABLE << 8 | port_num,
182 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
183 MLX4_CMD_WRAPPED);
184 if (mlx4_is_bonded(dev))
185 err += mlx4_cmd(dev, mailbox->dma,
186 MLX4_SET_PORT_GID_TABLE << 8 | 2,
187 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
188 MLX4_CMD_WRAPPED);
189
190 mlx4_free_cmd_mailbox(dev, mailbox);
191 return err;
192 }
193
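/*
 * RoCE v1/v2 capable firmware uses an extended GID table entry:
 * version == 2 marks a RoCE v2 (UDP-encapsulated) GID, and for such
 * entries type == 1 flags a native IPv6 address (not IPv4-mapped).
 */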
194 static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
195 struct mlx4_ib_dev *ibdev,
196 u8 port_num)
197 {
198 struct mlx4_cmd_mailbox *mailbox;
199 int err;
200 struct mlx4_dev *dev = ibdev->dev;
201 int i;
202 struct {
203 union ib_gid gid;
204 __be32 rsrvd1[2];
205 __be16 rsrvd2;
206 u8 type;
207 u8 version;
208 __be32 rsrvd3;
209 } *gid_tbl;
210
211 mailbox = mlx4_alloc_cmd_mailbox(dev);
212 if (IS_ERR(mailbox))
213 return -ENOMEM;
214
215 gid_tbl = mailbox->buf;
216 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
217 memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
218 if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
219 gid_tbl[i].version = 2;
220 if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
221 gid_tbl[i].type = 1;
222 }
223 }
224
225 err = mlx4_cmd(dev, mailbox->dma,
226 MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
227 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
228 MLX4_CMD_WRAPPED);
229 if (mlx4_is_bonded(dev))
230 err += mlx4_cmd(dev, mailbox->dma,
231 MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
232 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
233 MLX4_CMD_WRAPPED);
234
235 mlx4_free_cmd_mailbox(dev, mailbox);
236 return err;
237 }
238
239 static int mlx4_ib_update_gids(struct gid_entry *gids,
240 struct mlx4_ib_dev *ibdev,
241 u8 port_num)
242 {
243 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
244 return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
245
246 return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
247 }
248
249 static void free_gid_entry(struct gid_entry *entry)
250 {
251 memset(&entry->gid, 0, sizeof(entry->gid));
252 kfree(entry->ctx);
253 entry->ctx = NULL;
254 }
255
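/*
 * add_gid callback: cache the GID in the per-port software table under
 * iboe->lock, then mirror the full table to hardware outside the lock.
 * On failure the cached entry is rolled back.
 */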
256 static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
257 {
258 struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
259 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
260 struct mlx4_port_gid_table *port_gid_table;
261 int free = -1, found = -1;
262 int ret = 0;
263 int hw_update = 0;
264 int i;
265 struct gid_entry *gids = NULL;
266
267 if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
268 return -EINVAL;
269
270 if (attr->port_num > MLX4_MAX_PORTS)
271 return -EINVAL;
272
273 if (!context)
274 return -EINVAL;
275
276 port_gid_table = &iboe->gids[attr->port_num - 1];
277 spin_lock_bh(&iboe->lock);
278 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
279 if (!memcmp(&port_gid_table->gids[i].gid,
280 &attr->gid, sizeof(attr->gid)) &&
281 port_gid_table->gids[i].gid_type == attr->gid_type) {
282 found = i;
283 break;
284 }
285 if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid))
286 free = i; /* HW has space */
287 }
288
289 if (found < 0) {
290 if (free < 0) {
291 ret = -ENOSPC;
292 } else {
293 port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
294 if (!port_gid_table->gids[free].ctx) {
295 ret = -ENOMEM;
296 } else {
297 *context = port_gid_table->gids[free].ctx;
298 memcpy(&port_gid_table->gids[free].gid,
299 &attr->gid, sizeof(attr->gid));
300 port_gid_table->gids[free].gid_type = attr->gid_type;
301 port_gid_table->gids[free].ctx->real_index = free;
302 port_gid_table->gids[free].ctx->refcount = 1;
303 hw_update = 1;
304 }
305 }
306 } else {
307 struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
308 *context = ctx;
309 ctx->refcount++;
310 }
311 if (!ret && hw_update) {
312 gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
313 GFP_ATOMIC);
314 if (!gids) {
315 ret = -ENOMEM;
316 *context = NULL;
317 free_gid_entry(&port_gid_table->gids[free]);
318 } else {
319 for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
320 memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
321 gids[i].gid_type = port_gid_table->gids[i].gid_type;
322 }
323 }
324 }
325 spin_unlock_bh(&iboe->lock);
326
327 if (!ret && hw_update) {
328 ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
329 if (ret) {
330 spin_lock_bh(&iboe->lock);
331 *context = NULL;
332 free_gid_entry(&port_gid_table->gids[free]);
333 spin_unlock_bh(&iboe->lock);
334 }
335 kfree(gids);
336 }
337
338 return ret;
339 }
340
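/*
 * del_gid callback: drop one reference on the cached entry; once the
 * last reference is gone, clear the slot and rewrite the hardware table.
 */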
341 static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
342 {
343 struct gid_cache_context *ctx = *context;
344 struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
345 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
346 struct mlx4_port_gid_table *port_gid_table;
347 int ret = 0;
348 int hw_update = 0;
349 struct gid_entry *gids = NULL;
350
351 if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
352 return -EINVAL;
353
354 if (attr->port_num > MLX4_MAX_PORTS)
355 return -EINVAL;
356
357 port_gid_table = &iboe->gids[attr->port_num - 1];
358 spin_lock_bh(&iboe->lock);
359 if (ctx) {
360 ctx->refcount--;
361 if (!ctx->refcount) {
362 unsigned int real_index = ctx->real_index;
363
364 free_gid_entry(&port_gid_table->gids[real_index]);
365 hw_update = 1;
366 }
367 }
368 if (!ret && hw_update) {
369 int i;
370
371 gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
372 GFP_ATOMIC);
373 if (!gids) {
374 ret = -ENOMEM;
375 } else {
376 for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
377 memcpy(&gids[i].gid,
378 &port_gid_table->gids[i].gid,
379 sizeof(union ib_gid));
380 gids[i].gid_type =
381 port_gid_table->gids[i].gid_type;
382 }
383 }
384 }
385 spin_unlock_bh(&iboe->lock);
386
387 if (!ret && hw_update) {
388 ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
389 kfree(gids);
390 }
391 return ret;
392 }
393
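/*
 * Translate an index in the rdma-core GID cache into the slot actually
 * programmed in the hardware table.  The driver keeps its own per-port
 * table, so the two indices may differ; plain IB ports use the cache
 * index as-is.
 */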
394 int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
395 const struct ib_gid_attr *attr)
396 {
397 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
398 struct gid_cache_context *ctx = NULL;
399 struct mlx4_port_gid_table *port_gid_table;
400 int real_index = -EINVAL;
401 int i;
402 unsigned long flags;
403 u8 port_num = attr->port_num;
404
405 if (port_num > MLX4_MAX_PORTS)
406 return -EINVAL;
407
408 if (mlx4_is_bonded(ibdev->dev))
409 port_num = 1;
410
411 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
412 return attr->index;
413
414 spin_lock_irqsave(&iboe->lock, flags);
415 port_gid_table = &iboe->gids[port_num - 1];
416
417 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
418 if (!memcmp(&port_gid_table->gids[i].gid,
419 &attr->gid, sizeof(attr->gid)) &&
420 attr->gid_type == port_gid_table->gids[i].gid_type) {
421 ctx = port_gid_table->gids[i].ctx;
422 break;
423 }
424 if (ctx)
425 real_index = ctx->real_index;
426 spin_unlock_irqrestore(&iboe->lock, flags);
427 return real_index;
428 }
429
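/* True if a user buffer of size 'sz' is long enough to contain 'fld'. */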
430 #define field_avail(type, fld, sz) (offsetof(type, fld) + \
431 sizeof(((type *)0)->fld) <= (sz))
432
433 static int mlx4_ib_query_device(struct ib_device *ibdev,
434 struct ib_device_attr *props,
435 struct ib_udata *uhw)
436 {
437 struct mlx4_ib_dev *dev = to_mdev(ibdev);
438 struct ib_smp *in_mad = NULL;
439 struct ib_smp *out_mad = NULL;
440 int err;
441 int have_ib_ports;
442 struct mlx4_uverbs_ex_query_device cmd;
443 struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
444 struct mlx4_clock_params clock_params;
445
446 if (uhw->inlen) {
447 if (uhw->inlen < sizeof(cmd))
448 return -EINVAL;
449
450 err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
451 if (err)
452 return err;
453
454 if (cmd.comp_mask)
455 return -EINVAL;
456
457 if (cmd.reserved)
458 return -EINVAL;
459 }
460
461 resp.response_length = offsetof(typeof(resp), response_length) +
462 sizeof(resp.response_length);
463 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
464 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
465 err = -ENOMEM;
466 if (!in_mad || !out_mad)
467 goto out;
468
469 init_query_mad(in_mad);
470 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
471
472 err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
473 1, NULL, NULL, in_mad, out_mad);
474 if (err)
475 goto out;
476
477 memset(props, 0, sizeof *props);
478
479 have_ib_ports = num_ib_ports(dev->dev);
480
481 props->fw_ver = dev->dev->caps.fw_ver;
482 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
483 IB_DEVICE_PORT_ACTIVE_EVENT |
484 IB_DEVICE_SYS_IMAGE_GUID |
485 IB_DEVICE_RC_RNR_NAK_GEN |
486 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
487 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
488 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
489 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
490 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
491 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
492 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
493 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
494 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
495 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
496 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
497 if (dev->dev->caps.max_gso_sz &&
498 (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
499 (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
500 props->device_cap_flags |= IB_DEVICE_UD_TSO;
501 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
502 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
503 if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
504 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
505 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
506 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
507 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
508 props->device_cap_flags |= IB_DEVICE_XRC;
509 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
510 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
511 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
512 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
513 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
514 else
515 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
516 }
517 if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
518 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
519
520 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
521
522 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
523 0xffffff;
524 props->vendor_part_id = dev->dev->persist->pdev->device;
525 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
526 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
527
528 props->max_mr_size = ~0ull;
529 props->page_size_cap = dev->dev->caps.page_size_cap;
530 props->max_qp = dev->dev->quotas.qp;
531 props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
532 props->max_send_sge =
533 min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
534 props->max_recv_sge =
535 min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
536 props->max_sge_rd = MLX4_MAX_SGE_RD;
537 props->max_cq = dev->dev->quotas.cq;
538 props->max_cqe = dev->dev->caps.max_cqes;
539 props->max_mr = dev->dev->quotas.mpt;
540 props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
541 props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
542 props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
543 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
544 props->max_srq = dev->dev->quotas.srq;
545 props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
546 props->max_srq_sge = dev->dev->caps.max_srq_sge;
547 props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
548 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
549 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
550 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
551 props->masked_atomic_cap = props->atomic_cap;
552 props->max_pkeys = dev->dev->caps.pkey_table_len[1];
553 props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
554 props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
555 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
556 props->max_mcast_grp;
557 props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
558 props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
559 props->timestamp_mask = 0xFFFFFFFFFFFFULL;
560 props->max_ah = INT_MAX;
561
562 if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
563 mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
564 if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
565 props->rss_caps.max_rwq_indirection_tables =
566 props->max_qp;
567 props->rss_caps.max_rwq_indirection_table_size =
568 dev->dev->caps.max_rss_tbl_sz;
569 props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
570 props->max_wq_type_rq = props->max_qp;
571 }
572
573 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
574 props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
575 }
576
577 props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
578 props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
579
580 if (!mlx4_is_slave(dev->dev))
581 err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
582
583 if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
584 resp.response_length += sizeof(resp.hca_core_clock_offset);
585 if (!err && !mlx4_is_slave(dev->dev)) {
586 resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
587 resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
588 }
589 }
590
591 if (uhw->outlen >= resp.response_length +
592 sizeof(resp.max_inl_recv_sz)) {
593 resp.response_length += sizeof(resp.max_inl_recv_sz);
594 resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg *
595 sizeof(struct mlx4_wqe_data_seg);
596 }
597
598 if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
599 if (props->rss_caps.supported_qpts) {
600 resp.rss_caps.rx_hash_function =
601 MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
602
603 resp.rss_caps.rx_hash_fields_mask =
604 MLX4_IB_RX_HASH_SRC_IPV4 |
605 MLX4_IB_RX_HASH_DST_IPV4 |
606 MLX4_IB_RX_HASH_SRC_IPV6 |
607 MLX4_IB_RX_HASH_DST_IPV6 |
608 MLX4_IB_RX_HASH_SRC_PORT_TCP |
609 MLX4_IB_RX_HASH_DST_PORT_TCP |
610 MLX4_IB_RX_HASH_SRC_PORT_UDP |
611 MLX4_IB_RX_HASH_DST_PORT_UDP;
612
613 if (dev->dev->caps.tunnel_offload_mode ==
614 MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
615 resp.rss_caps.rx_hash_fields_mask |=
616 MLX4_IB_RX_HASH_INNER;
617 }
618 resp.response_length = offsetof(typeof(resp), rss_caps) +
619 sizeof(resp.rss_caps);
620 }
621
622 if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
623 if (dev->dev->caps.max_gso_sz &&
624 ((mlx4_ib_port_link_layer(ibdev, 1) ==
625 IB_LINK_LAYER_ETHERNET) ||
626 (mlx4_ib_port_link_layer(ibdev, 2) ==
627 IB_LINK_LAYER_ETHERNET))) {
628 resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
629 resp.tso_caps.supported_qpts |=
630 1 << IB_QPT_RAW_PACKET;
631 }
632 resp.response_length = offsetof(typeof(resp), tso_caps) +
633 sizeof(resp.tso_caps);
634 }
635
636 if (uhw->outlen) {
637 err = ib_copy_to_udata(uhw, &resp, resp.response_length);
638 if (err)
639 goto out;
640 }
641 out:
642 kfree(in_mad);
643 kfree(out_mad);
644
645 return err;
646 }
647
648 static enum rdma_link_layer
649 mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
650 {
651 struct mlx4_dev *dev = to_mdev(device)->dev;
652
653 return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
654 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
655 }
656
657 static int ib_link_query_port(struct ib_device *ibdev, u8 port,
658 struct ib_port_attr *props, int netw_view)
659 {
660 struct ib_smp *in_mad = NULL;
661 struct ib_smp *out_mad = NULL;
662 int ext_active_speed;
663 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
664 int err = -ENOMEM;
665
666 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
667 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
668 if (!in_mad || !out_mad)
669 goto out;
670
671 init_query_mad(in_mad);
672 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
673 in_mad->attr_mod = cpu_to_be32(port);
674
675 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
676 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
677
678 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
679 in_mad, out_mad);
680 if (err)
681 goto out;
682
683
684 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
685 props->lmc = out_mad->data[34] & 0x7;
686 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
687 props->sm_sl = out_mad->data[36] & 0xf;
688 props->state = out_mad->data[32] & 0xf;
689 props->phys_state = out_mad->data[33] >> 4;
690 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
691 if (netw_view)
692 props->gid_tbl_len = out_mad->data[50];
693 else
694 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
695 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
696 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
697 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
698 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
699 props->active_width = out_mad->data[31] & 0xf;
700 props->active_speed = out_mad->data[35] >> 4;
701 props->max_mtu = out_mad->data[41] & 0xf;
702 props->active_mtu = out_mad->data[36] >> 4;
703 props->subnet_timeout = out_mad->data[51] & 0x1f;
704 props->max_vl_num = out_mad->data[37] >> 4;
705 props->init_type_reply = out_mad->data[41] >> 4;
706
707 /* Check if extended speeds (EDR/FDR/...) are supported */
708 if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
709 ext_active_speed = out_mad->data[62] >> 4;
710
711 switch (ext_active_speed) {
712 case 1:
713 props->active_speed = IB_SPEED_FDR;
714 break;
715 case 2:
716 props->active_speed = IB_SPEED_EDR;
717 break;
718 }
719 }
720
721 /* If the reported active speed is QDR, check if it is FDR-10 */
722 if (props->active_speed == IB_SPEED_QDR) {
723 init_query_mad(in_mad);
724 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
725 in_mad->attr_mod = cpu_to_be32(port);
726
727 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
728 NULL, NULL, in_mad, out_mad);
729 if (err)
730 goto out;
731
732 /* Checking LinkSpeedActive for FDR-10 */
733 if (out_mad->data[15] & 0x1)
734 props->active_speed = IB_SPEED_FDR10;
735 }
736
737 /* Avoid wrong speed value returned by FW if the IB link is down. */
738 if (props->state == IB_PORT_DOWN)
739 props->active_speed = IB_SPEED_SDR;
740
741 out:
742 kfree(in_mad);
743 kfree(out_mad);
744 return err;
745 }
746
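/*
 * Map the logical port state to the IB PortPhysicalState encoding:
 * 5 = LinkUp, 3 = Disabled.
 */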
747 static u8 state_to_phys_state(enum ib_port_state state)
748 {
749 return state == IB_PORT_ACTIVE ? 5 : 3;
750 }
751
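/*
 * RoCE ports: port attributes are derived from QUERY_PORT plus the state
 * of the underlying netdev (or its bond master) instead of from SMPs.
 */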
752 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
753 struct ib_port_attr *props)
754 {
755
756 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
757 struct mlx4_ib_iboe *iboe = &mdev->iboe;
758 struct net_device *ndev;
759 enum ib_mtu tmp;
760 struct mlx4_cmd_mailbox *mailbox;
761 int err = 0;
762 int is_bonded = mlx4_is_bonded(mdev->dev);
763
764 mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
765 if (IS_ERR(mailbox))
766 return PTR_ERR(mailbox);
767
768 err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
769 MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
770 MLX4_CMD_WRAPPED);
771 if (err)
772 goto out;
773
774 props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ||
775 (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
776 IB_WIDTH_4X : IB_WIDTH_1X;
777 props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
778 IB_SPEED_FDR : IB_SPEED_QDR;
779 props->port_cap_flags = IB_PORT_CM_SUP;
780 props->ip_gids = true;
781 props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
782 props->max_msg_sz = mdev->dev->caps.max_msg_sz;
783 props->pkey_tbl_len = 1;
784 props->max_mtu = IB_MTU_4096;
785 props->max_vl_num = 2;
786 props->state = IB_PORT_DOWN;
787 props->phys_state = state_to_phys_state(props->state);
788 props->active_mtu = IB_MTU_256;
789 spin_lock_bh(&iboe->lock);
790 ndev = iboe->netdevs[port - 1];
791 if (ndev && is_bonded) {
792 rcu_read_lock(); /* required to get upper dev */
793 ndev = netdev_master_upper_dev_get_rcu(ndev);
794 rcu_read_unlock();
795 }
796 if (!ndev)
797 goto out_unlock;
798
799 tmp = iboe_get_mtu(ndev->mtu);
800 props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
801
802 props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
803 IB_PORT_ACTIVE : IB_PORT_DOWN;
804 props->phys_state = state_to_phys_state(props->state);
805 out_unlock:
806 spin_unlock_bh(&iboe->lock);
807 out:
808 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
809 return err;
810 }
811
812 int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
813 struct ib_port_attr *props, int netw_view)
814 {
815 int err;
816
817 /* props is zeroed by the caller; avoid zeroing it again here */
818
819 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
820 ib_link_query_port(ibdev, port, props, netw_view) :
821 eth_link_query_port(ibdev, port, props);
822
823 return err;
824 }
825
826 static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
827 struct ib_port_attr *props)
828 {
829 /* returns host view */
830 return __mlx4_ib_query_port(ibdev, port, props, 0);
831 }
832
833 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
834 union ib_gid *gid, int netw_view)
835 {
836 struct ib_smp *in_mad = NULL;
837 struct ib_smp *out_mad = NULL;
838 int err = -ENOMEM;
839 struct mlx4_ib_dev *dev = to_mdev(ibdev);
840 int clear = 0;
841 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
842
843 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
844 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
845 if (!in_mad || !out_mad)
846 goto out;
847
848 init_query_mad(in_mad);
849 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
850 in_mad->attr_mod = cpu_to_be32(port);
851
852 if (mlx4_is_mfunc(dev->dev) && netw_view)
853 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
854
855 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
856 if (err)
857 goto out;
858
859 memcpy(gid->raw, out_mad->data + 8, 8);
860
861 if (mlx4_is_mfunc(dev->dev) && !netw_view) {
862 if (index) {
863 /* For any index > 0, return the null guid */
864 err = 0;
865 clear = 1;
866 goto out;
867 }
868 }
869
870 init_query_mad(in_mad);
871 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
872 in_mad->attr_mod = cpu_to_be32(index / 8);
873
874 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
875 NULL, NULL, in_mad, out_mad);
876 if (err)
877 goto out;
878
879 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
880
881 out:
882 if (clear)
883 memset(gid->raw + 8, 0, 8);
884 kfree(in_mad);
885 kfree(out_mad);
886 return err;
887 }
888
889 static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
890 union ib_gid *gid)
891 {
892 if (rdma_protocol_ib(ibdev, port))
893 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
894 return 0;
895 }
896
897 static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
898 {
899 union sl2vl_tbl_to_u64 sl2vl64;
900 struct ib_smp *in_mad = NULL;
901 struct ib_smp *out_mad = NULL;
902 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
903 int err = -ENOMEM;
904 int jj;
905
906 if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
907 *sl2vl_tbl = 0;
908 return 0;
909 }
910
911 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
912 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
913 if (!in_mad || !out_mad)
914 goto out;
915
916 init_query_mad(in_mad);
917 in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE;
918 in_mad->attr_mod = 0;
919
920 if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
921 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
922
923 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
924 in_mad, out_mad);
925 if (err)
926 goto out;
927
928 for (jj = 0; jj < 8; jj++)
929 sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
930 *sl2vl_tbl = sl2vl64.sl64;
931
932 out:
933 kfree(in_mad);
934 kfree(out_mad);
935 return err;
936 }
937
938 static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
939 {
940 u64 sl2vl;
941 int i;
942 int err;
943
944 for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
945 if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
946 continue;
947 err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
948 if (err) {
949 pr_err("Unable to get default sl to vl mapping for port %d. Using all zeroes (%d)\n",
950 i, err);
951 sl2vl = 0;
952 }
953 atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
954 }
955 }
956
957 int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
958 u16 *pkey, int netw_view)
959 {
960 struct ib_smp *in_mad = NULL;
961 struct ib_smp *out_mad = NULL;
962 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
963 int err = -ENOMEM;
964
965 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
966 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
967 if (!in_mad || !out_mad)
968 goto out;
969
970 init_query_mad(in_mad);
971 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
972 in_mad->attr_mod = cpu_to_be32(index / 32);
973
974 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
975 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
976
977 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
978 in_mad, out_mad);
979 if (err)
980 goto out;
981
982 *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
983
984 out:
985 kfree(in_mad);
986 kfree(out_mad);
987 return err;
988 }
989
990 static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
991 {
992 return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
993 }
994
995 static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
996 struct ib_device_modify *props)
997 {
998 struct mlx4_cmd_mailbox *mailbox;
999 unsigned long flags;
1000
1001 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
1002 return -EOPNOTSUPP;
1003
1004 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
1005 return 0;
1006
1007 if (mlx4_is_slave(to_mdev(ibdev)->dev))
1008 return -EOPNOTSUPP;
1009
1010 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
1011 memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1012 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
1013
1014 /*
1015 * If possible, pass node desc to FW, so it can generate
1016 * a 144 trap. If cmd fails, just ignore.
1017 */
1018 mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
1019 if (IS_ERR(mailbox))
1020 return 0;
1021
1022 memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1023 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
1024 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
1025
1026 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
1027
1028 return 0;
1029 }
1030
1031 static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
1032 u32 cap_mask)
1033 {
1034 struct mlx4_cmd_mailbox *mailbox;
1035 int err;
1036
1037 mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
1038 if (IS_ERR(mailbox))
1039 return PTR_ERR(mailbox);
1040
1041 if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
1042 *(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
1043 ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
1044 } else {
1045 ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols;
1046 ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
1047 }
1048
1049 err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
1050 MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
1051 MLX4_CMD_WRAPPED);
1052
1053 mlx4_free_cmd_mailbox(dev->dev, mailbox);
1054 return err;
1055 }
1056
1057 static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
1058 struct ib_port_modify *props)
1059 {
1060 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
1061 u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
1062 struct ib_port_attr attr;
1063 u32 cap_mask;
1064 int err;
1065
1066 /* return OK if this is RoCE. CM calls ib_modify_port() regardless
1067 * of whether port link layer is ETH or IB. For ETH ports, qkey
1068 * violations and port capabilities are not meaningful.
1069 */
1070 if (is_eth)
1071 return 0;
1072
1073 mutex_lock(&mdev->cap_mask_mutex);
1074
1075 err = ib_query_port(ibdev, port, &attr);
1076 if (err)
1077 goto out;
1078
1079 cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
1080 ~props->clr_port_cap_mask;
1081
1082 err = mlx4_ib_SET_PORT(mdev, port,
1083 !!(mask & IB_PORT_RESET_QKEY_CNTR),
1084 cap_mask);
1085
1086 out:
1087 mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
1088 return err;
1089 }
1090
1091 static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
1092 struct ib_udata *udata)
1093 {
1094 struct mlx4_ib_dev *dev = to_mdev(ibdev);
1095 struct mlx4_ib_ucontext *context;
1096 struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
1097 struct mlx4_ib_alloc_ucontext_resp resp;
1098 int err;
1099
1100 if (!dev->ib_active)
1101 return ERR_PTR(-EAGAIN);
1102
1103 if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
1104 resp_v3.qp_tab_size = dev->dev->caps.num_qps;
1105 resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
1106 resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1107 } else {
1108 resp.dev_caps = dev->dev->caps.userspace_caps;
1109 resp.qp_tab_size = dev->dev->caps.num_qps;
1110 resp.bf_reg_size = dev->dev->caps.bf_reg_size;
1111 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1112 resp.cqe_size = dev->dev->caps.cqe_size;
1113 }
1114
1115 context = kzalloc(sizeof(*context), GFP_KERNEL);
1116 if (!context)
1117 return ERR_PTR(-ENOMEM);
1118
1119 err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
1120 if (err) {
1121 kfree(context);
1122 return ERR_PTR(err);
1123 }
1124
1125 INIT_LIST_HEAD(&context->db_page_list);
1126 mutex_init(&context->db_page_mutex);
1127
1128 INIT_LIST_HEAD(&context->wqn_ranges_list);
1129 mutex_init(&context->wqn_ranges_mutex);
1130
1131 if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
1132 err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
1133 else
1134 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
1135
1136 if (err) {
1137 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
1138 kfree(context);
1139 return ERR_PTR(-EFAULT);
1140 }
1141
1142 return &context->ibucontext;
1143 }
1144
1145 static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1146 {
1147 struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1148
1149 mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
1150 kfree(context);
1151
1152 return 0;
1153 }
1154
1155 static void mlx4_ib_vma_open(struct vm_area_struct *area)
1156 {
1157 /* vma_open is called when a new VMA is created on top of our VMA.
1158 * This is done through either mremap flow or split_vma (usually due
1159 * to mlock, madvise, munmap, etc.). We do not support a clone of the
1160 * vma, as this VMA is strongly hardware related. Therefore we set the
1161 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
1162 * calling us again and trying to do incorrect actions. We assume that
1163 * the original vma size is exactly a single page that there will be no
1164 * "splitting" operations on.
1165 */
1166 area->vm_ops = NULL;
1167 }
1168
1169 static void mlx4_ib_vma_close(struct vm_area_struct *area)
1170 {
1171 struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
1172
1173 /* It's guaranteed that all VMAs opened on a FD are closed before the
1174 * file itself is closed, therefore no sync is needed with the regular
1175 * closing flow (e.g. mlx4_ib_dealloc_ucontext). However, a sync is needed
1176 * when accessing the vma as part of mlx4_ib_disassociate_ucontext.
1177 * The close operation is usually called under mm->mmap_sem except when
1178 * process is exiting. The exiting case is handled explicitly as part
1179 * of mlx4_ib_disassociate_ucontext.
1180 */
1181 mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
1182 area->vm_private_data;
1183
1184 /* set the vma context pointer to null in the mlx4_ib driver's private
1185 * data to protect against a race condition in mlx4_ib_disassociate_ucontext().
1186 */
1187 mlx4_ib_vma_priv_data->vma = NULL;
1188 }
1189
1190 static const struct vm_operations_struct mlx4_ib_vm_ops = {
1191 .open = mlx4_ib_vma_open,
1192 .close = mlx4_ib_vma_close
1193 };
1194
1195 static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1196 {
1197 int i;
1198 struct vm_area_struct *vma;
1199 struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1200
1201 /* need to protect from a race on closing the vma as part of
1202 * mlx4_ib_vma_close().
1203 */
1204 for (i = 0; i < HW_BAR_COUNT; i++) {
1205 vma = context->hw_bar_info[i].vma;
1206 if (!vma)
1207 continue;
1208
1209 zap_vma_ptes(context->hw_bar_info[i].vma,
1210 context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
1211
1212 context->hw_bar_info[i].vma->vm_flags &=
1213 ~(VM_SHARED | VM_MAYSHARE);
1214 /* context going to be destroyed, should not access ops any more */
1215 context->hw_bar_info[i].vma->vm_ops = NULL;
1216 }
1217 }
1218
1219 static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
1220 struct mlx4_ib_vma_private_data *vma_private_data)
1221 {
1222 vma_private_data->vma = vma;
1223 vma->vm_private_data = vma_private_data;
1224 vma->vm_ops = &mlx4_ib_vm_ops;
1225 }
1226
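/*
 * Userspace mmap offsets: page 0 maps the UAR doorbell page, page 1 the
 * BlueFlame register page (when bf_reg_size != 0) and page 3 the HCA
 * core clock page.  Each may be mapped only once per context.
 */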
1227 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
1228 {
1229 struct mlx4_ib_dev *dev = to_mdev(context->device);
1230 struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
1231
1232 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1233 return -EINVAL;
1234
1235 if (vma->vm_pgoff == 0) {
1236 /* We prevent double mmapping on the same context */
1237 if (mucontext->hw_bar_info[HW_BAR_DB].vma)
1238 return -EINVAL;
1239
1240 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1241
1242 if (io_remap_pfn_range(vma, vma->vm_start,
1243 to_mucontext(context)->uar.pfn,
1244 PAGE_SIZE, vma->vm_page_prot))
1245 return -EAGAIN;
1246
1247 mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
1248
1249 } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
1250 /* We prevent double mmapping on the same context */
1251 if (mucontext->hw_bar_info[HW_BAR_BF].vma)
1252 return -EINVAL;
1253
1254 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
1255
1256 if (io_remap_pfn_range(vma, vma->vm_start,
1257 to_mucontext(context)->uar.pfn +
1258 dev->dev->caps.num_uars,
1259 PAGE_SIZE, vma->vm_page_prot))
1260 return -EAGAIN;
1261
1262 mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
1263
1264 } else if (vma->vm_pgoff == 3) {
1265 struct mlx4_clock_params params;
1266 int ret;
1267
1268 /* We prevent double mmapping on the same context */
1269 if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
1270 return -EINVAL;
1271
1272 ret = mlx4_get_internal_clock_params(dev->dev, &params);
1273
1274 if (ret)
1275 return ret;
1276
1277 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1278 if (io_remap_pfn_range(vma, vma->vm_start,
1279 (pci_resource_start(dev->dev->persist->pdev,
1280 params.bar) +
1281 params.offset)
1282 >> PAGE_SHIFT,
1283 PAGE_SIZE, vma->vm_page_prot))
1284 return -EAGAIN;
1285
1286 mlx4_ib_set_vma_data(vma,
1287 &mucontext->hw_bar_info[HW_BAR_CLOCK]);
1288 } else {
1289 return -EINVAL;
1290 }
1291
1292 return 0;
1293 }
1294
1295 static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
1296 struct ib_ucontext *context,
1297 struct ib_udata *udata)
1298 {
1299 struct mlx4_ib_pd *pd;
1300 int err;
1301
1302 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
1303 if (!pd)
1304 return ERR_PTR(-ENOMEM);
1305
1306 err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
1307 if (err) {
1308 kfree(pd);
1309 return ERR_PTR(err);
1310 }
1311
1312 if (context)
1313 if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
1314 mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
1315 kfree(pd);
1316 return ERR_PTR(-EFAULT);
1317 }
1318 return &pd->ibpd;
1319 }
1320
1321 static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
1322 {
1323 mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
1324 kfree(pd);
1325
1326 return 0;
1327 }
1328
1329 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
1330 struct ib_ucontext *context,
1331 struct ib_udata *udata)
1332 {
1333 struct mlx4_ib_xrcd *xrcd;
1334 struct ib_cq_init_attr cq_attr = {};
1335 int err;
1336
1337 if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1338 return ERR_PTR(-ENOSYS);
1339
1340 xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
1341 if (!xrcd)
1342 return ERR_PTR(-ENOMEM);
1343
1344 err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
1345 if (err)
1346 goto err1;
1347
1348 xrcd->pd = ib_alloc_pd(ibdev, 0);
1349 if (IS_ERR(xrcd->pd)) {
1350 err = PTR_ERR(xrcd->pd);
1351 goto err2;
1352 }
1353
1354 cq_attr.cqe = 1;
1355 xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
1356 if (IS_ERR(xrcd->cq)) {
1357 err = PTR_ERR(xrcd->cq);
1358 goto err3;
1359 }
1360
1361 return &xrcd->ibxrcd;
1362
1363 err3:
1364 ib_dealloc_pd(xrcd->pd);
1365 err2:
1366 mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
1367 err1:
1368 kfree(xrcd);
1369 return ERR_PTR(err);
1370 }
1371
1372 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
1373 {
1374 ib_destroy_cq(to_mxrcd(xrcd)->cq);
1375 ib_dealloc_pd(to_mxrcd(xrcd)->pd);
1376 mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
1377 kfree(xrcd);
1378
1379 return 0;
1380 }
1381
1382 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
1383 {
1384 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1385 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1386 struct mlx4_ib_gid_entry *ge;
1387
1388 ge = kzalloc(sizeof *ge, GFP_KERNEL);
1389 if (!ge)
1390 return -ENOMEM;
1391
1392 ge->gid = *gid;
1393 if (mlx4_ib_add_mc(mdev, mqp, gid)) {
1394 ge->port = mqp->port;
1395 ge->added = 1;
1396 }
1397
1398 mutex_lock(&mqp->mutex);
1399 list_add_tail(&ge->list, &mqp->gid_list);
1400 mutex_unlock(&mqp->mutex);
1401
1402 return 0;
1403 }
1404
1405 static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
1406 struct mlx4_ib_counters *ctr_table)
1407 {
1408 struct counter_index *counter, *tmp_count;
1409
1410 mutex_lock(&ctr_table->mutex);
1411 list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
1412 list) {
1413 if (counter->allocated)
1414 mlx4_counter_free(ibdev->dev, counter->index);
1415 list_del(&counter->list);
1416 kfree(counter);
1417 }
1418 mutex_unlock(&ctr_table->mutex);
1419 }
1420
1421 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
1422 union ib_gid *gid)
1423 {
1424 struct net_device *ndev;
1425 int ret = 0;
1426
1427 if (!mqp->port)
1428 return 0;
1429
1430 spin_lock_bh(&mdev->iboe.lock);
1431 ndev = mdev->iboe.netdevs[mqp->port - 1];
1432 if (ndev)
1433 dev_hold(ndev);
1434 spin_unlock_bh(&mdev->iboe.lock);
1435
1436 if (ndev) {
1437 ret = 1;
1438 dev_put(ndev);
1439 }
1440
1441 return ret;
1442 }
1443
1444 struct mlx4_ib_steering {
1445 struct list_head list;
1446 struct mlx4_flow_reg_id reg_id;
1447 union ib_gid gid;
1448 };
1449
1450 #define LAST_ETH_FIELD vlan_tag
1451 #define LAST_IB_FIELD sl
1452 #define LAST_IPV4_FIELD dst_ip
1453 #define LAST_TCP_UDP_FIELD src_port
1454
1455 /* Field is the last supported field */
1456 #define FIELDS_NOT_SUPPORTED(filter, field)\
1457 memchr_inv((void *)&filter.field +\
1458 sizeof(filter.field), 0,\
1459 sizeof(filter) -\
1460 offsetof(typeof(filter), field) -\
1461 sizeof(filter.field))
1462
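/*
 * Translate one ib_flow_spec into the hardware _rule_hw layout.
 * Returns the number of bytes consumed in the rule buffer, or a
 * negative errno for unsupported specs.
 */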
1463 static int parse_flow_attr(struct mlx4_dev *dev,
1464 u32 qp_num,
1465 union ib_flow_spec *ib_spec,
1466 struct _rule_hw *mlx4_spec)
1467 {
1468 enum mlx4_net_trans_rule_id type;
1469
1470 switch (ib_spec->type) {
1471 case IB_FLOW_SPEC_ETH:
1472 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
1473 return -ENOTSUPP;
1474
1475 type = MLX4_NET_TRANS_RULE_ID_ETH;
1476 memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
1477 ETH_ALEN);
1478 memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
1479 ETH_ALEN);
1480 mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
1481 mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
1482 break;
1483 case IB_FLOW_SPEC_IB:
1484 if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
1485 return -ENOTSUPP;
1486
1487 type = MLX4_NET_TRANS_RULE_ID_IB;
1488 mlx4_spec->ib.l3_qpn =
1489 cpu_to_be32(qp_num);
1490 mlx4_spec->ib.qpn_mask =
1491 cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
1492 break;
1493
1494
1495 case IB_FLOW_SPEC_IPV4:
1496 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
1497 return -ENOTSUPP;
1498
1499 type = MLX4_NET_TRANS_RULE_ID_IPV4;
1500 mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
1501 mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
1502 mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
1503 mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
1504 break;
1505
1506 case IB_FLOW_SPEC_TCP:
1507 case IB_FLOW_SPEC_UDP:
1508 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
1509 return -ENOTSUPP;
1510
1511 type = ib_spec->type == IB_FLOW_SPEC_TCP ?
1512 MLX4_NET_TRANS_RULE_ID_TCP :
1513 MLX4_NET_TRANS_RULE_ID_UDP;
1514 mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
1515 mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
1516 mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
1517 mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
1518 break;
1519
1520 default:
1521 return -EINVAL;
1522 }
1523 if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
1524 mlx4_hw_rule_sz(dev, type) < 0)
1525 return -EINVAL;
1526 mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
1527 mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
1528 return mlx4_hw_rule_sz(dev, type);
1529 }
1530
1531 struct default_rules {
1532 __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1533 __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1534 __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
1535 __u8 link_layer;
1536 };
1537 static const struct default_rules default_table[] = {
1538 {
1539 .mandatory_fields = {IB_FLOW_SPEC_IPV4},
1540 .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
1541 .rules_create_list = {IB_FLOW_SPEC_IB},
1542 .link_layer = IB_LINK_LAYER_INFINIBAND
1543 }
1544 };
1545
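/*
 * Return the index of the default_table entry whose mandatory/forbidden
 * layer constraints are satisfied by this flow, or -1 if none applies.
 */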
1546 static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
1547 struct ib_flow_attr *flow_attr)
1548 {
1549 int i, j, k;
1550 void *ib_flow;
1551 const struct default_rules *pdefault_rules = default_table;
1552 u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
1553
1554 for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
1555 __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
1556 memset(&field_types, 0, sizeof(field_types));
1557
1558 if (link_layer != pdefault_rules->link_layer)
1559 continue;
1560
1561 ib_flow = flow_attr + 1;
1562 /* we assume the specs are sorted */
1563 for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
1564 j < flow_attr->num_of_specs; k++) {
1565 union ib_flow_spec *current_flow =
1566 (union ib_flow_spec *)ib_flow;
1567
1568 /* same layer but different type */
1569 if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
1570 (pdefault_rules->mandatory_fields[k] &
1571 IB_FLOW_SPEC_LAYER_MASK)) &&
1572 (current_flow->type !=
1573 pdefault_rules->mandatory_fields[k]))
1574 goto out;
1575
1576 /* same layer, try to match the next one */
1577 if (current_flow->type ==
1578 pdefault_rules->mandatory_fields[k]) {
1579 j++;
1580 ib_flow +=
1581 ((union ib_flow_spec *)ib_flow)->size;
1582 }
1583 }
1584
1585 ib_flow = flow_attr + 1;
1586 for (j = 0; j < flow_attr->num_of_specs;
1587 j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
1588 for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
1589 /* same layer and same type */
1590 if (((union ib_flow_spec *)ib_flow)->type ==
1591 pdefault_rules->mandatory_not_fields[k])
1592 goto out;
1593
1594 return i;
1595 }
1596 out:
1597 return -1;
1598 }
1599
1600 static int __mlx4_ib_create_default_rules(
1601 struct mlx4_ib_dev *mdev,
1602 struct ib_qp *qp,
1603 const struct default_rules *pdefault_rules,
1604 struct _rule_hw *mlx4_spec) {
1605 int size = 0;
1606 int i;
1607
1608 for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
1609 union ib_flow_spec ib_spec = {};
1610 int ret;
1611
1612 switch (pdefault_rules->rules_create_list[i]) {
1613 case 0:
1614 /* no rule */
1615 continue;
1616 case IB_FLOW_SPEC_IB:
1617 ib_spec.type = IB_FLOW_SPEC_IB;
1618 ib_spec.size = sizeof(struct ib_flow_spec_ib);
1619
1620 break;
1621 default:
1622 /* invalid rule */
1623 return -EINVAL;
1624 }
1625 /* We must put an empty rule here; the qpn is ignored */
1626 ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
1627 mlx4_spec);
1628 if (ret < 0) {
1629 pr_info("invalid parsing\n");
1630 return -EINVAL;
1631 }
1632
1633 mlx4_spec = (void *)mlx4_spec + ret;
1634 size += ret;
1635 }
1636 return size;
1637 }
1638
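/*
 * Build a complete steering rule (control segment, default rules, then
 * the user specs) in a command mailbox and attach it with
 * MLX4_QP_FLOW_STEERING_ATTACH; the firmware handle is returned in
 * *reg_id.
 */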
1639 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1640 int domain,
1641 enum mlx4_net_trans_promisc_mode flow_type,
1642 u64 *reg_id)
1643 {
1644 int ret, i;
1645 int size = 0;
1646 void *ib_flow;
1647 struct mlx4_ib_dev *mdev = to_mdev(qp->device);
1648 struct mlx4_cmd_mailbox *mailbox;
1649 struct mlx4_net_trans_rule_hw_ctrl *ctrl;
1650 int default_flow;
1651
1652 static const u16 __mlx4_domain[] = {
1653 [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
1654 [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
1655 [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
1656 [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
1657 };
1658
1659 if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
1660 pr_err("Invalid priority value %d\n", flow_attr->priority);
1661 return -EINVAL;
1662 }
1663
1664 if (domain >= IB_FLOW_DOMAIN_NUM) {
1665 pr_err("Invalid domain value %d\n", domain);
1666 return -EINVAL;
1667 }
1668
1669 if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
1670 return -EINVAL;
1671
1672 mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
1673 if (IS_ERR(mailbox))
1674 return PTR_ERR(mailbox);
1675 ctrl = mailbox->buf;
1676
1677 ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
1678 flow_attr->priority);
1679 ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
1680 ctrl->port = flow_attr->port;
1681 ctrl->qpn = cpu_to_be32(qp->qp_num);
1682
1683 ib_flow = flow_attr + 1;
1684 size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
1685 /* Add default flows */
1686 default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
1687 if (default_flow >= 0) {
1688 ret = __mlx4_ib_create_default_rules(
1689 mdev, qp, default_table + default_flow,
1690 mailbox->buf + size);
1691 if (ret < 0) {
1692 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1693 return -EINVAL;
1694 }
1695 size += ret;
1696 }
1697 for (i = 0; i < flow_attr->num_of_specs; i++) {
1698 ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
1699 mailbox->buf + size);
1700 if (ret < 0) {
1701 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1702 return -EINVAL;
1703 }
1704 ib_flow += ((union ib_flow_spec *) ib_flow)->size;
1705 size += ret;
1706 }
1707
1708 if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR &&
1709 flow_attr->num_of_specs == 1) {
1710 struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1);
1711 enum ib_flow_spec_type header_spec =
1712 ((union ib_flow_spec *)(flow_attr + 1))->type;
1713
1714 if (header_spec == IB_FLOW_SPEC_ETH)
1715 mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
1716 }
1717
1718 ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
1719 MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
1720 MLX4_CMD_NATIVE);
1721 if (ret == -ENOMEM)
1722 pr_err("mcg table is full. Fail to register network rule.\n");
1723 else if (ret == -ENXIO)
1724 pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
1725 else if (ret)
1726 pr_err("Invalid argument. Fail to register network rule.\n");
1727
1728 mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1729 return ret;
1730 }
1731
1732 static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
1733 {
1734 int err;
1735 err = mlx4_cmd(dev, reg_id, 0, 0,
1736 MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
1737 MLX4_CMD_NATIVE);
1738 if (err)
1739 pr_err("Failed to detach network rule, registration id = 0x%llx\n",
1740 reg_id);
1741 return err;
1742 }
1743
1744 static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1745 u64 *reg_id)
1746 {
1747 void *ib_flow;
1748 union ib_flow_spec *ib_spec;
1749 struct mlx4_dev *dev = to_mdev(qp->device)->dev;
1750 int err = 0;
1751
1752 if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
1753 dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
1754 return 0; /* do nothing */
1755
1756 ib_flow = flow_attr + 1;
1757 ib_spec = (union ib_flow_spec *)ib_flow;
1758
1759 if (ib_spec->type != IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
1760 return 0; /* do nothing */
1761
1762 err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
1763 flow_attr->port, qp->qp_num,
1764 MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
1765 reg_id);
1766 return err;
1767 }
1768
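/*
 * Map an IB_FLOW_ATTR_FLAGS_DONT_TRAP rule onto the device sniffer
 * promiscuous modes: with no specs, or with an all-zero dst_mac mask,
 * both MC and UC sniffer rules are requested; with a mask covering only
 * the multicast bit, the MC bit of the dst_mac value selects MC or UC
 * sniffing. Anything else is rejected.
 */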
1769 static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
1770 struct ib_flow_attr *flow_attr,
1771 enum mlx4_net_trans_promisc_mode *type)
1772 {
1773 int err = 0;
1774
1775 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
1776 (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
1777 (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
1778 return -EOPNOTSUPP;
1779 }
1780
1781 if (flow_attr->num_of_specs == 0) {
1782 type[0] = MLX4_FS_MC_SNIFFER;
1783 type[1] = MLX4_FS_UC_SNIFFER;
1784 } else {
1785 union ib_flow_spec *ib_spec;
1786
1787 ib_spec = (union ib_flow_spec *)(flow_attr + 1);
1788 if (ib_spec->type != IB_FLOW_SPEC_ETH)
1789 return -EINVAL;
1790
1791 /* if the mask is all zero, then sniff both MC and UC */
1792 if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
1793 type[0] = MLX4_FS_MC_SNIFFER;
1794 type[1] = MLX4_FS_UC_SNIFFER;
1795 } else {
1796 u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
1797 ib_spec->eth.mask.dst_mac[1],
1798 ib_spec->eth.mask.dst_mac[2],
1799 ib_spec->eth.mask.dst_mac[3],
1800 ib_spec->eth.mask.dst_mac[4],
1801 ib_spec->eth.mask.dst_mac[5]};
1802
1803 /* The XOR above only flipped the multicast bit; a non-empty
1804 * mask is valid only if that bit is set and the rest are zero.
1805 */
1806 if (!is_zero_ether_addr(&mac[0]))
1807 return -EINVAL;
1808
1809 if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
1810 type[0] = MLX4_FS_MC_SNIFFER;
1811 else
1812 type[0] = MLX4_FS_UC_SNIFFER;
1813 }
1814 }
1815
1816 return err;
1817 }
1818
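/*
 * ib_device create_flow entry point: translate the verbs flow attribute
 * into up to two promiscuous-mode rule types, create each rule through
 * __mlx4_ib_create_flow and, for IB_FLOW_ATTR_NORMAL, add the tunnel
 * steering rule when VXLAN offload is enabled. When the ports are
 * bonded, a mirror of every rule is created on port 2, since
 * applications only see a single port.
 */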
1819 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1820 struct ib_flow_attr *flow_attr,
1821 int domain, struct ib_udata *udata)
1822 {
1823 int err = 0, i = 0, j = 0;
1824 struct mlx4_ib_flow *mflow;
1825 enum mlx4_net_trans_promisc_mode type[2];
1826 struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
1827 int is_bonded = mlx4_is_bonded(dev);
1828
1829 if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
1830 return ERR_PTR(-EINVAL);
1831
1832 if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
1833 return ERR_PTR(-EOPNOTSUPP);
1834
1835 if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
1836 (flow_attr->type != IB_FLOW_ATTR_NORMAL))
1837 return ERR_PTR(-EOPNOTSUPP);
1838
1839 if (udata &&
1840 udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
1841 return ERR_PTR(-EOPNOTSUPP);
1842
1843 memset(type, 0, sizeof(type));
1844
1845 mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
1846 if (!mflow) {
1847 err = -ENOMEM;
1848 goto err_free;
1849 }
1850
1851 switch (flow_attr->type) {
1852 case IB_FLOW_ATTR_NORMAL:
1853 /* If the don't-trap flag (continue match) is set, then under
1854 * specific conditions traffic is replicated to the given qp
1855 * without being stolen from it
1856 */
1857 if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
1858 err = mlx4_ib_add_dont_trap_rule(dev,
1859 flow_attr,
1860 type);
1861 if (err)
1862 goto err_free;
1863 } else {
1864 type[0] = MLX4_FS_REGULAR;
1865 }
1866 break;
1867
1868 case IB_FLOW_ATTR_ALL_DEFAULT:
1869 type[0] = MLX4_FS_ALL_DEFAULT;
1870 break;
1871
1872 case IB_FLOW_ATTR_MC_DEFAULT:
1873 type[0] = MLX4_FS_MC_DEFAULT;
1874 break;
1875
1876 case IB_FLOW_ATTR_SNIFFER:
1877 type[0] = MLX4_FS_MIRROR_RX_PORT;
1878 type[1] = MLX4_FS_MIRROR_SX_PORT;
1879 break;
1880
1881 default:
1882 err = -EINVAL;
1883 goto err_free;
1884 }
1885
1886 while (i < ARRAY_SIZE(type) && type[i]) {
1887 err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
1888 &mflow->reg_id[i].id);
1889 if (err)
1890 goto err_create_flow;
1891 if (is_bonded) {
1892 /* Application always sees one port so the mirror rule
1893 * must be on port #2
1894 */
1895 flow_attr->port = 2;
1896 err = __mlx4_ib_create_flow(qp, flow_attr,
1897 domain, type[j],
1898 &mflow->reg_id[j].mirror);
1899 flow_attr->port = 1;
1900 if (err)
1901 goto err_create_flow;
1902 j++;
1903 }
1904
1905 i++;
1906 }
1907
1908 if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1909 err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1910 &mflow->reg_id[i].id);
1911 if (err)
1912 goto err_create_flow;
1913
1914 if (is_bonded) {
1915 flow_attr->port = 2;
1916 err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1917 &mflow->reg_id[j].mirror);
1918 flow_attr->port = 1;
1919 if (err)
1920 goto err_create_flow;
1921 j++;
1922 }
1923 /* function to create mirror rule */
1924 i++;
1925 }
1926
1927 return &mflow->ibflow;
1928
1929 err_create_flow:
1930 while (i) {
1931 (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1932 mflow->reg_id[i].id);
1933 i--;
1934 }
1935
1936 while (j) {
1937 (void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1938 mflow->reg_id[j].mirror);
1939 j--;
1940 }
1941 err_free:
1942 kfree(mflow);
1943 return ERR_PTR(err);
1944 }
1945
1946 static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
1947 {
1948 int err, ret = 0;
1949 int i = 0;
1950 struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
1951 struct mlx4_ib_flow *mflow = to_mflow(flow_id);
1952
1953 while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
1954 err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
1955 if (err)
1956 ret = err;
1957 if (mflow->reg_id[i].mirror) {
1958 err = __mlx4_ib_destroy_flow(mdev->dev,
1959 mflow->reg_id[i].mirror);
1960 if (err)
1961 ret = err;
1962 }
1963 i++;
1964 }
1965
1966 kfree(mflow);
1967 return ret;
1968 }
1969
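/*
 * ib_device attach_mcast callback: attach the QP to the multicast group,
 * mirroring the attach on the other port when bonded. Under
 * device-managed steering, the returned registration ids are kept on the
 * QP's steering_rules list so mlx4_ib_mcg_detach can find them later.
 */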
1970 static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1971 {
1972 int err;
1973 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1974 struct mlx4_dev *dev = mdev->dev;
1975 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1976 struct mlx4_ib_steering *ib_steering = NULL;
1977 enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
1978 struct mlx4_flow_reg_id reg_id;
1979
1980 if (mdev->dev->caps.steering_mode ==
1981 MLX4_STEERING_MODE_DEVICE_MANAGED) {
1982 ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
1983 if (!ib_steering)
1984 return -ENOMEM;
1985 }
1986
1987 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
1988 !!(mqp->flags &
1989 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1990 prot, &reg_id.id);
1991 if (err) {
1992 pr_err("multicast attach op failed, err %d\n", err);
1993 goto err_malloc;
1994 }
1995
1996 reg_id.mirror = 0;
1997 if (mlx4_is_bonded(dev)) {
1998 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
1999 (mqp->port == 1) ? 2 : 1,
2000 !!(mqp->flags &
2001 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
2002 prot, &reg_id.mirror);
2003 if (err)
2004 goto err_add;
2005 }
2006
2007 err = add_gid_entry(ibqp, gid);
2008 if (err)
2009 goto err_add;
2010
2011 if (ib_steering) {
2012 memcpy(ib_steering->gid.raw, gid->raw, 16);
2013 ib_steering->reg_id = reg_id;
2014 mutex_lock(&mqp->mutex);
2015 list_add(&ib_steering->list, &mqp->steering_rules);
2016 mutex_unlock(&mqp->mutex);
2017 }
2018 return 0;
2019
2020 err_add:
2021 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
2022 prot, reg_id.id);
2023 if (reg_id.mirror)
2024 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
2025 prot, reg_id.mirror);
2026 err_malloc:
2027 kfree(ib_steering);
2028
2029 return err;
2030 }
2031
2032 static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
2033 {
2034 struct mlx4_ib_gid_entry *ge;
2035 struct mlx4_ib_gid_entry *tmp;
2036 struct mlx4_ib_gid_entry *ret = NULL;
2037
2038 list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
2039 if (!memcmp(raw, ge->gid.raw, 16)) {
2040 ret = ge;
2041 break;
2042 }
2043 }
2044
2045 return ret;
2046 }
2047
2048 static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
2049 {
2050 int err;
2051 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
2052 struct mlx4_dev *dev = mdev->dev;
2053 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
2054 struct net_device *ndev;
2055 struct mlx4_ib_gid_entry *ge;
2056 struct mlx4_flow_reg_id reg_id = {0, 0};
2057 enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
2058
2059 if (mdev->dev->caps.steering_mode ==
2060 MLX4_STEERING_MODE_DEVICE_MANAGED) {
2061 struct mlx4_ib_steering *ib_steering;
2062
2063 mutex_lock(&mqp->mutex);
2064 list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
2065 if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
2066 list_del(&ib_steering->list);
2067 break;
2068 }
2069 }
2070 mutex_unlock(&mqp->mutex);
2071 if (&ib_steering->list == &mqp->steering_rules) {
2072 pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
2073 return -EINVAL;
2074 }
2075 reg_id = ib_steering->reg_id;
2076 kfree(ib_steering);
2077 }
2078
2079 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
2080 prot, reg_id.id);
2081 if (err)
2082 return err;
2083
2084 if (mlx4_is_bonded(dev)) {
2085 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
2086 prot, reg_id.mirror);
2087 if (err)
2088 return err;
2089 }
2090
2091 mutex_lock(&mqp->mutex);
2092 ge = find_gid_entry(mqp, gid->raw);
2093 if (ge) {
2094 spin_lock_bh(&mdev->iboe.lock);
2095 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
2096 if (ndev)
2097 dev_hold(ndev);
2098 spin_unlock_bh(&mdev->iboe.lock);
2099 if (ndev)
2100 dev_put(ndev);
2101 list_del(&ge->list);
2102 kfree(ge);
2103 } else
2104 pr_warn("could not find mgid entry\n");
2105
2106 mutex_unlock(&mqp->mutex);
2107
2108 return 0;
2109 }
2110
2111 static int init_node_data(struct mlx4_ib_dev *dev)
2112 {
2113 struct ib_smp *in_mad = NULL;
2114 struct ib_smp *out_mad = NULL;
2115 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
2116 int err = -ENOMEM;
2117
2118 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
2119 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
2120 if (!in_mad || !out_mad)
2121 goto out;
2122
2123 init_query_mad(in_mad);
2124 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
2125 if (mlx4_is_master(dev->dev))
2126 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
2127
2128 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2129 if (err)
2130 goto out;
2131
2132 memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
2133
2134 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
2135
2136 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2137 if (err)
2138 goto out;
2139
2140 dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
2141 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
2142
2143 out:
2144 kfree(in_mad);
2145 kfree(out_mad);
2146 return err;
2147 }
2148
2149 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2150 char *buf)
2151 {
2152 struct mlx4_ib_dev *dev =
2153 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2154 return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
2155 }
2156
2157 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2158 char *buf)
2159 {
2160 struct mlx4_ib_dev *dev =
2161 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2162 return sprintf(buf, "%x\n", dev->dev->rev_id);
2163 }
2164
2165 static ssize_t show_board(struct device *device, struct device_attribute *attr,
2166 char *buf)
2167 {
2168 struct mlx4_ib_dev *dev =
2169 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2170 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
2171 dev->dev->board_id);
2172 }
2173
2174 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2175 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2176 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
2177
2178 static struct device_attribute *mlx4_class_attributes[] = {
2179 &dev_attr_hw_rev,
2180 &dev_attr_hca_type,
2181 &dev_attr_board_id
2182 };
2183
2184 struct diag_counter {
2185 const char *name;
2186 u32 offset;
2187 };
2188
2189 #define DIAG_COUNTER(_name, _offset) \
2190 { .name = #_name, .offset = _offset }
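/*
 * For example, DIAG_COUNTER(rq_num_lle, 0x00) expands to
 * { .name = "rq_num_lle", .offset = 0x00 }.
 */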
2191
2192 static const struct diag_counter diag_basic[] = {
2193 DIAG_COUNTER(rq_num_lle, 0x00),
2194 DIAG_COUNTER(sq_num_lle, 0x04),
2195 DIAG_COUNTER(rq_num_lqpoe, 0x08),
2196 DIAG_COUNTER(sq_num_lqpoe, 0x0C),
2197 DIAG_COUNTER(rq_num_lpe, 0x18),
2198 DIAG_COUNTER(sq_num_lpe, 0x1C),
2199 DIAG_COUNTER(rq_num_wrfe, 0x20),
2200 DIAG_COUNTER(sq_num_wrfe, 0x24),
2201 DIAG_COUNTER(sq_num_mwbe, 0x2C),
2202 DIAG_COUNTER(sq_num_bre, 0x34),
2203 DIAG_COUNTER(sq_num_rire, 0x44),
2204 DIAG_COUNTER(rq_num_rire, 0x48),
2205 DIAG_COUNTER(sq_num_rae, 0x4C),
2206 DIAG_COUNTER(rq_num_rae, 0x50),
2207 DIAG_COUNTER(sq_num_roe, 0x54),
2208 DIAG_COUNTER(sq_num_tree, 0x5C),
2209 DIAG_COUNTER(sq_num_rree, 0x64),
2210 DIAG_COUNTER(rq_num_rnr, 0x68),
2211 DIAG_COUNTER(sq_num_rnr, 0x6C),
2212 DIAG_COUNTER(rq_num_oos, 0x100),
2213 DIAG_COUNTER(sq_num_oos, 0x104),
2214 };
2215
2216 static const struct diag_counter diag_ext[] = {
2217 DIAG_COUNTER(rq_num_dup, 0x130),
2218 DIAG_COUNTER(sq_num_to, 0x134),
2219 };
2220
2221 static const struct diag_counter diag_device_only[] = {
2222 DIAG_COUNTER(num_cqovf, 0x1A0),
2223 DIAG_COUNTER(rq_num_udsdprd, 0x118),
2224 };
2225
2226 static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
2227 u8 port_num)
2228 {
2229 struct mlx4_ib_dev *dev = to_mdev(ibdev);
2230 struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2231
2232 if (!diag[!!port_num].name)
2233 return NULL;
2234
2235 return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
2236 diag[!!port_num].num_counters,
2237 RDMA_HW_STATS_DEFAULT_LIFESPAN);
2238 }
2239
2240 static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
2241 struct rdma_hw_stats *stats,
2242 u8 port, int index)
2243 {
2244 struct mlx4_ib_dev *dev = to_mdev(ibdev);
2245 struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2246 u32 hw_value[ARRAY_SIZE(diag_device_only) +
2247 ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
2248 int ret;
2249 int i;
2250
2251 ret = mlx4_query_diag_counters(dev->dev,
2252 MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
2253 diag[!!port].offset, hw_value,
2254 diag[!!port].num_counters, port);
2255
2256 if (ret)
2257 return ret;
2258
2259 for (i = 0; i < diag[!!port].num_counters; i++)
2260 stats->value[i] = hw_value[i];
2261
2262 return diag[!!port].num_counters;
2263 }
2264
2265 static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
2266 const char ***name,
2267 u32 **offset,
2268 u32 *num,
2269 bool port)
2270 {
2271 u32 num_counters;
2272
2273 num_counters = ARRAY_SIZE(diag_basic);
2274
2275 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
2276 num_counters += ARRAY_SIZE(diag_ext);
2277
2278 if (!port)
2279 num_counters += ARRAY_SIZE(diag_device_only);
2280
2281 *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
2282 if (!*name)
2283 return -ENOMEM;
2284
2285 *offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
2286 if (!*offset)
2287 goto err_name;
2288
2289 *num = num_counters;
2290
2291 return 0;
2292
2293 err_name:
2294 kfree(*name);
2295 return -ENOMEM;
2296 }
2297
2298 static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
2299 const char **name,
2300 u32 *offset,
2301 bool port)
2302 {
2303 int i;
2304 int j;
2305
2306 for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
2307 name[i] = diag_basic[i].name;
2308 offset[i] = diag_basic[i].offset;
2309 }
2310
2311 if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
2312 for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
2313 name[j] = diag_ext[i].name;
2314 offset[j] = diag_ext[i].offset;
2315 }
2316 }
2317
2318 if (!port) {
2319 for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
2320 name[j] = diag_device_only[i].name;
2321 offset[j] = diag_device_only[i].offset;
2322 }
2323 }
2324 }
2325
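/*
 * Allocate and fill the name/offset tables for the device-wide counter
 * set and, when MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT is supported, the
 * per-port set, then hook the hw_stats callbacks into the ib_device.
 * Slaves (VFs) expose no diagnostic counters.
 */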
2326 static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
2327 {
2328 struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
2329 int i;
2330 int ret;
2331 bool per_port = !!(ibdev->dev->caps.flags2 &
2332 MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
2333
2334 if (mlx4_is_slave(ibdev->dev))
2335 return 0;
2336
2337 for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2338 /* i == 1 means we are building port counters */
2339 if (i && !per_port)
2340 continue;
2341
2342 ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
2343 &diag[i].offset,
2344 &diag[i].num_counters, i);
2345 if (ret)
2346 goto err_alloc;
2347
2348 mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
2349 diag[i].offset, i);
2350 }
2351
2352 ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
2353 ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;
2354
2355 return 0;
2356
2357 err_alloc:
2358 if (i) {
2359 kfree(diag[i - 1].name);
2360 kfree(diag[i - 1].offset);
2361 }
2362
2363 return ret;
2364 }
2365
2366 static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
2367 {
2368 int i;
2369
2370 for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2371 kfree(ibdev->diag_counters[i].offset);
2372 kfree(ibdev->diag_counters[i].name);
2373 }
2374 }
2375
2376 #define MLX4_IB_INVALID_MAC ((u64)-1)
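/*
 * Refresh the cached MAC for the port from its net_device and, under
 * SR-IOV, register the new source MAC and update the proxy QP1 to use
 * it, releasing whichever MAC is no longer needed.
 */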
2377 static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
2378 struct net_device *dev,
2379 int port)
2380 {
2381 u64 new_smac = 0;
2382 u64 release_mac = MLX4_IB_INVALID_MAC;
2383 struct mlx4_ib_qp *qp;
2384
2385 read_lock(&dev_base_lock);
2386 new_smac = mlx4_mac_to_u64(dev->dev_addr);
2387 read_unlock(&dev_base_lock);
2388
2389 atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
2390
2391 /* no need to update QP1 or register the MAC in non-SR-IOV mode */
2392 if (!mlx4_is_mfunc(ibdev->dev))
2393 return;
2394
2395 mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
2396 qp = ibdev->qp1_proxy[port - 1];
2397 if (qp) {
2398 int new_smac_index;
2399 u64 old_smac;
2400 struct mlx4_update_qp_params update_params;
2401
2402 mutex_lock(&qp->mutex);
2403 old_smac = qp->pri.smac;
2404 if (new_smac == old_smac)
2405 goto unlock;
2406
2407 new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
2408
2409 if (new_smac_index < 0)
2410 goto unlock;
2411
2412 update_params.smac_index = new_smac_index;
2413 if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
2414 &update_params)) {
2415 release_mac = new_smac;
2416 goto unlock;
2417 }
2418 /* if old port was zero, no mac was yet registered for this QP */
2419 if (qp->pri.smac_port)
2420 release_mac = old_smac;
2421 qp->pri.smac = new_smac;
2422 qp->pri.smac_port = port;
2423 qp->pri.smac_index = new_smac_index;
2424 }
2425
2426 unlock:
2427 if (release_mac != MLX4_IB_INVALID_MAC)
2428 mlx4_unregister_mac(ibdev->dev, port, release_mac);
2429 if (qp)
2430 mutex_unlock(&qp->mutex);
2431 mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
2432 }
2433
2434 static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
2435 struct net_device *dev,
2436 unsigned long event)
2437
2438 {
2439 struct mlx4_ib_iboe *iboe;
2440 int update_qps_port = -1;
2441 int port;
2442
2443 ASSERT_RTNL();
2444
2445 iboe = &ibdev->iboe;
2446
2447 spin_lock_bh(&iboe->lock);
2448 mlx4_foreach_ib_transport_port(port, ibdev->dev) {
2449
2450 iboe->netdevs[port - 1] =
2451 mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
2452
2453 if (dev == iboe->netdevs[port - 1] &&
2454 (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
2455 event == NETDEV_UP || event == NETDEV_CHANGE))
2456 update_qps_port = port;
2457
2458 }
2459 spin_unlock_bh(&iboe->lock);
2460
2461 if (update_qps_port > 0)
2462 mlx4_ib_update_qps(ibdev, dev, update_qps_port);
2463 }
2464
2465 static int mlx4_ib_netdev_event(struct notifier_block *this,
2466 unsigned long event, void *ptr)
2467 {
2468 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2469 struct mlx4_ib_dev *ibdev;
2470
2471 if (!net_eq(dev_net(dev), &init_net))
2472 return NOTIFY_DONE;
2473
2474 ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
2475 mlx4_ib_scan_netdevs(ibdev, dev, event);
2476
2477 return NOTIFY_DONE;
2478 }
2479
2480 static void init_pkeys(struct mlx4_ib_dev *ibdev)
2481 {
2482 int port;
2483 int slave;
2484 int i;
2485
2486 if (mlx4_is_master(ibdev->dev)) {
2487 for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
2488 ++slave) {
2489 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2490 for (i = 0;
2491 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2492 ++i) {
2493 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
2494 /* master has the identity virt2phys pkey mapping */
2495 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
2496 ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
2497 mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
2498 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
2499 }
2500 }
2501 }
2502 /* initialize pkey cache */
2503 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2504 for (i = 0;
2505 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2506 ++i)
2507 ibdev->pkeys.phys_pkey_cache[port-1][i] =
2508 (i) ? 0 : 0xFFFF;
2509 }
2510 }
2511 }
2512
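/*
 * Assign dedicated completion EQs to this IB device, walking the EQs of
 * each port and skipping the shared ones; the number of vectors actually
 * obtained is advertised through ib_dev.num_comp_vectors.
 */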
2513 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2514 {
2515 int i, j, eq = 0, total_eqs = 0;
2516
2517 ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
2518 sizeof(ibdev->eq_table[0]), GFP_KERNEL);
2519 if (!ibdev->eq_table)
2520 return;
2521
2522 for (i = 1; i <= dev->caps.num_ports; i++) {
2523 for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
2524 j++, total_eqs++) {
2525 if (i > 1 && mlx4_is_eq_shared(dev, total_eqs))
2526 continue;
2527 ibdev->eq_table[eq] = total_eqs;
2528 if (!mlx4_assign_eq(dev, i,
2529 &ibdev->eq_table[eq]))
2530 eq++;
2531 else
2532 ibdev->eq_table[eq] = -1;
2533 }
2534 }
2535
2536 for (i = eq; i < dev->caps.num_comp_vectors;
2537 ibdev->eq_table[i++] = -1)
2538 ;
2539
2540 /* Advertise the new number of EQs to clients */
2541 ibdev->ib_dev.num_comp_vectors = eq;
2542 }
2543
2544 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2545 {
2546 int i;
2547 int total_eqs = ibdev->ib_dev.num_comp_vectors;
2548
2549 /* no eqs were allocated */
2550 if (!ibdev->eq_table)
2551 return;
2552
2553 /* Reset the advertised EQ number */
2554 ibdev->ib_dev.num_comp_vectors = 0;
2555
2556 for (i = 0; i < total_eqs; i++)
2557 mlx4_release_eq(dev, ibdev->eq_table[i]);
2558
2559 kfree(ibdev->eq_table);
2560 ibdev->eq_table = NULL;
2561 }
2562
2563 static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
2564 struct ib_port_immutable *immutable)
2565 {
2566 struct ib_port_attr attr;
2567 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
2568 int err;
2569
2570 if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
2571 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2572 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2573 } else {
2574 if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
2575 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
2576 if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
2577 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
2578 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2579 immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
2580 if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
2581 RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
2582 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2583 }
2584
2585 err = ib_query_port(ibdev, port_num, &attr);
2586 if (err)
2587 return err;
2588
2589 immutable->pkey_tbl_len = attr.pkey_tbl_len;
2590 immutable->gid_tbl_len = attr.gid_tbl_len;
2591
2592 return 0;
2593 }
2594
2595 static void get_fw_ver_str(struct ib_device *device, char *str)
2596 {
2597 struct mlx4_ib_dev *dev =
2598 container_of(device, struct mlx4_ib_dev, ib_dev);
2599 snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
2600 (int) (dev->dev->caps.fw_ver >> 32),
2601 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
2602 (int) dev->dev->caps.fw_ver & 0xffff);
2603 }
2604
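/*
 * mlx4_interface .add callback: allocate the mlx4_ib_dev, set up the
 * PD/UAR resources, verbs callbacks and capability-dependent features
 * (flow steering, memory windows, XRC, RSS WQs), allocate per-port and
 * diagnostic counters, register the ib_device, and finally wire up MAD
 * handling, SR-IOV, the netdev notifier and the sysfs attributes.
 */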
2605 static void *mlx4_ib_add(struct mlx4_dev *dev)
2606 {
2607 struct mlx4_ib_dev *ibdev;
2608 int num_ports = 0;
2609 int i, j;
2610 int err;
2611 struct mlx4_ib_iboe *iboe;
2612 int ib_num_ports = 0;
2613 int num_req_counters;
2614 int allocated;
2615 u32 counter_index;
2616 struct counter_index *new_counter_index = NULL;
2617
2618 pr_info_once("%s", mlx4_ib_version);
2619
2620 num_ports = 0;
2621 mlx4_foreach_ib_transport_port(i, dev)
2622 num_ports++;
2623
2624 /* No point in registering a device with no ports... */
2625 if (num_ports == 0)
2626 return NULL;
2627
2628 ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
2629 if (!ibdev) {
2630 dev_err(&dev->persist->pdev->dev,
2631 "Device struct alloc failed\n");
2632 return NULL;
2633 }
2634
2635 iboe = &ibdev->iboe;
2636
2637 if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
2638 goto err_dealloc;
2639
2640 if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
2641 goto err_pd;
2642
2643 ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
2644 PAGE_SIZE);
2645 if (!ibdev->uar_map)
2646 goto err_uar;
2647 MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
2648
2649 ibdev->dev = dev;
2650 ibdev->bond_next_port = 0;
2651
2652 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
2653 ibdev->ib_dev.owner = THIS_MODULE;
2654 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
2655 ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
2656 ibdev->num_ports = num_ports;
2657 ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
2658 1 : ibdev->num_ports;
2659 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
2660 ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
2661 ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
2662 ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
2663 ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
2664
2665 if (dev->caps.userspace_caps)
2666 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
2667 else
2668 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
2669
2670 ibdev->ib_dev.uverbs_cmd_mask =
2671 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2672 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2673 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2674 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2675 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2676 (1ull << IB_USER_VERBS_CMD_REG_MR) |
2677 (1ull << IB_USER_VERBS_CMD_REREG_MR) |
2678 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2679 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2680 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2681 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
2682 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2683 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2684 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2685 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2686 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2687 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
2688 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
2689 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
2690 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
2691 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
2692 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
2693 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
2694 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
2695
2696 ibdev->ib_dev.query_device = mlx4_ib_query_device;
2697 ibdev->ib_dev.query_port = mlx4_ib_query_port;
2698 ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
2699 ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
2700 ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
2701 ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
2702 ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
2703 ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
2704 ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
2705 ibdev->ib_dev.mmap = mlx4_ib_mmap;
2706 ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
2707 ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
2708 ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
2709 ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
2710 ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
2711 ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
2712 ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
2713 ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
2714 ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
2715 ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
2716 ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
2717 ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
2718 ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
2719 ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
2720 ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq;
2721 ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq;
2722 ibdev->ib_dev.post_send = mlx4_ib_post_send;
2723 ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
2724 ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
2725 ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq;
2726 ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq;
2727 ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
2728 ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
2729 ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
2730 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
2731 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
2732 ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
2733 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
2734 ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
2735 ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
2736 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
2737 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
2738 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
2739 ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
2740 ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
2741 ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
2742
2743 ibdev->ib_dev.uverbs_ex_cmd_mask |=
2744 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
2745
2746 if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
2747 ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
2748 IB_LINK_LAYER_ETHERNET) ||
2749 (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
2750 IB_LINK_LAYER_ETHERNET))) {
2751 ibdev->ib_dev.create_wq = mlx4_ib_create_wq;
2752 ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq;
2753 ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq;
2754 ibdev->ib_dev.create_rwq_ind_table =
2755 mlx4_ib_create_rwq_ind_table;
2756 ibdev->ib_dev.destroy_rwq_ind_table =
2757 mlx4_ib_destroy_rwq_ind_table;
2758 ibdev->ib_dev.uverbs_ex_cmd_mask |=
2759 (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
2760 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
2761 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
2762 (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
2763 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
2764 }
2765
2766 if (!mlx4_is_slave(ibdev->dev)) {
2767 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
2768 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
2769 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
2770 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
2771 }
2772
2773 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2774 dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
2775 ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
2776 ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
2777
2778 ibdev->ib_dev.uverbs_cmd_mask |=
2779 (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
2780 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
2781 }
2782
2783 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
2784 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
2785 ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
2786 ibdev->ib_dev.uverbs_cmd_mask |=
2787 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2788 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2789 }
2790
2791 if (check_flow_steering_support(dev)) {
2792 ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
2793 ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
2794 ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
2795
2796 ibdev->ib_dev.uverbs_ex_cmd_mask |=
2797 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
2798 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
2799 }
2800
2801 ibdev->ib_dev.uverbs_ex_cmd_mask |=
2802 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
2803 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
2804 (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
2805
2806 mlx4_ib_alloc_eqs(dev, ibdev);
2807
2808 spin_lock_init(&iboe->lock);
2809
2810 if (init_node_data(ibdev))
2811 goto err_map;
2812 mlx4_init_sl2vl_tbl(ibdev);
2813
2814 for (i = 0; i < ibdev->num_ports; ++i) {
2815 mutex_init(&ibdev->counters_table[i].mutex);
2816 INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
2817 }
2818
2819 num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
2820 for (i = 0; i < num_req_counters; ++i) {
2821 mutex_init(&ibdev->qp1_proxy_lock[i]);
2822 allocated = 0;
2823 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
2824 IB_LINK_LAYER_ETHERNET) {
2825 err = mlx4_counter_alloc(ibdev->dev, &counter_index,
2826 MLX4_RES_USAGE_DRIVER);
2827 /* if allocating a new counter failed, use the default */
2828 if (err)
2829 counter_index =
2830 mlx4_get_default_counter_index(dev,
2831 i + 1);
2832 else
2833 allocated = 1;
2834 } else { /* IB_LINK_LAYER_INFINIBAND: use the default counter */
2835 counter_index = mlx4_get_default_counter_index(dev,
2836 i + 1);
2837 }
2838 new_counter_index = kmalloc(sizeof(*new_counter_index),
2839 GFP_KERNEL);
2840 if (!new_counter_index) {
2841 if (allocated)
2842 mlx4_counter_free(ibdev->dev, counter_index);
2843 goto err_counter;
2844 }
2845 new_counter_index->index = counter_index;
2846 new_counter_index->allocated = allocated;
2847 list_add_tail(&new_counter_index->list,
2848 &ibdev->counters_table[i].counters_list);
2849 ibdev->counters_table[i].default_counter = counter_index;
2850 pr_info("counter index %d for port %d allocated %d\n",
2851 counter_index, i + 1, allocated);
2852 }
2853 if (mlx4_is_bonded(dev))
2854 for (i = 1; i < ibdev->num_ports; ++i) {
2855 new_counter_index =
2856 kmalloc(sizeof(struct counter_index),
2857 GFP_KERNEL);
2858 if (!new_counter_index)
2859 goto err_counter;
2860 new_counter_index->index = counter_index;
2861 new_counter_index->allocated = 0;
2862 list_add_tail(&new_counter_index->list,
2863 &ibdev->counters_table[i].counters_list);
2864 ibdev->counters_table[i].default_counter =
2865 counter_index;
2866 }
2867
2868 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2869 ib_num_ports++;
2870
2871 spin_lock_init(&ibdev->sm_lock);
2872 mutex_init(&ibdev->cap_mask_mutex);
2873 INIT_LIST_HEAD(&ibdev->qp_list);
2874 spin_lock_init(&ibdev->reset_flow_resource_lock);
2875
2876 if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2877 ib_num_ports) {
2878 ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
2879 err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
2880 MLX4_IB_UC_STEER_QPN_ALIGN,
2881 &ibdev->steer_qpn_base, 0,
2882 MLX4_RES_USAGE_DRIVER);
2883 if (err)
2884 goto err_counter;
2885
2886 ibdev->ib_uc_qpns_bitmap =
2887 kmalloc_array(BITS_TO_LONGS(ibdev->steer_qpn_count),
2888 sizeof(long),
2889 GFP_KERNEL);
2890 if (!ibdev->ib_uc_qpns_bitmap)
2891 goto err_steer_qp_release;
2892
2893 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
2894 bitmap_zero(ibdev->ib_uc_qpns_bitmap,
2895 ibdev->steer_qpn_count);
2896 err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
2897 dev, ibdev->steer_qpn_base,
2898 ibdev->steer_qpn_base +
2899 ibdev->steer_qpn_count - 1);
2900 if (err)
2901 goto err_steer_free_bitmap;
2902 } else {
2903 bitmap_fill(ibdev->ib_uc_qpns_bitmap,
2904 ibdev->steer_qpn_count);
2905 }
2906 }
2907
2908 for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
2909 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
2910
2911 if (mlx4_ib_alloc_diag_counters(ibdev))
2912 goto err_steer_free_bitmap;
2913
2914 ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
2915 if (ib_register_device(&ibdev->ib_dev, NULL))
2916 goto err_diag_counters;
2917
2918 if (mlx4_ib_mad_init(ibdev))
2919 goto err_reg;
2920
2921 if (mlx4_ib_init_sriov(ibdev))
2922 goto err_mad;
2923
2924 if (!iboe->nb.notifier_call) {
2925 iboe->nb.notifier_call = mlx4_ib_netdev_event;
2926 err = register_netdevice_notifier(&iboe->nb);
2927 if (err) {
2928 iboe->nb.notifier_call = NULL;
2929 goto err_notif;
2930 }
2931 }
2932 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2933 err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
2934 if (err)
2935 goto err_notif;
2936 }
2937
2938 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
2939 if (device_create_file(&ibdev->ib_dev.dev,
2940 mlx4_class_attributes[j]))
2941 goto err_notif;
2942 }
2943
2944 ibdev->ib_active = true;
2945 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2946 devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
2947 &ibdev->ib_dev);
2948
2949 if (mlx4_is_mfunc(ibdev->dev))
2950 init_pkeys(ibdev);
2951
2952 /* create paravirt contexts for any VFs which are active */
2953 if (mlx4_is_master(ibdev->dev)) {
2954 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
2955 if (j == mlx4_master_func_num(ibdev->dev))
2956 continue;
2957 if (mlx4_is_slave_active(ibdev->dev, j))
2958 do_slave_init(ibdev, j, 1);
2959 }
2960 }
2961 return ibdev;
2962
2963 err_notif:
2964 if (ibdev->iboe.nb.notifier_call) {
2965 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2966 pr_warn("failure unregistering notifier\n");
2967 ibdev->iboe.nb.notifier_call = NULL;
2968 }
2969 flush_workqueue(wq);
2970
2971 mlx4_ib_close_sriov(ibdev);
2972
2973 err_mad:
2974 mlx4_ib_mad_cleanup(ibdev);
2975
2976 err_reg:
2977 ib_unregister_device(&ibdev->ib_dev);
2978
2979 err_diag_counters:
2980 mlx4_ib_diag_cleanup(ibdev);
2981
2982 err_steer_free_bitmap:
2983 kfree(ibdev->ib_uc_qpns_bitmap);
2984
2985 err_steer_qp_release:
2986 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2987 ibdev->steer_qpn_count);
2988 err_counter:
2989 for (i = 0; i < ibdev->num_ports; ++i)
2990 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
2991
2992 err_map:
2993 mlx4_ib_free_eqs(dev, ibdev);
2994 iounmap(ibdev->uar_map);
2995
2996 err_uar:
2997 mlx4_uar_free(dev, &ibdev->priv_uar);
2998
2999 err_pd:
3000 mlx4_pd_free(dev, ibdev->priv_pdn);
3001
3002 err_dealloc:
3003 ib_dealloc_device(&ibdev->ib_dev);
3004
3005 return NULL;
3006 }
3007
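/*
 * Reserve a power-of-two aligned block of QP numbers from the range set
 * aside for device-managed steering of UC QPs; the first QPN of the
 * block is returned in *qpn.
 */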
3008 int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
3009 {
3010 int offset;
3011
3012 WARN_ON(!dev->ib_uc_qpns_bitmap);
3013
3014 offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
3015 dev->steer_qpn_count,
3016 get_count_order(count));
3017 if (offset < 0)
3018 return offset;
3019
3020 *qpn = dev->steer_qpn_base + offset;
3021 return 0;
3022 }
3023
3024 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
3025 {
3026 if (!qpn ||
3027 dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
3028 return;
3029
3030 if (WARN(qpn < dev->steer_qpn_base, "qpn = %u, steer_qpn_base = %u\n",
3031 qpn, dev->steer_qpn_base))
3032 /* not supposed to be here */
3033 return;
3034
3035 bitmap_release_region(dev->ib_uc_qpns_bitmap,
3036 qpn - dev->steer_qpn_base,
3037 get_count_order(count));
3038 }
3039
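/*
 * Attach (is_attach != 0) or detach an empty IB L2 steering rule for the
 * QP; the registration id obtained on attach is kept in mqp->reg_id and
 * reused for the detach.
 */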
3040 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
3041 int is_attach)
3042 {
3043 int err;
3044 size_t flow_size;
3045 struct ib_flow_attr *flow = NULL;
3046 struct ib_flow_spec_ib *ib_spec;
3047
3048 if (is_attach) {
3049 flow_size = sizeof(struct ib_flow_attr) +
3050 sizeof(struct ib_flow_spec_ib);
3051 flow = kzalloc(flow_size, GFP_KERNEL);
3052 if (!flow)
3053 return -ENOMEM;
3054 flow->port = mqp->port;
3055 flow->num_of_specs = 1;
3056 flow->size = flow_size;
3057 ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
3058 ib_spec->type = IB_FLOW_SPEC_IB;
3059 ib_spec->size = sizeof(struct ib_flow_spec_ib);
3060 /* Add an empty rule for IB L2 */
3061 memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
3062
3063 err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
3064 IB_FLOW_DOMAIN_NIC,
3065 MLX4_FS_REGULAR,
3066 &mqp->reg_id);
3067 } else {
3068 err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
3069 }
3070 kfree(flow);
3071 return err;
3072 }
3073
3074 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
3075 {
3076 struct mlx4_ib_dev *ibdev = ibdev_ptr;
3077 int p;
3078 int i;
3079
3080 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
3081 devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
3082 ibdev->ib_active = false;
3083 flush_workqueue(wq);
3084
3085 if (ibdev->iboe.nb.notifier_call) {
3086 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
3087 pr_warn("failure unregistering notifier\n");
3088 ibdev->iboe.nb.notifier_call = NULL;
3089 }
3090
3091 mlx4_ib_close_sriov(ibdev);
3092 mlx4_ib_mad_cleanup(ibdev);
3093 ib_unregister_device(&ibdev->ib_dev);
3094 mlx4_ib_diag_cleanup(ibdev);
3095
3096 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
3097 ibdev->steer_qpn_count);
3098 kfree(ibdev->ib_uc_qpns_bitmap);
3099
3100 iounmap(ibdev->uar_map);
3101 for (p = 0; p < ibdev->num_ports; ++p)
3102 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
3103
3104 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
3105 mlx4_CLOSE_PORT(dev, p);
3106
3107 mlx4_ib_free_eqs(dev, ibdev);
3108
3109 mlx4_uar_free(dev, &ibdev->priv_uar);
3110 mlx4_pd_free(dev, ibdev->priv_pdn);
3111 ib_dealloc_device(&ibdev->ib_dev);
3112 }
3113
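/*
 * Queue mlx4_ib_tunnels_update_work for every active port of the given
 * slave so its para-virtualized tunnel QPs are set up (do_init != 0) or
 * torn down. Only the master schedules this work, and it is skipped
 * while SR-IOV is going down.
 */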
3114 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
3115 {
3116 struct mlx4_ib_demux_work **dm = NULL;
3117 struct mlx4_dev *dev = ibdev->dev;
3118 int i;
3119 unsigned long flags;
3120 struct mlx4_active_ports actv_ports;
3121 unsigned int ports;
3122 unsigned int first_port;
3123
3124 if (!mlx4_is_master(dev))
3125 return;
3126
3127 actv_ports = mlx4_get_active_ports(dev, slave);
3128 ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
3129 first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
3130
3131 dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
3132 if (!dm)
3133 return;
3134
3135 for (i = 0; i < ports; i++) {
3136 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
3137 if (!dm[i]) {
3138 while (--i >= 0)
3139 kfree(dm[i]);
3140 goto out;
3141 }
3142 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
3143 dm[i]->port = first_port + i + 1;
3144 dm[i]->slave = slave;
3145 dm[i]->do_init = do_init;
3146 dm[i]->dev = ibdev;
3147 }
3148 /* initialize or tear down tunnel QPs for the slave */
3149 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
3150 if (!ibdev->sriov.is_going_down) {
3151 for (i = 0; i < ports; i++)
3152 queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
3153 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3154 } else {
3155 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3156 for (i = 0; i < ports; i++)
3157 kfree(dm[i]);
3158 }
3159 out:
3160 kfree(dm);
3161 return;
3162 }
3163
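/*
 * On a catastrophic device error, walk every QP registered on this ibdev
 * and invoke the completion handler of any CQ that still has outstanding
 * work requests, so consumers can flush them during the reset flow.
 */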
3164 static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
3165 {
3166 struct mlx4_ib_qp *mqp;
3167 unsigned long flags_qp;
3168 unsigned long flags_cq;
3169 struct mlx4_ib_cq *send_mcq, *recv_mcq;
3170 struct list_head cq_notify_list;
3171 struct mlx4_cq *mcq;
3172 unsigned long flags;
3173
3174 pr_warn("mlx4_ib_handle_catas_error was started\n");
3175 INIT_LIST_HEAD(&cq_notify_list);
3176
3177 /* Go over the QP list residing on that ibdev, synchronized with QP create/destroy. */
3178 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
3179
3180 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
3181 spin_lock_irqsave(&mqp->sq.lock, flags_qp);
3182 if (mqp->sq.tail != mqp->sq.head) {
3183 send_mcq = to_mcq(mqp->ibqp.send_cq);
3184 spin_lock_irqsave(&send_mcq->lock, flags_cq);
3185 if (send_mcq->mcq.comp &&
3186 mqp->ibqp.send_cq->comp_handler) {
3187 if (!send_mcq->mcq.reset_notify_added) {
3188 send_mcq->mcq.reset_notify_added = 1;
3189 list_add_tail(&send_mcq->mcq.reset_notify,
3190 &cq_notify_list);
3191 }
3192 }
3193 spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
3194 }
3195 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
3196 /* Now, handle the QP's receive queue */
3197 spin_lock_irqsave(&mqp->rq.lock, flags_qp);
3198 /* no handling is needed for SRQ */
3199 if (!mqp->ibqp.srq) {
3200 if (mqp->rq.tail != mqp->rq.head) {
3201 recv_mcq = to_mcq(mqp->ibqp.recv_cq);
3202 spin_lock_irqsave(&recv_mcq->lock, flags_cq);
3203 if (recv_mcq->mcq.comp &&
3204 mqp->ibqp.recv_cq->comp_handler) {
3205 if (!recv_mcq->mcq.reset_notify_added) {
3206 recv_mcq->mcq.reset_notify_added = 1;
3207 list_add_tail(&recv_mcq->mcq.reset_notify,
3208 &cq_notify_list);
3209 }
3210 }
3211 spin_unlock_irqrestore(&recv_mcq->lock,
3212 flags_cq);
3213 }
3214 }
3215 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
3216 }
3217
3218 list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
3219 mcq->comp(mcq);
3220 }
3221 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
3222 pr_warn("mlx4_ib_handle_catas_error ended\n");
3223 }
3224
3225 static void handle_bonded_port_state_event(struct work_struct *work)
3226 {
3227 struct ib_event_work *ew =
3228 container_of(work, struct ib_event_work, work);
3229 struct mlx4_ib_dev *ibdev = ew->ib_dev;
3230 enum ib_port_state bonded_port_state = IB_PORT_NOP;
3231 int i;
3232 struct ib_event ibev;
3233
3234 kfree(ew);
3235 spin_lock_bh(&ibdev->iboe.lock);
3236 for (i = 0; i < MLX4_MAX_PORTS; ++i) {
3237 struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
3238 enum ib_port_state curr_port_state;
3239
3240 if (!curr_netdev)
3241 continue;
3242
3243 curr_port_state =
3244 (netif_running(curr_netdev) &&
3245 netif_carrier_ok(curr_netdev)) ?
3246 IB_PORT_ACTIVE : IB_PORT_DOWN;
3247
3248 bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
3249 curr_port_state : IB_PORT_ACTIVE;
3250 }
3251 spin_unlock_bh(&ibdev->iboe.lock);
3252
3253 ibev.device = &ibdev->ib_dev;
3254 ibev.element.port_num = 1;
3255 ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
3256 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
3257
3258 ib_dispatch_event(&ibev);
3259 }
3260
3261 void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
3262 {
3263 u64 sl2vl;
3264 int err;
3265
3266 err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
3267 if (err) {
3268 pr_err("Unable to get current sl to vl mapping for port %d. Using all zeroes (%d)\n",
3269 port, err);
3270 sl2vl = 0;
3271 }
3272 atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
3273 }
3274
3275 static void ib_sl2vl_update_work(struct work_struct *work)
3276 {
3277 struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
3278 struct mlx4_ib_dev *mdev = ew->ib_dev;
3279 int port = ew->port;
3280
3281 mlx4_ib_sl2vl_update(mdev, port);
3282
3283 kfree(ew);
3284 }
3285
3286 void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
3287 int port)
3288 {
3289 struct ib_event_work *ew;
3290
3291 ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3292 if (ew) {
3293 INIT_WORK(&ew->work, ib_sl2vl_update_work);
3294 ew->port = port;
3295 ew->ib_dev = ibdev;
3296 queue_work(wq, &ew->work);
3297 }
3298 }
3299
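/*
 * mlx4_interface .event callback: translate core device events (port
 * up/down, catastrophic error, port management change, slave init and
 * shutdown) into IB events or deferred work items and dispatch them to
 * registered clients.
 */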
3300 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
3301 enum mlx4_dev_event event, unsigned long param)
3302 {
3303 struct ib_event ibev;
3304 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
3305 struct mlx4_eqe *eqe = NULL;
3306 struct ib_event_work *ew;
3307 int p = 0;
3308
3309 if (mlx4_is_bonded(dev) &&
3310 ((event == MLX4_DEV_EVENT_PORT_UP) ||
3311 (event == MLX4_DEV_EVENT_PORT_DOWN))) {
3312 ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3313 if (!ew)
3314 return;
3315 INIT_WORK(&ew->work, handle_bonded_port_state_event);
3316 ew->ib_dev = ibdev;
3317 queue_work(wq, &ew->work);
3318 return;
3319 }
3320
3321 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
3322 eqe = (struct mlx4_eqe *)param;
3323 else
3324 p = (int) param;
3325
3326 switch (event) {
3327 case MLX4_DEV_EVENT_PORT_UP:
3328 if (p > ibdev->num_ports)
3329 return;
3330 if (!mlx4_is_slave(dev) &&
3331 rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
3332 IB_LINK_LAYER_INFINIBAND) {
3333 if (mlx4_is_master(dev))
3334 mlx4_ib_invalidate_all_guid_record(ibdev, p);
3335 if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
3336 !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
3337 mlx4_sched_ib_sl2vl_update_work(ibdev, p);
3338 }
3339 ibev.event = IB_EVENT_PORT_ACTIVE;
3340 break;
3341
3342 case MLX4_DEV_EVENT_PORT_DOWN:
3343 if (p > ibdev->num_ports)
3344 return;
3345 ibev.event = IB_EVENT_PORT_ERR;
3346 break;
3347
3348 case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
3349 ibdev->ib_active = false;
3350 ibev.event = IB_EVENT_DEVICE_FATAL;
3351 mlx4_ib_handle_catas_error(ibdev);
3352 break;
3353
3354 case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
3355 ew = kmalloc(sizeof *ew, GFP_ATOMIC);
3356 if (!ew)
3357 break;
3358
3359 INIT_WORK(&ew->work, handle_port_mgmt_change_event);
3360 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
3361 ew->ib_dev = ibdev;
3362 /* need to queue only for port owner, which uses GEN_EQE */
3363 if (mlx4_is_master(dev))
3364 queue_work(wq, &ew->work);
3365 else
3366 handle_port_mgmt_change_event(&ew->work);
3367 return;
3368
3369 case MLX4_DEV_EVENT_SLAVE_INIT:
3370 /* here, p is the slave id */
3371 do_slave_init(ibdev, p, 1);
3372 if (mlx4_is_master(dev)) {
3373 int i;
3374
3375 for (i = 1; i <= ibdev->num_ports; i++) {
3376 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3377 == IB_LINK_LAYER_INFINIBAND)
3378 mlx4_ib_slave_alias_guid_event(ibdev,
3379 p, i,
3380 1);
3381 }
3382 }
3383 return;
3384
3385 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
3386 if (mlx4_is_master(dev)) {
3387 int i;
3388
3389 for (i = 1; i <= ibdev->num_ports; i++) {
3390 if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3391 == IB_LINK_LAYER_INFINIBAND)
3392 mlx4_ib_slave_alias_guid_event(ibdev,
3393 p, i,
3394 0);
3395 }
3396 }
3397 /* here, p is the slave id */
3398 do_slave_init(ibdev, p, 0);
3399 return;
3400
3401 default:
3402 return;
3403 }
3404
3405 ibev.device = ibdev_ptr;
3406 ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
3407
3408 ib_dispatch_event(&ibev);
3409 }
3410
3411 static struct mlx4_interface mlx4_ib_interface = {
3412 .add = mlx4_ib_add,
3413 .remove = mlx4_ib_remove,
3414 .event = mlx4_ib_event,
3415 .protocol = MLX4_PROT_IB_IPV6,
3416 .flags = MLX4_INTFF_BONDING
3417 };
3418
3419 static int __init mlx4_ib_init(void)
3420 {
3421 int err;
3422
3423 wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
3424 if (!wq)
3425 return -ENOMEM;
3426
3427 err = mlx4_ib_mcg_init();
3428 if (err)
3429 goto clean_wq;
3430
3431 err = mlx4_register_interface(&mlx4_ib_interface);
3432 if (err)
3433 goto clean_mcg;
3434
3435 return 0;
3436
3437 clean_mcg:
3438 mlx4_ib_mcg_destroy();
3439
3440 clean_wq:
3441 destroy_workqueue(wq);
3442 return err;
3443 }
3444
3445 static void __exit mlx4_ib_cleanup(void)
3446 {
3447 mlx4_unregister_interface(&mlx4_ib_interface);
3448 mlx4_ib_mcg_destroy();
3449 destroy_workqueue(wq);
3450 }
3451
3452 module_init(mlx4_ib_init);
3453 module_exit(mlx4_ib_cleanup);
3454