1 /*
2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * 3. Neither the names of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * Alternatively, this software may be distributed under the terms of the
17 * GNU General Public License ("GPL") version 2 as published by the Free
18 * Software Foundation.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44
45 /*
46  * Keep the array elements sorted by netlink attribute name.
47 */
48 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
49 [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
50 [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
51 [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
52 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
53 [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
54 .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
55 [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
56 [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
57 [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
58 .len = IB_DEVICE_NAME_MAX },
59 [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
60 [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
61 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
62 [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
63 [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
64 [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
65 [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
66 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
67 [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
68 [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
69 [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
70 [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
71 [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
72 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
73 [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
74 [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
75 .len = IFNAMSIZ },
76 [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
77 [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
78 [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
79 .len = IFNAMSIZ },
80 [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
81 [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
82 [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
83 [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
84 [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
85 [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
86 [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
87 [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
88 [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
89 [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
90 [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
91 [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
92 [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
93 .len = sizeof(struct __kernel_sockaddr_storage) },
94 [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
95 [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
96 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
97 [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
98 [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
99 [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
100 [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
101 [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
102 [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
103 [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
104 [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
105 [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
106 [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
107 [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
108 [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
109 [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
110 [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
111 [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
112 [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
113 [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
114 [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
115 [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
116 [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
117 [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
118 .len = sizeof(struct __kernel_sockaddr_storage) },
119 [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
120 [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
121 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
122 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
123 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
124 .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
125 [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
126 [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
127 [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
128 [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
129 [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
130 [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
131 [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
132 [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 },
133 [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
134 [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
135 [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
136 [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
137 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
138 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
139 [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
140 [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
141 [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
142 [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
143 [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
144 };
145
146 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
147 enum rdma_nldev_print_type print_type)
148 {
149 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
150 return -EMSGSIZE;
151 if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
152 nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
153 return -EMSGSIZE;
154
155 return 0;
156 }
157
158 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
159 enum rdma_nldev_print_type print_type,
160 u32 value)
161 {
162 if (put_driver_name_print_type(msg, name, print_type))
163 return -EMSGSIZE;
164 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
165 return -EMSGSIZE;
166
167 return 0;
168 }
169
170 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
171 enum rdma_nldev_print_type print_type,
172 u64 value)
173 {
174 if (put_driver_name_print_type(msg, name, print_type))
175 return -EMSGSIZE;
176 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
177 RDMA_NLDEV_ATTR_PAD))
178 return -EMSGSIZE;
179
180 return 0;
181 }
182
183 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
184 {
185 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
186 value);
187 }
188 EXPORT_SYMBOL(rdma_nl_put_driver_u32);
189
190 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
191 u32 value)
192 {
193 return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
194 value);
195 }
196 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
197
198 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
199 {
200 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
201 value);
202 }
203 EXPORT_SYMBOL(rdma_nl_put_driver_u64);
204
205 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
206 {
207 return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
208 value);
209 }
210 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
211
212 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
213 {
214 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
215 return -EMSGSIZE;
216 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
217 dev_name(&device->dev)))
218 return -EMSGSIZE;
219
220 return 0;
221 }
222
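/*
 * Fill device-wide attributes for one ib_device: the device handle
 * (index and name), number of ports, capability flags, FW version,
 * node and system image GUIDs, node type, DIM support and the
 * protocol string derived from the first port.
 */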
223 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
224 {
225 char fw[IB_FW_VERSION_NAME_MAX];
226 int ret = 0;
227 u8 port;
228
229 if (fill_nldev_handle(msg, device))
230 return -EMSGSIZE;
231
232 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
233 return -EMSGSIZE;
234
235 BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
236 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
237 device->attrs.device_cap_flags,
238 RDMA_NLDEV_ATTR_PAD))
239 return -EMSGSIZE;
240
241 ib_get_device_fw_str(device, fw);
242 /* Device without FW has strlen(fw) = 0 */
243 if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
244 return -EMSGSIZE;
245
246 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
247 be64_to_cpu(device->node_guid),
248 RDMA_NLDEV_ATTR_PAD))
249 return -EMSGSIZE;
250 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
251 be64_to_cpu(device->attrs.sys_image_guid),
252 RDMA_NLDEV_ATTR_PAD))
253 return -EMSGSIZE;
254 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
255 return -EMSGSIZE;
256 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
257 return -EMSGSIZE;
258
259 /*
260 * The link type is determined from the first port. An mlx4 device,
261 * which can potentially expose two different link types on the same
262 * IB device, is a case that is better avoided in the future.
263 */
264 port = rdma_start_port(device);
265 if (rdma_cap_opa_mad(device, port))
266 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
267 else if (rdma_protocol_ib(device, port))
268 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
269 else if (rdma_protocol_iwarp(device, port))
270 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
271 else if (rdma_protocol_roce(device, port))
272 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
273 else if (rdma_protocol_usnic(device, port))
274 ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
275 "usnic");
276 return ret;
277 }
278
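/*
 * Fill the attributes of a single port: for IB ports the capability
 * flags, subnet prefix, LID, SM LID and LMC; for all ports the port
 * and physical state, plus the associated netdev when it belongs to
 * the requesting net namespace.
 */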
279 static int fill_port_info(struct sk_buff *msg,
280 struct ib_device *device, u32 port,
281 const struct net *net)
282 {
283 struct net_device *netdev = NULL;
284 struct ib_port_attr attr;
285 int ret;
286 u64 cap_flags = 0;
287
288 if (fill_nldev_handle(msg, device))
289 return -EMSGSIZE;
290
291 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
292 return -EMSGSIZE;
293
294 ret = ib_query_port(device, port, &attr);
295 if (ret)
296 return ret;
297
298 if (rdma_protocol_ib(device, port)) {
299 BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
300 sizeof(attr.port_cap_flags2)) > sizeof(u64));
301 cap_flags = attr.port_cap_flags |
302 ((u64)attr.port_cap_flags2 << 32);
303 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
304 cap_flags, RDMA_NLDEV_ATTR_PAD))
305 return -EMSGSIZE;
306 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
307 attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
308 return -EMSGSIZE;
309 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
310 return -EMSGSIZE;
311 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
312 return -EMSGSIZE;
313 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
314 return -EMSGSIZE;
315 }
316 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
317 return -EMSGSIZE;
318 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
319 return -EMSGSIZE;
320
321 netdev = ib_device_get_netdev(device, port);
322 if (netdev && net_eq(dev_net(netdev), net)) {
323 ret = nla_put_u32(msg,
324 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
325 if (ret)
326 goto out;
327 ret = nla_put_string(msg,
328 RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
329 }
330
331 out:
332 if (netdev)
333 dev_put(netdev);
334 return ret;
335 }
336
337 static int fill_res_info_entry(struct sk_buff *msg,
338 const char *name, u64 curr)
339 {
340 struct nlattr *entry_attr;
341
342 entry_attr = nla_nest_start_noflag(msg,
343 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
344 if (!entry_attr)
345 return -EMSGSIZE;
346
347 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
348 goto err;
349 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
350 RDMA_NLDEV_ATTR_PAD))
351 goto err;
352
353 nla_nest_end(msg, entry_attr);
354 return 0;
355
356 err:
357 nla_nest_cancel(msg, entry_attr);
358 return -EMSGSIZE;
359 }
360
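/*
 * Fill the per-device resource summary: one nested entry per tracked
 * restrack type (pd, cq, qp, cm_id, mr, ctx) with its current count.
 */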
361 static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
362 {
363 static const char * const names[RDMA_RESTRACK_MAX] = {
364 [RDMA_RESTRACK_PD] = "pd",
365 [RDMA_RESTRACK_CQ] = "cq",
366 [RDMA_RESTRACK_QP] = "qp",
367 [RDMA_RESTRACK_CM_ID] = "cm_id",
368 [RDMA_RESTRACK_MR] = "mr",
369 [RDMA_RESTRACK_CTX] = "ctx",
370 };
371
372 struct nlattr *table_attr;
373 int ret, i, curr;
374
375 if (fill_nldev_handle(msg, device))
376 return -EMSGSIZE;
377
378 table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
379 if (!table_attr)
380 return -EMSGSIZE;
381
382 for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
383 if (!names[i])
384 continue;
385 curr = rdma_restrack_count(device, i);
386 ret = fill_res_info_entry(msg, names[i], curr);
387 if (ret)
388 goto err;
389 }
390
391 nla_nest_end(msg, table_attr);
392 return 0;
393
394 err:
395 nla_nest_cancel(msg, table_attr);
396 return ret;
397 }
398
399 static int fill_res_name_pid(struct sk_buff *msg,
400 struct rdma_restrack_entry *res)
401 {
402 /*
403 * For user resources, userspace should read /proc/PID/comm to get
404 * the name of the task.
405 */
406 if (rdma_is_kernel_res(res)) {
407 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
408 res->kern_name))
409 return -EMSGSIZE;
410 } else {
411 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
412 task_pid_vnr(res->task)))
413 return -EMSGSIZE;
414 }
415 return 0;
416 }
417
418 static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
419 struct rdma_restrack_entry *res)
420 {
421 if (!dev->ops.fill_res_entry)
422 return false;
423 return dev->ops.fill_res_entry(msg, res);
424 }
425
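/*
 * Fill one QP entry. The QP is queried for its current attributes;
 * remote QPN and RQ PSN are reported only for RC/UC QPs, the path
 * migration state only for connected QP types, and the owning PD only
 * for user QPs. Returns -EAGAIN when the QP does not belong to the
 * requested port so that dumpit callers can skip it.
 */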
426 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
427 struct rdma_restrack_entry *res, uint32_t port)
428 {
429 struct ib_qp *qp = container_of(res, struct ib_qp, res);
430 struct ib_device *dev = qp->device;
431 struct ib_qp_init_attr qp_init_attr;
432 struct ib_qp_attr qp_attr;
433 int ret;
434
435 ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
436 if (ret)
437 return ret;
438
439 if (port && port != qp_attr.port_num)
440 return -EAGAIN;
441
442 /* In create_qp() port is not set yet */
443 if (qp_attr.port_num &&
444 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
445 goto err;
446
447 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
448 goto err;
449 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
450 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
451 qp_attr.dest_qp_num))
452 goto err;
453 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
454 qp_attr.rq_psn))
455 goto err;
456 }
457
458 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
459 goto err;
460
461 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
462 qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
463 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
464 qp_attr.path_mig_state))
465 goto err;
466 }
467 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
468 goto err;
469 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
470 goto err;
471
472 if (!rdma_is_kernel_res(res) &&
473 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
474 goto err;
475
476 if (fill_res_name_pid(msg, res))
477 goto err;
478
479 if (fill_res_entry(dev, msg, res))
480 goto err;
481
482 return 0;
483
484 err: return -EMSGSIZE;
485 }
486
487 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
488 struct rdma_restrack_entry *res, uint32_t port)
489 {
490 struct rdma_id_private *id_priv =
491 container_of(res, struct rdma_id_private, res);
492 struct ib_device *dev = id_priv->id.device;
493 struct rdma_cm_id *cm_id = &id_priv->id;
494
495 if (port && port != cm_id->port_num)
496 return 0;
497
498 if (cm_id->port_num &&
499 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
500 goto err;
501
502 if (id_priv->qp_num) {
503 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
504 goto err;
505 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
506 goto err;
507 }
508
509 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
510 goto err;
511
512 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
513 goto err;
514
515 if (cm_id->route.addr.src_addr.ss_family &&
516 nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
517 sizeof(cm_id->route.addr.src_addr),
518 &cm_id->route.addr.src_addr))
519 goto err;
520 if (cm_id->route.addr.dst_addr.ss_family &&
521 nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
522 sizeof(cm_id->route.addr.dst_addr),
523 &cm_id->route.addr.dst_addr))
524 goto err;
525
526 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
527 goto err;
528
529 if (fill_res_name_pid(msg, res))
530 goto err;
531
532 if (fill_res_entry(dev, msg, res))
533 goto err;
534
535 return 0;
536
537 err: return -EMSGSIZE;
538 }
539
540 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
541 struct rdma_restrack_entry *res, uint32_t port)
542 {
543 struct ib_cq *cq = container_of(res, struct ib_cq, res);
544 struct ib_device *dev = cq->device;
545
546 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
547 goto err;
548 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
549 atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
550 goto err;
551
552 /* Poll context is only valid for kernel CQs */
553 if (rdma_is_kernel_res(res) &&
554 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
555 goto err;
556
557 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
558 goto err;
559
560 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
561 goto err;
562 if (!rdma_is_kernel_res(res) &&
563 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
564 cq->uobject->context->res.id))
565 goto err;
566
567 if (fill_res_name_pid(msg, res))
568 goto err;
569
570 if (fill_res_entry(dev, msg, res))
571 goto err;
572
573 return 0;
574
575 err: return -EMSGSIZE;
576 }
577
578 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
579 struct rdma_restrack_entry *res, uint32_t port)
580 {
581 struct ib_mr *mr = container_of(res, struct ib_mr, res);
582 struct ib_device *dev = mr->pd->device;
583
584 if (has_cap_net_admin) {
585 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
586 goto err;
587 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
588 goto err;
589 }
590
591 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
592 RDMA_NLDEV_ATTR_PAD))
593 goto err;
594
595 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
596 goto err;
597
598 if (!rdma_is_kernel_res(res) &&
599 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
600 goto err;
601
602 if (fill_res_name_pid(msg, res))
603 goto err;
604
605 if (fill_res_entry(dev, msg, res))
606 goto err;
607
608 return 0;
609
610 err: return -EMSGSIZE;
611 }
612
613 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
614 struct rdma_restrack_entry *res, uint32_t port)
615 {
616 struct ib_pd *pd = container_of(res, struct ib_pd, res);
617 struct ib_device *dev = pd->device;
618
619 if (has_cap_net_admin) {
620 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
621 pd->local_dma_lkey))
622 goto err;
623 if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
624 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
625 pd->unsafe_global_rkey))
626 goto err;
627 }
628 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
629 atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
630 goto err;
631
632 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
633 goto err;
634
635 if (!rdma_is_kernel_res(res) &&
636 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
637 pd->uobject->context->res.id))
638 goto err;
639
640 if (fill_res_name_pid(msg, res))
641 goto err;
642
643 if (fill_res_entry(dev, msg, res))
644 goto err;
645
646 return 0;
647
648 err: return -EMSGSIZE;
649 }
650
651 static int fill_stat_counter_mode(struct sk_buff *msg,
652 struct rdma_counter *counter)
653 {
654 struct rdma_counter_mode *m = &counter->mode;
655
656 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
657 return -EMSGSIZE;
658
659 if (m->mode == RDMA_COUNTER_MODE_AUTO)
660 if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
661 nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
662 return -EMSGSIZE;
663
664 return 0;
665 }
666
667 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
668 {
669 struct nlattr *entry_attr;
670
671 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
672 if (!entry_attr)
673 return -EMSGSIZE;
674
675 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
676 goto err;
677
678 nla_nest_end(msg, entry_attr);
679 return 0;
680
681 err:
682 nla_nest_cancel(msg, entry_attr);
683 return -EMSGSIZE;
684 }
685
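/*
 * List the QPs bound to a statistics counter: walk the device's
 * restrack QP table under xa_lock and add a nested entry for every
 * visible QP whose counter id matches. RAW_PACKET QPs are skipped
 * when the caller lacks CAP_NET_RAW.
 */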
686 static int fill_stat_counter_qps(struct sk_buff *msg,
687 struct rdma_counter *counter)
688 {
689 struct rdma_restrack_entry *res;
690 struct rdma_restrack_root *rt;
691 struct nlattr *table_attr;
692 struct ib_qp *qp = NULL;
693 unsigned long id = 0;
694 int ret = 0;
695
696 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
if (!table_attr)
	return -EMSGSIZE;
697
698 rt = &counter->device->res[RDMA_RESTRACK_QP];
699 xa_lock(&rt->xa);
700 xa_for_each(&rt->xa, id, res) {
701 if (!rdma_is_visible_in_pid_ns(res))
702 continue;
703
704 qp = container_of(res, struct ib_qp, res);
705 if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
706 continue;
707
708 if (!qp->counter || (qp->counter->id != counter->id))
709 continue;
710
711 ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
712 if (ret)
713 goto err;
714 }
715
716 xa_unlock(&rt->xa);
717 nla_nest_end(msg, table_attr);
718 return 0;
719
720 err:
721 xa_unlock(&rt->xa);
722 nla_nest_cancel(msg, table_attr);
723 return ret;
724 }
725
726 static int fill_stat_hwcounter_entry(struct sk_buff *msg,
727 const char *name, u64 value)
728 {
729 struct nlattr *entry_attr;
730
731 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
732 if (!entry_attr)
733 return -EMSGSIZE;
734
735 if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
736 name))
737 goto err;
738 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
739 value, RDMA_NLDEV_ATTR_PAD))
740 goto err;
741
742 nla_nest_end(msg, entry_attr);
743 return 0;
744
745 err:
746 nla_nest_cancel(msg, entry_attr);
747 return -EMSGSIZE;
748 }
749
750 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
751 struct rdma_counter *counter)
752 {
753 struct rdma_hw_stats *st = counter->stats;
754 struct nlattr *table_attr;
755 int i;
756
757 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
758 if (!table_attr)
759 return -EMSGSIZE;
760
761 for (i = 0; i < st->num_counters; i++)
762 if (fill_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
763 goto err;
764
765 nla_nest_end(msg, table_attr);
766 return 0;
767
768 err:
769 nla_nest_cancel(msg, table_attr);
770 return -EMSGSIZE;
771 }
772
773 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
774 struct rdma_restrack_entry *res,
775 uint32_t port)
776 {
777 struct rdma_counter *counter =
778 container_of(res, struct rdma_counter, res);
779
780 if (port && port != counter->port)
781 return -EAGAIN;
782
783 /* Dump it even if the query failed */
784 rdma_counter_query_stats(counter);
785
786 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
787 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
788 fill_res_name_pid(msg, &counter->res) ||
789 fill_stat_counter_mode(msg, counter) ||
790 fill_stat_counter_qps(msg, counter) ||
791 fill_stat_counter_hwcounters(msg, counter))
792 return -EMSGSIZE;
793
794 return 0;
795 }
796
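/*
 * RDMA_NLDEV_CMD_GET handler for a single device: look the device up
 * by index in the caller's net namespace, fill the device info into a
 * new message and unicast the reply to the requester.
 */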
797 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
798 struct netlink_ext_ack *extack)
799 {
800 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
801 struct ib_device *device;
802 struct sk_buff *msg;
803 u32 index;
804 int err;
805
806 err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
807 nldev_policy, extack);
808 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
809 return -EINVAL;
810
811 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
812
813 device = ib_device_get_by_index(sock_net(skb->sk), index);
814 if (!device)
815 return -EINVAL;
816
817 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
818 if (!msg) {
819 err = -ENOMEM;
820 goto err;
821 }
822
823 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
824 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
825 0, 0);
826
827 err = fill_dev_info(msg, device);
828 if (err)
829 goto err_free;
830
831 nlmsg_end(msg, nlh);
832
833 ib_device_put(device);
834 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
835
836 err_free:
837 nlmsg_free(msg);
838 err:
839 ib_device_put(device);
840 return err;
841 }
842
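/*
 * RDMA_NLDEV_CMD_SET handler: depending on which attribute is present,
 * rename the device, move it to the net namespace given by a namespace
 * file descriptor, or toggle dynamic interrupt moderation (DIM).
 */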
843 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
844 struct netlink_ext_ack *extack)
845 {
846 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
847 struct ib_device *device;
848 u32 index;
849 int err;
850
851 err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
852 nldev_policy, extack);
853 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
854 return -EINVAL;
855
856 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
857 device = ib_device_get_by_index(sock_net(skb->sk), index);
858 if (!device)
859 return -EINVAL;
860
861 if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
862 char name[IB_DEVICE_NAME_MAX] = {};
863
864 nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
865 IB_DEVICE_NAME_MAX);
866 err = ib_device_rename(device, name);
867 goto done;
868 }
869
870 if (tb[RDMA_NLDEV_NET_NS_FD]) {
871 u32 ns_fd;
872
873 ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
874 err = ib_device_set_netns_put(skb, device, ns_fd);
875 goto put_done;
876 }
877
878 if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
879 u8 use_dim;
880
881 use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
882 err = ib_device_set_dim(device, use_dim);
883 goto done;
884 }
885
886 done:
887 ib_device_put(device);
888 put_done:
889 return err;
890 }
891
892 static int _nldev_get_dumpit(struct ib_device *device,
893 struct sk_buff *skb,
894 struct netlink_callback *cb,
895 unsigned int idx)
896 {
897 int start = cb->args[0];
898 struct nlmsghdr *nlh;
899
900 if (idx < start)
901 return 0;
902
903 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
904 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
905 0, NLM_F_MULTI);
906
907 if (fill_dev_info(skb, device)) {
908 nlmsg_cancel(skb, nlh);
909 goto out;
910 }
911
912 nlmsg_end(skb, nlh);
913
914 idx++;
915
916 out: cb->args[0] = idx;
917 return skb->len;
918 }
919
920 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
921 {
922 /*
923 * There is no need to take a lock because
924 * we rely on ib_core's locking.
925 */
926 return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
927 }
928
929 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
930 struct netlink_ext_ack *extack)
931 {
932 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
933 struct ib_device *device;
934 struct sk_buff *msg;
935 u32 index;
936 u32 port;
937 int err;
938
939 err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
940 nldev_policy, extack);
941 if (err ||
942 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
943 !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
944 return -EINVAL;
945
946 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
947 device = ib_device_get_by_index(sock_net(skb->sk), index);
948 if (!device)
949 return -EINVAL;
950
951 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
952 if (!rdma_is_port_valid(device, port)) {
953 err = -EINVAL;
954 goto err;
955 }
956
957 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
958 if (!msg) {
959 err = -ENOMEM;
960 goto err;
961 }
962
963 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
964 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
965 0, 0);
966
967 err = fill_port_info(msg, device, port, sock_net(skb->sk));
968 if (err)
969 goto err_free;
970
971 nlmsg_end(msg, nlh);
972 ib_device_put(device);
973
974 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
975
976 err_free:
977 nlmsg_free(msg);
978 err:
979 ib_device_put(device);
980 return err;
981 }
982
983 static int nldev_port_get_dumpit(struct sk_buff *skb,
984 struct netlink_callback *cb)
985 {
986 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
987 struct ib_device *device;
988 int start = cb->args[0];
989 struct nlmsghdr *nlh;
990 u32 idx = 0;
991 u32 ifindex;
992 int err;
993 unsigned int p;
994
995 err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
996 nldev_policy, NULL);
997 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
998 return -EINVAL;
999
1000 ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1001 device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1002 if (!device)
1003 return -EINVAL;
1004
1005 rdma_for_each_port (device, p) {
1006 /*
1007 * The dumpit function returns all information starting from a
1008 * specific index. That index is taken from the netlink request
1009 * sent by the user and is available in cb->args[0].
1010 *
1011 * Usually, the user doesn't fill this field, which causes
1012 * everything to be returned.
1014 *
1015 */
1016 if (idx < start) {
1017 idx++;
1018 continue;
1019 }
1020
1021 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1022 cb->nlh->nlmsg_seq,
1023 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1024 RDMA_NLDEV_CMD_PORT_GET),
1025 0, NLM_F_MULTI);
1026
1027 if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
1028 nlmsg_cancel(skb, nlh);
1029 goto out;
1030 }
1031 idx++;
1032 nlmsg_end(skb, nlh);
1033 }
1034
1035 out:
1036 ib_device_put(device);
1037 cb->args[0] = idx;
1038 return skb->len;
1039 }
1040
1041 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1042 struct netlink_ext_ack *extack)
1043 {
1044 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1045 struct ib_device *device;
1046 struct sk_buff *msg;
1047 u32 index;
1048 int ret;
1049
1050 ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1051 nldev_policy, extack);
1052 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1053 return -EINVAL;
1054
1055 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1056 device = ib_device_get_by_index(sock_net(skb->sk), index);
1057 if (!device)
1058 return -EINVAL;
1059
1060 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1061 if (!msg) {
1062 ret = -ENOMEM;
1063 goto err;
1064 }
1065
1066 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1067 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1068 0, 0);
1069
1070 ret = fill_res_info(msg, device);
1071 if (ret)
1072 goto err_free;
1073
1074 nlmsg_end(msg, nlh);
1075 ib_device_put(device);
1076 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1077
1078 err_free:
1079 nlmsg_free(msg);
1080 err:
1081 ib_device_put(device);
1082 return ret;
1083 }
1084
1085 static int _nldev_res_get_dumpit(struct ib_device *device,
1086 struct sk_buff *skb,
1087 struct netlink_callback *cb,
1088 unsigned int idx)
1089 {
1090 int start = cb->args[0];
1091 struct nlmsghdr *nlh;
1092
1093 if (idx < start)
1094 return 0;
1095
1096 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1097 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1098 0, NLM_F_MULTI);
1099
1100 if (fill_res_info(skb, device)) {
1101 nlmsg_cancel(skb, nlh);
1102 goto out;
1103 }
1104 nlmsg_end(skb, nlh);
1105
1106 idx++;
1107
1108 out:
1109 cb->args[0] = idx;
1110 return skb->len;
1111 }
1112
1113 static int nldev_res_get_dumpit(struct sk_buff *skb,
1114 struct netlink_callback *cb)
1115 {
1116 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1117 }
1118
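/*
 * Dispatch table for the common restrack handlers below. Each entry
 * provides the fill callback and the netlink command, table attribute,
 * per-object entry attribute and object-id attribute for one resource
 * type, plus a flag for resources that are tracked per device only.
 */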
1119 struct nldev_fill_res_entry {
1120 int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
1121 struct rdma_restrack_entry *res, u32 port);
1122 enum rdma_nldev_attr nldev_attr;
1123 enum rdma_nldev_command nldev_cmd;
1124 u8 flags;
1125 u32 entry;
1126 u32 id;
1127 };
1128
1129 enum nldev_res_flags {
1130 NLDEV_PER_DEV = 1 << 0,
1131 };
1132
1133 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
1134 [RDMA_RESTRACK_QP] = {
1135 .fill_res_func = fill_res_qp_entry,
1136 .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
1137 .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
1138 .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
1139 .id = RDMA_NLDEV_ATTR_RES_LQPN,
1140 },
1141 [RDMA_RESTRACK_CM_ID] = {
1142 .fill_res_func = fill_res_cm_id_entry,
1143 .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
1144 .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
1145 .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
1146 .id = RDMA_NLDEV_ATTR_RES_CM_IDN,
1147 },
1148 [RDMA_RESTRACK_CQ] = {
1149 .fill_res_func = fill_res_cq_entry,
1150 .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
1151 .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
1152 .flags = NLDEV_PER_DEV,
1153 .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
1154 .id = RDMA_NLDEV_ATTR_RES_CQN,
1155 },
1156 [RDMA_RESTRACK_MR] = {
1157 .fill_res_func = fill_res_mr_entry,
1158 .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
1159 .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
1160 .flags = NLDEV_PER_DEV,
1161 .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
1162 .id = RDMA_NLDEV_ATTR_RES_MRN,
1163 },
1164 [RDMA_RESTRACK_PD] = {
1165 .fill_res_func = fill_res_pd_entry,
1166 .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
1167 .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
1168 .flags = NLDEV_PER_DEV,
1169 .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
1170 .id = RDMA_NLDEV_ATTR_RES_PDN,
1171 },
1172 [RDMA_RESTRACK_COUNTER] = {
1173 .fill_res_func = fill_res_counter_entry,
1174 .nldev_cmd = RDMA_NLDEV_CMD_STAT_GET,
1175 .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
1176 .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
1177 .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
1178 },
1179 };
1180
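/*
 * Common doit handler for all restrack resource types: resolve the
 * device and optional port, look the object up by id through restrack,
 * verify it is visible in the caller's pid namespace and fill a single
 * reply using the type's fill callback.
 */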
1181 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1182 struct netlink_ext_ack *extack,
1183 enum rdma_restrack_type res_type)
1184 {
1185 const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1186 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1187 struct rdma_restrack_entry *res;
1188 struct ib_device *device;
1189 u32 index, id, port = 0;
1190 bool has_cap_net_admin;
1191 struct sk_buff *msg;
1192 int ret;
1193
1194 ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1195 nldev_policy, extack);
1196 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1197 return -EINVAL;
1198
1199 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1200 device = ib_device_get_by_index(sock_net(skb->sk), index);
1201 if (!device)
1202 return -EINVAL;
1203
1204 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1205 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1206 if (!rdma_is_port_valid(device, port)) {
1207 ret = -EINVAL;
1208 goto err;
1209 }
1210 }
1211
1212 if ((port && fe->flags & NLDEV_PER_DEV) ||
1213 (!port && ~fe->flags & NLDEV_PER_DEV)) {
1214 ret = -EINVAL;
1215 goto err;
1216 }
1217
1218 id = nla_get_u32(tb[fe->id]);
1219 res = rdma_restrack_get_byid(device, res_type, id);
1220 if (IS_ERR(res)) {
1221 ret = PTR_ERR(res);
1222 goto err;
1223 }
1224
1225 if (!rdma_is_visible_in_pid_ns(res)) {
1226 ret = -ENOENT;
1227 goto err_get;
1228 }
1229
1230 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1231 if (!msg) {
1232 ret = -ENOMEM;
1233 goto err_get;
1234 }
1235
1236 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1237 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1238 0, 0);
1239
1240 if (fill_nldev_handle(msg, device)) {
1241 ret = -EMSGSIZE;
1242 goto err_free;
1243 }
1244
1245 has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1246 ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
1247 rdma_restrack_put(res);
1248 if (ret)
1249 goto err_free;
1250
1251 nlmsg_end(msg, nlh);
1252 ib_device_put(device);
1253 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1254
1255 err_free:
1256 nlmsg_free(msg);
1257 err_get:
1258 rdma_restrack_put(res);
1259 err:
1260 ib_device_put(device);
1261 return ret;
1262 }
1263
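/*
 * Common dumpit handler for all restrack resource types: iterate the
 * device's restrack table, skipping objects that are not visible in
 * the caller's pid namespace or that precede the index saved in
 * cb->args[0], and emit one nested entry per object. The xa_lock is
 * dropped while each entry is filled; -EMSGSIZE ends the current
 * message and -EAGAIN skips an entry that no longer matches.
 */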
1264 static int res_get_common_dumpit(struct sk_buff *skb,
1265 struct netlink_callback *cb,
1266 enum rdma_restrack_type res_type)
1267 {
1268 const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1269 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1270 struct rdma_restrack_entry *res;
1271 struct rdma_restrack_root *rt;
1272 int err, ret = 0, idx = 0;
1273 struct nlattr *table_attr;
1274 struct nlattr *entry_attr;
1275 struct ib_device *device;
1276 int start = cb->args[0];
1277 bool has_cap_net_admin;
1278 struct nlmsghdr *nlh;
1279 unsigned long id;
1280 u32 index, port = 0;
1281 bool filled = false;
1282
1283 err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1284 nldev_policy, NULL);
1285 /*
1286 * Right now, we expect the device index in order to get resource
1287 * information, but it is possible to extend this code to return all
1288 * devices in one shot by checking for RDMA_NLDEV_ATTR_DEV_INDEX;
1289 * if it doesn't exist, we would iterate over all devices.
1290 *
1291 * But that is not needed for now.
1292 */
1293 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1294 return -EINVAL;
1295
1296 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1297 device = ib_device_get_by_index(sock_net(skb->sk), index);
1298 if (!device)
1299 return -EINVAL;
1300
1301 /*
1302 * If no PORT_INDEX is supplied, we return all resources of this type from that device.
1303 */
1304 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1305 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1306 if (!rdma_is_port_valid(device, port)) {
1307 ret = -EINVAL;
1308 goto err_index;
1309 }
1310 }
1311
1312 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1313 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
1314 0, NLM_F_MULTI);
1315
1316 if (fill_nldev_handle(skb, device)) {
1317 ret = -EMSGSIZE;
1318 goto err;
1319 }
1320
1321 table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1322 if (!table_attr) {
1323 ret = -EMSGSIZE;
1324 goto err;
1325 }
1326
1327 has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1328
1329 rt = &device->res[res_type];
1330 xa_lock(&rt->xa);
1331 /*
1332 * FIXME: if the skip ahead is something common this loop should
1333 * use xas_for_each & xas_pause to optimize, we can have a lot of
1334 * objects.
1335 */
1336 xa_for_each(&rt->xa, id, res) {
1337 if (!rdma_is_visible_in_pid_ns(res))
1338 continue;
1339
1340 if (idx < start || !rdma_restrack_get(res))
1341 goto next;
1342
1343 xa_unlock(&rt->xa);
1344
1345 filled = true;
1346
1347 entry_attr = nla_nest_start_noflag(skb, fe->entry);
1348 if (!entry_attr) {
1349 ret = -EMSGSIZE;
1350 rdma_restrack_put(res);
1351 goto msg_full;
1352 }
1353
1354 ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
1355 rdma_restrack_put(res);
1356
1357 if (ret) {
1358 nla_nest_cancel(skb, entry_attr);
1359 if (ret == -EMSGSIZE)
1360 goto msg_full;
1361 if (ret == -EAGAIN)
1362 goto again;
1363 goto res_err;
1364 }
1365 nla_nest_end(skb, entry_attr);
1366 again: xa_lock(&rt->xa);
1367 next: idx++;
1368 }
1369 xa_unlock(&rt->xa);
1370
1371 msg_full:
1372 nla_nest_end(skb, table_attr);
1373 nlmsg_end(skb, nlh);
1374 cb->args[0] = idx;
1375
1376 /*
1377 * No more entries to fill, cancel the message and
1378 * return 0 to mark end of dumpit.
1379 */
1380 if (!filled)
1381 goto err;
1382
1383 ib_device_put(device);
1384 return skb->len;
1385
1386 res_err:
1387 nla_nest_cancel(skb, table_attr);
1388
1389 err:
1390 nlmsg_cancel(skb, nlh);
1391
1392 err_index:
1393 ib_device_put(device);
1394 return ret;
1395 }
1396
1397 #define RES_GET_FUNCS(name, type) \
1398 static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
1399 struct netlink_callback *cb) \
1400 { \
1401 return res_get_common_dumpit(skb, cb, type); \
1402 } \
1403 static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
1404 struct nlmsghdr *nlh, \
1405 struct netlink_ext_ack *extack) \
1406 { \
1407 return res_get_common_doit(skb, nlh, extack, type); \
1408 }
1409
1410 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1411 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1412 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1413 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1414 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1415 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1416
1417 static LIST_HEAD(link_ops);
1418 static DECLARE_RWSEM(link_ops_rwsem);
1419
1420 static const struct rdma_link_ops *link_ops_get(const char *type)
1421 {
1422 const struct rdma_link_ops *ops;
1423
1424 list_for_each_entry(ops, &link_ops, list) {
1425 if (!strcmp(ops->type, type))
1426 goto out;
1427 }
1428 ops = NULL;
1429 out:
1430 return ops;
1431 }
1432
1433 void rdma_link_register(struct rdma_link_ops *ops)
1434 {
1435 down_write(&link_ops_rwsem);
1436 if (WARN_ON_ONCE(link_ops_get(ops->type)))
1437 goto out;
1438 list_add(&ops->list, &link_ops);
1439 out:
1440 up_write(&link_ops_rwsem);
1441 }
1442 EXPORT_SYMBOL(rdma_link_register);
1443
1444 void rdma_link_unregister(struct rdma_link_ops *ops)
1445 {
1446 down_write(&link_ops_rwsem);
1447 list_del(&ops->list);
1448 up_write(&link_ops_rwsem);
1449 }
1450 EXPORT_SYMBOL(rdma_link_unregister);
1451
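/*
 * RDMA_NLDEV_CMD_NEWLINK handler: create an RDMA device of the
 * requested link type on top of a netdev. When no matching
 * rdma_link_ops is registered, try to load "rdma-link-<type>" and
 * look the ops up again.
 */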
1452 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1453 struct netlink_ext_ack *extack)
1454 {
1455 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1456 char ibdev_name[IB_DEVICE_NAME_MAX];
1457 const struct rdma_link_ops *ops;
1458 char ndev_name[IFNAMSIZ];
1459 struct net_device *ndev;
1460 char type[IFNAMSIZ];
1461 int err;
1462
1463 err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1464 nldev_policy, extack);
1465 if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1466 !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1467 return -EINVAL;
1468
1469 nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1470 sizeof(ibdev_name));
1471 if (strchr(ibdev_name, '%'))
1472 return -EINVAL;
1473
1474 nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1475 nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1476 sizeof(ndev_name));
1477
1478 ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1479 if (!ndev)
1480 return -ENODEV;
1481
1482 down_read(&link_ops_rwsem);
1483 ops = link_ops_get(type);
1484 #ifdef CONFIG_MODULES
1485 if (!ops) {
1486 up_read(&link_ops_rwsem);
1487 request_module("rdma-link-%s", type);
1488 down_read(&link_ops_rwsem);
1489 ops = link_ops_get(type);
1490 }
1491 #endif
1492 err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1493 up_read(&link_ops_rwsem);
1494 dev_put(ndev);
1495
1496 return err;
1497 }
1498
1499 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1500 struct netlink_ext_ack *extack)
1501 {
1502 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1503 struct ib_device *device;
1504 u32 index;
1505 int err;
1506
1507 err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1508 nldev_policy, extack);
1509 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1510 return -EINVAL;
1511
1512 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1513 device = ib_device_get_by_index(sock_net(skb->sk), index);
1514 if (!device)
1515 return -EINVAL;
1516
1517 if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
1518 ib_device_put(device);
1519 return -EINVAL;
1520 }
1521
1522 ib_unregister_device_and_put(device);
1523 return 0;
1524 }
1525
1526 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1527 struct netlink_ext_ack *extack)
1528 {
1529 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1530 char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1531 struct ib_client_nl_info data = {};
1532 struct ib_device *ibdev = NULL;
1533 struct sk_buff *msg;
1534 u32 index;
1535 int err;
1536
1537 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1538 extack);
1539 if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1540 return -EINVAL;
1541
1542 nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1543 sizeof(client_name));
1544
1545 if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1546 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1547 ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1548 if (!ibdev)
1549 return -EINVAL;
1550
1551 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1552 data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1553 if (!rdma_is_port_valid(ibdev, data.port)) {
1554 err = -EINVAL;
1555 goto out_put;
1556 }
1557 } else {
1558 data.port = -1;
1559 }
1560 } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1561 return -EINVAL;
1562 }
1563
1564 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1565 if (!msg) {
1566 err = -ENOMEM;
1567 goto out_put;
1568 }
1569 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1570 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1571 RDMA_NLDEV_CMD_GET_CHARDEV),
1572 0, 0);
1573
1574 data.nl_msg = msg;
1575 err = ib_get_client_nl_info(ibdev, client_name, &data);
1576 if (err)
1577 goto out_nlmsg;
1578
1579 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1580 huge_encode_dev(data.cdev->devt),
1581 RDMA_NLDEV_ATTR_PAD);
1582 if (err)
1583 goto out_data;
1584 err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1585 RDMA_NLDEV_ATTR_PAD);
1586 if (err)
1587 goto out_data;
1588 if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1589 dev_name(data.cdev))) {
1590 err = -EMSGSIZE;
1591 goto out_data;
1592 }
1593
1594 nlmsg_end(msg, nlh);
1595 put_device(data.cdev);
1596 if (ibdev)
1597 ib_device_put(ibdev);
1598 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1599
1600 out_data:
1601 put_device(data.cdev);
1602 out_nlmsg:
1603 nlmsg_free(msg);
1604 out_put:
1605 if (ibdev)
1606 ib_device_put(ibdev);
1607 return err;
1608 }
1609
1610 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1611 struct netlink_ext_ack *extack)
1612 {
1613 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1614 struct sk_buff *msg;
1615 int err;
1616
1617 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1618 nldev_policy, extack);
1619 if (err)
1620 return err;
1621
1622 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1623 if (!msg)
1624 return -ENOMEM;
1625
1626 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1627 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1628 RDMA_NLDEV_CMD_SYS_GET),
1629 0, 0);
1630
1631 err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1632 (u8)ib_devices_shared_netns);
1633 if (err) {
1634 nlmsg_free(msg);
1635 return err;
1636 }
1637 nlmsg_end(msg, nlh);
1638 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1639 }
1640
1641 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1642 struct netlink_ext_ack *extack)
1643 {
1644 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1645 u8 enable;
1646 int err;
1647
1648 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1649 nldev_policy, extack);
1650 if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1651 return -EINVAL;
1652
1653 enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1654 /* Only 0 and 1 are supported */
1655 if (enable > 1)
1656 return -EINVAL;
1657
1658 err = rdma_compatdev_set(enable);
1659 return err;
1660 }
1661
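/*
 * RDMA_NLDEV_CMD_STAT_SET handler for QP counters: either switch the
 * port to auto mode (with an optional selection mask) or bind a QP to
 * a counter, allocating a new counter when no counter id is supplied.
 * In the manual case the reply echoes device, port, counter id and QPN.
 */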
1662 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1663 struct netlink_ext_ack *extack)
1664 {
1665 u32 index, port, mode, mask = 0, qpn, cntn = 0;
1666 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1667 struct ib_device *device;
1668 struct sk_buff *msg;
1669 int ret;
1670
1671 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1672 nldev_policy, extack);
1673 /* Currently only counters for QPs are supported */
1674 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1675 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1676 !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
1677 return -EINVAL;
1678
1679 if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1680 return -EINVAL;
1681
1682 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1683 device = ib_device_get_by_index(sock_net(skb->sk), index);
1684 if (!device)
1685 return -EINVAL;
1686
1687 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1688 if (!rdma_is_port_valid(device, port)) {
1689 ret = -EINVAL;
1690 goto err;
1691 }
1692
1693 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1694 if (!msg) {
1695 ret = -ENOMEM;
1696 goto err;
1697 }
1698 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1699 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1700 RDMA_NLDEV_CMD_STAT_SET),
1701 0, 0);
1702
1703 mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
1704 if (mode == RDMA_COUNTER_MODE_AUTO) {
1705 if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
1706 mask = nla_get_u32(
1707 tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
1708
1709 ret = rdma_counter_set_auto_mode(device, port,
1710 mask ? true : false, mask);
1711 if (ret)
1712 goto err_msg;
1713 } else {
if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) {
	ret = -EINVAL;
	goto err_msg;
}
1714 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1715 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
1716 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1717 ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
1718 } else {
1719 ret = rdma_counter_bind_qpn_alloc(device, port,
1720 qpn, &cntn);
1721 }
1722 if (ret)
1723 goto err_msg;
1724
1725 if (fill_nldev_handle(msg, device) ||
1726 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1727 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1728 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1729 ret = -EMSGSIZE;
1730 goto err_fill;
1731 }
1732 }
1733
1734 nlmsg_end(msg, nlh);
1735 ib_device_put(device);
1736 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1737
1738 err_fill:
1739 rdma_counter_unbind_qpn(device, port, qpn, cntn);
1740 err_msg:
1741 nlmsg_free(msg);
1742 err:
1743 ib_device_put(device);
1744 return ret;
1745 }
1746
1747 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1748 struct netlink_ext_ack *extack)
1749 {
1750 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1751 struct ib_device *device;
1752 struct sk_buff *msg;
1753 u32 index, port, qpn, cntn;
1754 int ret;
1755
1756 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1757 nldev_policy, extack);
1758 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1759 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
1760 !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
1761 !tb[RDMA_NLDEV_ATTR_RES_LQPN])
1762 return -EINVAL;
1763
1764 if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1765 return -EINVAL;
1766
1767 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1768 device = ib_device_get_by_index(sock_net(skb->sk), index);
1769 if (!device)
1770 return -EINVAL;
1771
1772 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1773 if (!rdma_is_port_valid(device, port)) {
1774 ret = -EINVAL;
1775 goto err;
1776 }
1777
1778 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1779 if (!msg) {
1780 ret = -ENOMEM;
1781 goto err;
1782 }
1783 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1784 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1785 RDMA_NLDEV_CMD_STAT_SET),
1786 0, 0);
1787
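/* Compose the reply first so the unbind is only performed once it can be reported. */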
1788 cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1789 qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1790 if (fill_nldev_handle(msg, device) ||
1791 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1792 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1793 nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1794 ret = -EMSGSIZE;
1795 goto err_fill;
1796 }
1797
1798 ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
1799 if (ret)
1800 goto err_fill;
1801
1802 nlmsg_end(msg, nlh);
1803 ib_device_put(device);
1804 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1805
1806 err_fill:
1807 nlmsg_free(msg);
1808 err:
1809 ib_device_put(device);
1810 return ret;
1811 }
1812
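/*
 * Dump the port-wide (default) hardware counters of a device. The driver
 * must implement alloc_hw_stats/get_hw_stats for this to work.
 */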
1813 static int stat_get_doit_default_counter(struct sk_buff *skb,
1814 struct nlmsghdr *nlh,
1815 struct netlink_ext_ack *extack,
1816 struct nlattr *tb[])
1817 {
1818 struct rdma_hw_stats *stats;
1819 struct nlattr *table_attr;
1820 struct ib_device *device;
1821 int ret, num_cnts, i;
1822 struct sk_buff *msg;
1823 u32 index, port;
1824 u64 v;
1825
1826 if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1827 return -EINVAL;
1828
1829 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1830 device = ib_device_get_by_index(sock_net(skb->sk), index);
1831 if (!device)
1832 return -EINVAL;
1833
1834 if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
1835 ret = -EINVAL;
1836 goto err;
1837 }
1838
1839 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1840 if (!rdma_is_port_valid(device, port)) {
1841 ret = -EINVAL;
1842 goto err;
1843 }
1844
1845 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1846 if (!msg) {
1847 ret = -ENOMEM;
1848 goto err;
1849 }
1850
1851 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1852 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1853 RDMA_NLDEV_CMD_STAT_GET),
1854 0, 0);
1855
1856 if (fill_nldev_handle(msg, device) ||
1857 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
1858 ret = -EMSGSIZE;
1859 goto err_msg;
1860 }
1861
1862 stats = device->port_data ? device->port_data[port].hw_stats : NULL;
1863 if (stats == NULL) {
1864 ret = -EINVAL;
1865 goto err_msg;
1866 }
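/*
 * Refresh the driver's hw counters under stats->lock; each dumped value
 * also includes what has been accounted to counters bound on this port.
 */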
1867 mutex_lock(&stats->lock);
1868
1869 num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
1870 if (num_cnts < 0) {
1871 ret = -EINVAL;
1872 goto err_stats;
1873 }
1874
1875 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
1876 if (!table_attr) {
1877 ret = -EMSGSIZE;
1878 goto err_stats;
1879 }
1880 for (i = 0; i < num_cnts; i++) {
1881 v = stats->value[i] +
1882 rdma_counter_get_hwstat_value(device, port, i);
1883 if (fill_stat_hwcounter_entry(msg, stats->names[i], v)) {
1884 ret = -EMSGSIZE;
1885 goto err_table;
1886 }
1887 }
1888 nla_nest_end(msg, table_attr);
1889
1890 mutex_unlock(&stats->lock);
1891 nlmsg_end(msg, nlh);
1892 ib_device_put(device);
1893 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1894
1895 err_table:
1896 nla_nest_cancel(msg, table_attr);
1897 err_stats:
1898 mutex_unlock(&stats->lock);
1899 err_msg:
1900 nlmsg_free(msg);
1901 err:
1902 ib_device_put(device);
1903 return ret;
1904 }
1905
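/*
 * Report the current QP counter mode (and auto-mode mask) for a port, or,
 * when a counter ID is given, defer to the counter resource handler.
 */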
1906 static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
1907 struct netlink_ext_ack *extack, struct nlattr *tb[])
1908
1909 {
1910 enum rdma_nl_counter_mode mode;
1911 enum rdma_nl_counter_mask mask;
1912 struct ib_device *device;
1913 struct sk_buff *msg;
1914 u32 index, port;
1915 int ret;
1916
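/* A specific counter ID is a resource query; let the resource handler serve it. */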
1917 if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
1918 return nldev_res_get_counter_doit(skb, nlh, extack);
1919
1920 if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
1921 !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1922 return -EINVAL;
1923
1924 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1925 device = ib_device_get_by_index(sock_net(skb->sk), index);
1926 if (!device)
1927 return -EINVAL;
1928
1929 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1930 if (!rdma_is_port_valid(device, port)) {
1931 ret = -EINVAL;
1932 goto err;
1933 }
1934
1935 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1936 if (!msg) {
1937 ret = -ENOMEM;
1938 goto err;
1939 }
1940
1941 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1942 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1943 RDMA_NLDEV_CMD_STAT_GET),
1944 0, 0);
1945
1946 ret = rdma_counter_get_mode(device, port, &mode, &mask);
1947 if (ret)
1948 goto err_msg;
1949
1950 if (fill_nldev_handle(msg, device) ||
1951 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1952 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
1953 ret = -EMSGSIZE;
1954 goto err_msg;
1955 }
1956
1957 if ((mode == RDMA_COUNTER_MODE_AUTO) &&
1958 nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
1959 ret = -EMSGSIZE;
1960 goto err_msg;
1961 }
1962
1963 nlmsg_end(msg, nlh);
1964 ib_device_put(device);
1965 return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1966
1967 err_msg:
1968 nlmsg_free(msg);
1969 err:
1970 ib_device_put(device);
1971 return ret;
1972 }
1973
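/*
 * Entry point for RDMA_NLDEV_CMD_STAT_GET: with no STAT_RES attribute the
 * default (port-wide) hw counters are returned, otherwise the request is
 * dispatched by resource type.
 */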
1974 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1975 struct netlink_ext_ack *extack)
1976 {
1977 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1978 int ret;
1979
1980 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1981 nldev_policy, extack);
1982 if (ret)
1983 return -EINVAL;
1984
1985 if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
1986 return stat_get_doit_default_counter(skb, nlh, extack, tb);
1987
1988 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
1989 case RDMA_NLDEV_ATTR_RES_QP:
1990 ret = stat_get_doit_qp(skb, nlh, extack, tb);
1991 break;
1992
1993 default:
1994 ret = -EINVAL;
1995 break;
1996 }
1997
1998 return ret;
1999 }
2000
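/* Dump variant of STAT_GET; currently only QP counters can be iterated. */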
2001 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2002 struct netlink_callback *cb)
2003 {
2004 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2005 int ret;
2006
2007 ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2008 nldev_policy, NULL);
2009 if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2010 return -EINVAL;
2011
2012 switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2013 case RDMA_NLDEV_ATTR_RES_QP:
2014 ret = nldev_res_get_counter_dumpit(skb, cb);
2015 break;
2016
2017 default:
2018 ret = -EINVAL;
2019 break;
2020 }
2021
2022 return ret;
2023 }
2024
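/*
 * Dispatch table for RDMA_NLDEV_CMD_* operations. Entries flagged with
 * RDMA_NL_ADMIN_PERM are restricted to privileged (network admin) callers.
 */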
2025 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2026 [RDMA_NLDEV_CMD_GET] = {
2027 .doit = nldev_get_doit,
2028 .dump = nldev_get_dumpit,
2029 },
2030 [RDMA_NLDEV_CMD_GET_CHARDEV] = {
2031 .doit = nldev_get_chardev,
2032 },
2033 [RDMA_NLDEV_CMD_SET] = {
2034 .doit = nldev_set_doit,
2035 .flags = RDMA_NL_ADMIN_PERM,
2036 },
2037 [RDMA_NLDEV_CMD_NEWLINK] = {
2038 .doit = nldev_newlink,
2039 .flags = RDMA_NL_ADMIN_PERM,
2040 },
2041 [RDMA_NLDEV_CMD_DELLINK] = {
2042 .doit = nldev_dellink,
2043 .flags = RDMA_NL_ADMIN_PERM,
2044 },
2045 [RDMA_NLDEV_CMD_PORT_GET] = {
2046 .doit = nldev_port_get_doit,
2047 .dump = nldev_port_get_dumpit,
2048 },
2049 [RDMA_NLDEV_CMD_RES_GET] = {
2050 .doit = nldev_res_get_doit,
2051 .dump = nldev_res_get_dumpit,
2052 },
2053 [RDMA_NLDEV_CMD_RES_QP_GET] = {
2054 .doit = nldev_res_get_qp_doit,
2055 .dump = nldev_res_get_qp_dumpit,
2056 },
2057 [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2058 .doit = nldev_res_get_cm_id_doit,
2059 .dump = nldev_res_get_cm_id_dumpit,
2060 },
2061 [RDMA_NLDEV_CMD_RES_CQ_GET] = {
2062 .doit = nldev_res_get_cq_doit,
2063 .dump = nldev_res_get_cq_dumpit,
2064 },
2065 [RDMA_NLDEV_CMD_RES_MR_GET] = {
2066 .doit = nldev_res_get_mr_doit,
2067 .dump = nldev_res_get_mr_dumpit,
2068 },
2069 [RDMA_NLDEV_CMD_RES_PD_GET] = {
2070 .doit = nldev_res_get_pd_doit,
2071 .dump = nldev_res_get_pd_dumpit,
2072 },
2073 [RDMA_NLDEV_CMD_SYS_GET] = {
2074 .doit = nldev_sys_get_doit,
2075 },
2076 [RDMA_NLDEV_CMD_SYS_SET] = {
2077 .doit = nldev_set_sys_set_doit,
2078 },
2079 [RDMA_NLDEV_CMD_STAT_SET] = {
2080 .doit = nldev_stat_set_doit,
2081 .flags = RDMA_NL_ADMIN_PERM,
2082 },
2083 [RDMA_NLDEV_CMD_STAT_GET] = {
2084 .doit = nldev_stat_get_doit,
2085 .dump = nldev_stat_get_dumpit,
2086 },
2087 [RDMA_NLDEV_CMD_STAT_DEL] = {
2088 .doit = nldev_stat_del_doit,
2089 .flags = RDMA_NL_ADMIN_PERM,
2090 },
2091 };
2092
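/* Register/unregister the nldev command table with the RDMA netlink core. */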
2093 void __init nldev_init(void)
2094 {
2095 rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2096 }
2097
2098 void __exit nldev_exit(void)
2099 {
2100 rdma_nl_unregister(RDMA_NL_NLDEV);
2101 }
2102
2103 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
2104