1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Neither the names of the copyright holders nor the names of its
13  *    contributors may be used to endorse or promote products derived from
14  *    this software without specific prior written permission.
15  *
16  * Alternatively, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") version 2 as published by the Free
18  * Software Foundation.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/pid.h>
35 #include <linux/pid_namespace.h>
36 #include <linux/mutex.h>
37 #include <net/netlink.h>
38 #include <rdma/rdma_cm.h>
39 #include <rdma/rdma_netlink.h>
40 
41 #include "core_priv.h"
42 #include "cma_priv.h"
43 #include "restrack.h"
44 #include "uverbs.h"
45 
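/* Signature shared by the fill_res_*_entry() helpers that dump one restrack object. */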
46 typedef int (*res_fill_func_t)(struct sk_buff*, bool,
47 			       struct rdma_restrack_entry*, uint32_t);
48 
49 /*
50  * Sort array elements by the netlink attribute name
51  */
52 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
53 	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
54 	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
55 	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
56 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
57 	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
58 					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
59 	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
60 	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
61 	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
62 					.len = IB_DEVICE_NAME_MAX },
63 	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
64 	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
65 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
66 	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
67 	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
68 	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
69 	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
70 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
71 	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
72 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
73 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
74 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
75 	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
76 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
77 	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
78 	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
79 					.len = IFNAMSIZ },
80 	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
81 	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
82 	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
83 					.len = IFNAMSIZ },
84 	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
85 	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
86 	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
87 	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
88 	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
89 	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
90 	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
91 	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
92 	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
93 	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
94 	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
95 	[RDMA_NLDEV_ATTR_RES_CTX]		= { .type = NLA_NESTED },
96 	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
97 	[RDMA_NLDEV_ATTR_RES_CTX_ENTRY]		= { .type = NLA_NESTED },
98 	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
99 			.len = sizeof(struct __kernel_sockaddr_storage) },
100 	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
101 	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
102 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
103 	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
104 	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
105 	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
106 	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
107 	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
108 	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
109 	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
110 	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
111 	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
112 	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
113 	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
114 	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
115 	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
116 	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
117 	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
118 	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
119 	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
120 	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
121 	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
122 	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
123 	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
124 	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
125 			.len = sizeof(struct __kernel_sockaddr_storage) },
126 	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
127 	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
128 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
129 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
130 	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
131 					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
132 	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
133 	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
134 	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
135 	[RDMA_NLDEV_ATTR_RES_SRQ]		= { .type = NLA_NESTED },
136 	[RDMA_NLDEV_ATTR_RES_SRQN]		= { .type = NLA_U32 },
137 	[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY]		= { .type = NLA_NESTED },
138 	[RDMA_NLDEV_ATTR_MIN_RANGE]		= { .type = NLA_U32 },
139 	[RDMA_NLDEV_ATTR_MAX_RANGE]		= { .type = NLA_U32 },
140 	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
141 	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
142 	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
143 	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
144 	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
145 	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
146 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
147 	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
148 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
149 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
150 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
151 	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
152 	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
153 	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
154 	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
155 	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
156 	[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]	= { .type = NLA_U8 },
157 };
158 
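/* Helpers below let drivers attach vendor-specific name/value attributes to a dump. */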
159 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
160 				      enum rdma_nldev_print_type print_type)
161 {
162 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
163 		return -EMSGSIZE;
164 	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
165 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
166 		return -EMSGSIZE;
167 
168 	return 0;
169 }
170 
171 static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
172 				   enum rdma_nldev_print_type print_type,
173 				   u32 value)
174 {
175 	if (put_driver_name_print_type(msg, name, print_type))
176 		return -EMSGSIZE;
177 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
178 		return -EMSGSIZE;
179 
180 	return 0;
181 }
182 
183 static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
184 				   enum rdma_nldev_print_type print_type,
185 				   u64 value)
186 {
187 	if (put_driver_name_print_type(msg, name, print_type))
188 		return -EMSGSIZE;
189 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
190 			      RDMA_NLDEV_ATTR_PAD))
191 		return -EMSGSIZE;
192 
193 	return 0;
194 }
195 
196 int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
197 			      const char *str)
198 {
199 	if (put_driver_name_print_type(msg, name,
200 				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
201 		return -EMSGSIZE;
202 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
203 		return -EMSGSIZE;
204 
205 	return 0;
206 }
207 EXPORT_SYMBOL(rdma_nl_put_driver_string);
208 
209 int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
210 {
211 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
212 				       value);
213 }
214 EXPORT_SYMBOL(rdma_nl_put_driver_u32);
215 
216 int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
217 			       u32 value)
218 {
219 	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
220 				       value);
221 }
222 EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
223 
224 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
225 {
226 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
227 				       value);
228 }
229 EXPORT_SYMBOL(rdma_nl_put_driver_u64);
230 
231 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
232 {
233 	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
234 				       value);
235 }
236 EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
237 
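/* Every nldev reply starts with the device index and device name. */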
238 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
239 {
240 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
241 		return -EMSGSIZE;
242 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
243 			   dev_name(&device->dev)))
244 		return -EMSGSIZE;
245 
246 	return 0;
247 }
248 
249 static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
250 {
251 	char fw[IB_FW_VERSION_NAME_MAX];
252 	int ret = 0;
253 	u32 port;
254 
255 	if (fill_nldev_handle(msg, device))
256 		return -EMSGSIZE;
257 
258 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
259 		return -EMSGSIZE;
260 
261 	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
262 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
263 			      device->attrs.device_cap_flags,
264 			      RDMA_NLDEV_ATTR_PAD))
265 		return -EMSGSIZE;
266 
267 	ib_get_device_fw_str(device, fw);
268 	/* Device without FW has strlen(fw) = 0 */
269 	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
270 		return -EMSGSIZE;
271 
272 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
273 			      be64_to_cpu(device->node_guid),
274 			      RDMA_NLDEV_ATTR_PAD))
275 		return -EMSGSIZE;
276 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
277 			      be64_to_cpu(device->attrs.sys_image_guid),
278 			      RDMA_NLDEV_ATTR_PAD))
279 		return -EMSGSIZE;
280 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
281 		return -EMSGSIZE;
282 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
283 		return -EMSGSIZE;
284 
285 	/*
286 	 * The link type is determined from the first port. An mlx4 device,
287 	 * which can potentially have two different link types on the same
288 	 * IB device, is considered a case better avoided in the future.
289 	 */
290 	port = rdma_start_port(device);
291 	if (rdma_cap_opa_mad(device, port))
292 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
293 	else if (rdma_protocol_ib(device, port))
294 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
295 	else if (rdma_protocol_iwarp(device, port))
296 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
297 	else if (rdma_protocol_roce(device, port))
298 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
299 	else if (rdma_protocol_usnic(device, port))
300 		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
301 				     "usnic");
302 	return ret;
303 }
304 
305 static int fill_port_info(struct sk_buff *msg,
306 			  struct ib_device *device, u32 port,
307 			  const struct net *net)
308 {
309 	struct net_device *netdev = NULL;
310 	struct ib_port_attr attr;
311 	int ret;
312 	u64 cap_flags = 0;
313 
314 	if (fill_nldev_handle(msg, device))
315 		return -EMSGSIZE;
316 
317 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
318 		return -EMSGSIZE;
319 
320 	ret = ib_query_port(device, port, &attr);
321 	if (ret)
322 		return ret;
323 
324 	if (rdma_protocol_ib(device, port)) {
325 		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
326 				sizeof(attr.port_cap_flags2)) > sizeof(u64));
327 		cap_flags = attr.port_cap_flags |
328 			((u64)attr.port_cap_flags2 << 32);
329 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
330 				      cap_flags, RDMA_NLDEV_ATTR_PAD))
331 			return -EMSGSIZE;
332 		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
333 				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
334 			return -EMSGSIZE;
335 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
336 			return -EMSGSIZE;
337 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
338 			return -EMSGSIZE;
339 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
340 			return -EMSGSIZE;
341 	}
342 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
343 		return -EMSGSIZE;
344 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
345 		return -EMSGSIZE;
346 
347 	netdev = ib_device_get_netdev(device, port);
348 	if (netdev && net_eq(dev_net(netdev), net)) {
349 		ret = nla_put_u32(msg,
350 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
351 		if (ret)
352 			goto out;
353 		ret = nla_put_string(msg,
354 				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
355 	}
356 
357 out:
358 	if (netdev)
359 		dev_put(netdev);
360 	return ret;
361 }
362 
363 static int fill_res_info_entry(struct sk_buff *msg,
364 			       const char *name, u64 curr)
365 {
366 	struct nlattr *entry_attr;
367 
368 	entry_attr = nla_nest_start_noflag(msg,
369 					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
370 	if (!entry_attr)
371 		return -EMSGSIZE;
372 
373 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
374 		goto err;
375 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
376 			      RDMA_NLDEV_ATTR_PAD))
377 		goto err;
378 
379 	nla_nest_end(msg, entry_attr);
380 	return 0;
381 
382 err:
383 	nla_nest_cancel(msg, entry_attr);
384 	return -EMSGSIZE;
385 }
386 
387 static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
388 {
389 	static const char * const names[RDMA_RESTRACK_MAX] = {
390 		[RDMA_RESTRACK_PD] = "pd",
391 		[RDMA_RESTRACK_CQ] = "cq",
392 		[RDMA_RESTRACK_QP] = "qp",
393 		[RDMA_RESTRACK_CM_ID] = "cm_id",
394 		[RDMA_RESTRACK_MR] = "mr",
395 		[RDMA_RESTRACK_CTX] = "ctx",
396 		[RDMA_RESTRACK_SRQ] = "srq",
397 	};
398 
399 	struct nlattr *table_attr;
400 	int ret, i, curr;
401 
402 	if (fill_nldev_handle(msg, device))
403 		return -EMSGSIZE;
404 
405 	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
406 	if (!table_attr)
407 		return -EMSGSIZE;
408 
409 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
410 		if (!names[i])
411 			continue;
412 		curr = rdma_restrack_count(device, i);
413 		ret = fill_res_info_entry(msg, names[i], curr);
414 		if (ret)
415 			goto err;
416 	}
417 
418 	nla_nest_end(msg, table_attr);
419 	return 0;
420 
421 err:
422 	nla_nest_cancel(msg, table_attr);
423 	return ret;
424 }
425 
426 static int fill_res_name_pid(struct sk_buff *msg,
427 			     struct rdma_restrack_entry *res)
428 {
429 	int err = 0;
430 
431 	/*
432 	 * For user resources, the user should read /proc/PID/comm to get
433 	 * the name of the task.
434 	 */
435 	if (rdma_is_kernel_res(res)) {
436 		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
437 				     res->kern_name);
438 	} else {
439 		pid_t pid;
440 
441 		pid = task_pid_vnr(res->task);
442 		/*
443 		 * The task is dead and in a zombie state.
444 		 * There is no need to print the PID anymore.
445 		 */
446 		if (pid)
447 			/*
448 			 * This part is racy, task can be killed and PID will
449 			 * be zero right here but it is ok, next query won't
450 			 * return PID. We don't promise real-time reflection
451 			 * of SW objects.
452 			 */
453 			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
454 	}
455 
456 	return err ? -EMSGSIZE : 0;
457 }
458 
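/* Query the QP and dump the attributes that depend on its type and current state. */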
459 static int fill_res_qp_entry_query(struct sk_buff *msg,
460 				   struct rdma_restrack_entry *res,
461 				   struct ib_device *dev,
462 				   struct ib_qp *qp)
463 {
464 	struct ib_qp_init_attr qp_init_attr;
465 	struct ib_qp_attr qp_attr;
466 	int ret;
467 
468 	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
469 	if (ret)
470 		return ret;
471 
472 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
473 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
474 				qp_attr.dest_qp_num))
475 			goto err;
476 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
477 				qp_attr.rq_psn))
478 			goto err;
479 	}
480 
481 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
482 		goto err;
483 
484 	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
485 	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
486 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
487 			       qp_attr.path_mig_state))
488 			goto err;
489 	}
490 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
491 		goto err;
492 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
493 		goto err;
494 
495 	if (dev->ops.fill_res_qp_entry)
496 		return dev->ops.fill_res_qp_entry(msg, qp);
497 	return 0;
498 
499 err:	return -EMSGSIZE;
500 }
501 
502 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
503 			     struct rdma_restrack_entry *res, uint32_t port)
504 {
505 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
506 	struct ib_device *dev = qp->device;
507 	int ret;
508 
509 	if (port && port != qp->port)
510 		return -EAGAIN;
511 
512 	/* In create_qp() port is not set yet */
513 	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
514 		return -EMSGSIZE;
515 
516 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
517 	if (ret)
518 		return -EMSGSIZE;
519 
520 	if (!rdma_is_kernel_res(res) &&
521 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
522 		return -EMSGSIZE;
523 
524 	ret = fill_res_name_pid(msg, res);
525 	if (ret)
526 		return -EMSGSIZE;
527 
528 	return fill_res_qp_entry_query(msg, res, dev, qp);
529 }
530 
531 static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
532 				 struct rdma_restrack_entry *res, uint32_t port)
533 {
534 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
535 	struct ib_device *dev = qp->device;
536 
537 	if (port && port != qp->port)
538 		return -EAGAIN;
539 	if (!dev->ops.fill_res_qp_entry_raw)
540 		return -EINVAL;
541 	return dev->ops.fill_res_qp_entry_raw(msg, qp);
542 }
543 
544 static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
545 				struct rdma_restrack_entry *res, uint32_t port)
546 {
547 	struct rdma_id_private *id_priv =
548 				container_of(res, struct rdma_id_private, res);
549 	struct ib_device *dev = id_priv->id.device;
550 	struct rdma_cm_id *cm_id = &id_priv->id;
551 
552 	if (port && port != cm_id->port_num)
553 		return -EAGAIN;
554 
555 	if (cm_id->port_num &&
556 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
557 		goto err;
558 
559 	if (id_priv->qp_num) {
560 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
561 			goto err;
562 		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
563 			goto err;
564 	}
565 
566 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
567 		goto err;
568 
569 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
570 		goto err;
571 
572 	if (cm_id->route.addr.src_addr.ss_family &&
573 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
574 		    sizeof(cm_id->route.addr.src_addr),
575 		    &cm_id->route.addr.src_addr))
576 		goto err;
577 	if (cm_id->route.addr.dst_addr.ss_family &&
578 	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
579 		    sizeof(cm_id->route.addr.dst_addr),
580 		    &cm_id->route.addr.dst_addr))
581 		goto err;
582 
583 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
584 		goto err;
585 
586 	if (fill_res_name_pid(msg, res))
587 		goto err;
588 
589 	if (dev->ops.fill_res_cm_id_entry)
590 		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
591 	return 0;
592 
593 err: return -EMSGSIZE;
594 }
595 
596 static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
597 			     struct rdma_restrack_entry *res, uint32_t port)
598 {
599 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
600 	struct ib_device *dev = cq->device;
601 
602 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
603 		return -EMSGSIZE;
604 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
605 			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
606 		return -EMSGSIZE;
607 
608 	/* Poll context is only valid for kernel CQs */
609 	if (rdma_is_kernel_res(res) &&
610 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
611 		return -EMSGSIZE;
612 
613 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
614 		return -EMSGSIZE;
615 
616 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
617 		return -EMSGSIZE;
618 	if (!rdma_is_kernel_res(res) &&
619 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
620 			cq->uobject->uevent.uobject.context->res.id))
621 		return -EMSGSIZE;
622 
623 	if (fill_res_name_pid(msg, res))
624 		return -EMSGSIZE;
625 
626 	return (dev->ops.fill_res_cq_entry) ?
627 		dev->ops.fill_res_cq_entry(msg, cq) : 0;
628 }
629 
630 static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
631 				 struct rdma_restrack_entry *res, uint32_t port)
632 {
633 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
634 	struct ib_device *dev = cq->device;
635 
636 	if (!dev->ops.fill_res_cq_entry_raw)
637 		return -EINVAL;
638 	return dev->ops.fill_res_cq_entry_raw(msg, cq);
639 }
640 
641 static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
642 			     struct rdma_restrack_entry *res, uint32_t port)
643 {
644 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
645 	struct ib_device *dev = mr->pd->device;
646 
647 	if (has_cap_net_admin) {
648 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
649 			return -EMSGSIZE;
650 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
651 			return -EMSGSIZE;
652 	}
653 
654 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
655 			      RDMA_NLDEV_ATTR_PAD))
656 		return -EMSGSIZE;
657 
658 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
659 		return -EMSGSIZE;
660 
661 	if (!rdma_is_kernel_res(res) &&
662 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
663 		return -EMSGSIZE;
664 
665 	if (fill_res_name_pid(msg, res))
666 		return -EMSGSIZE;
667 
668 	return (dev->ops.fill_res_mr_entry) ?
669 		       dev->ops.fill_res_mr_entry(msg, mr) :
670 		       0;
671 }
672 
673 static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
674 				 struct rdma_restrack_entry *res, uint32_t port)
675 {
676 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
677 	struct ib_device *dev = mr->pd->device;
678 
679 	if (!dev->ops.fill_res_mr_entry_raw)
680 		return -EINVAL;
681 	return dev->ops.fill_res_mr_entry_raw(msg, mr);
682 }
683 
684 static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
685 			     struct rdma_restrack_entry *res, uint32_t port)
686 {
687 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
688 
689 	if (has_cap_net_admin) {
690 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
691 				pd->local_dma_lkey))
692 			goto err;
693 		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
694 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
695 				pd->unsafe_global_rkey))
696 			goto err;
697 	}
698 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
699 			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
700 		goto err;
701 
702 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
703 		goto err;
704 
705 	if (!rdma_is_kernel_res(res) &&
706 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
707 			pd->uobject->context->res.id))
708 		goto err;
709 
710 	return fill_res_name_pid(msg, res);
711 
712 err:	return -EMSGSIZE;
713 }
714 
715 static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
716 			      struct rdma_restrack_entry *res, uint32_t port)
717 {
718 	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
719 
720 	if (rdma_is_kernel_res(res))
721 		return 0;
722 
723 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
724 		return -EMSGSIZE;
725 
726 	return fill_res_name_pid(msg, res);
727 }
728 
729 static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
730 				   uint32_t max_range)
731 {
732 	struct nlattr *entry_attr;
733 
734 	if (!min_range)
735 		return 0;
736 
737 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
738 	if (!entry_attr)
739 		return -EMSGSIZE;
740 
741 	if (min_range == max_range) {
742 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
743 			goto err;
744 	} else {
745 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
746 			goto err;
747 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
748 			goto err;
749 	}
750 	nla_nest_end(msg, entry_attr);
751 	return 0;
752 
753 err:
754 	nla_nest_cancel(msg, entry_attr);
755 	return -EMSGSIZE;
756 }
757 
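/* Dump the QPs attached to this SRQ, compressing consecutive QP numbers into ranges. */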
758 static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
759 {
760 	uint32_t min_range = 0, prev = 0;
761 	struct rdma_restrack_entry *res;
762 	struct rdma_restrack_root *rt;
763 	struct nlattr *table_attr;
764 	struct ib_qp *qp = NULL;
765 	unsigned long id = 0;
766 
767 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
768 	if (!table_attr)
769 		return -EMSGSIZE;
770 
771 	rt = &srq->device->res[RDMA_RESTRACK_QP];
772 	xa_lock(&rt->xa);
773 	xa_for_each(&rt->xa, id, res) {
774 		if (!rdma_restrack_get(res))
775 			continue;
776 
777 		qp = container_of(res, struct ib_qp, res);
778 		if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
779 			rdma_restrack_put(res);
780 			continue;
781 		}
782 
783 		if (qp->qp_num < prev)
784 			/* qp_num should be ascending */
785 			goto err_loop;
786 
787 		if (min_range == 0) {
788 			min_range = qp->qp_num;
789 		} else if (qp->qp_num > (prev + 1)) {
790 			if (fill_res_range_qp_entry(msg, min_range, prev))
791 				goto err_loop;
792 
793 			min_range = qp->qp_num;
794 		}
795 		prev = qp->qp_num;
796 		rdma_restrack_put(res);
797 	}
798 
799 	xa_unlock(&rt->xa);
800 
801 	if (fill_res_range_qp_entry(msg, min_range, prev))
802 		goto err;
803 
804 	nla_nest_end(msg, table_attr);
805 	return 0;
806 
807 err_loop:
808 	rdma_restrack_put(res);
809 	xa_unlock(&rt->xa);
810 err:
811 	nla_nest_cancel(msg, table_attr);
812 	return -EMSGSIZE;
813 }
814 
815 static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
816 			      struct rdma_restrack_entry *res, uint32_t port)
817 {
818 	struct ib_srq *srq = container_of(res, struct ib_srq, res);
819 
820 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
821 		goto err;
822 
823 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
824 		goto err;
825 
826 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
827 		goto err;
828 
829 	if (ib_srq_has_cq(srq->srq_type)) {
830 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
831 				srq->ext.cq->res.id))
832 			goto err;
833 	}
834 
835 	if (fill_res_srq_qps(msg, srq))
836 		goto err;
837 
838 	return fill_res_name_pid(msg, res);
839 
840 err:
841 	return -EMSGSIZE;
842 }
843 
844 static int fill_stat_counter_mode(struct sk_buff *msg,
845 				  struct rdma_counter *counter)
846 {
847 	struct rdma_counter_mode *m = &counter->mode;
848 
849 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
850 		return -EMSGSIZE;
851 
852 	if (m->mode == RDMA_COUNTER_MODE_AUTO) {
853 		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
854 		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
855 			return -EMSGSIZE;
856 
857 		if ((m->mask & RDMA_COUNTER_MASK_PID) &&
858 		    fill_res_name_pid(msg, &counter->res))
859 			return -EMSGSIZE;
860 	}
861 
862 	return 0;
863 }
864 
865 static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
866 {
867 	struct nlattr *entry_attr;
868 
869 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
870 	if (!entry_attr)
871 		return -EMSGSIZE;
872 
873 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
874 		goto err;
875 
876 	nla_nest_end(msg, entry_attr);
877 	return 0;
878 
879 err:
880 	nla_nest_cancel(msg, entry_attr);
881 	return -EMSGSIZE;
882 }
883 
884 static int fill_stat_counter_qps(struct sk_buff *msg,
885 				 struct rdma_counter *counter)
886 {
887 	struct rdma_restrack_entry *res;
888 	struct rdma_restrack_root *rt;
889 	struct nlattr *table_attr;
890 	struct ib_qp *qp = NULL;
891 	unsigned long id = 0;
892 	int ret = 0;
893 
894 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
895 	if (!table_attr)
896 		return -EMSGSIZE;
897 
898 	rt = &counter->device->res[RDMA_RESTRACK_QP];
899 	xa_lock(&rt->xa);
900 	xa_for_each(&rt->xa, id, res) {
901 		qp = container_of(res, struct ib_qp, res);
902 		if (!qp->counter || (qp->counter->id != counter->id))
903 			continue;
904 
905 		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
906 		if (ret)
907 			goto err;
908 	}
909 
910 	xa_unlock(&rt->xa);
911 	nla_nest_end(msg, table_attr);
912 	return 0;
913 
914 err:
915 	xa_unlock(&rt->xa);
916 	nla_nest_cancel(msg, table_attr);
917 	return ret;
918 }
919 
920 int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
921 				 u64 value)
922 {
923 	struct nlattr *entry_attr;
924 
925 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
926 	if (!entry_attr)
927 		return -EMSGSIZE;
928 
929 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
930 			   name))
931 		goto err;
932 	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
933 			      value, RDMA_NLDEV_ATTR_PAD))
934 		goto err;
935 
936 	nla_nest_end(msg, entry_attr);
937 	return 0;
938 
939 err:
940 	nla_nest_cancel(msg, entry_attr);
941 	return -EMSGSIZE;
942 }
943 EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
944 
945 static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
946 			      struct rdma_restrack_entry *res, uint32_t port)
947 {
948 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
949 	struct ib_device *dev = mr->pd->device;
950 
951 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
952 		goto err;
953 
954 	if (dev->ops.fill_stat_mr_entry)
955 		return dev->ops.fill_stat_mr_entry(msg, mr);
956 	return 0;
957 
958 err:
959 	return -EMSGSIZE;
960 }
961 
962 static int fill_stat_counter_hwcounters(struct sk_buff *msg,
963 					struct rdma_counter *counter)
964 {
965 	struct rdma_hw_stats *st = counter->stats;
966 	struct nlattr *table_attr;
967 	int i;
968 
969 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
970 	if (!table_attr)
971 		return -EMSGSIZE;
972 
973 	for (i = 0; i < st->num_counters; i++)
974 		if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
975 			goto err;
976 
977 	nla_nest_end(msg, table_attr);
978 	return 0;
979 
980 err:
981 	nla_nest_cancel(msg, table_attr);
982 	return -EMSGSIZE;
983 }
984 
985 static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
986 				  struct rdma_restrack_entry *res,
987 				  uint32_t port)
988 {
989 	struct rdma_counter *counter =
990 		container_of(res, struct rdma_counter, res);
991 
992 	if (port && port != counter->port)
993 		return -EAGAIN;
994 
995 	/* Dump it even if the query failed */
996 	rdma_counter_query_stats(counter);
997 
998 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
999 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
1000 	    fill_stat_counter_mode(msg, counter) ||
1001 	    fill_stat_counter_qps(msg, counter) ||
1002 	    fill_stat_counter_hwcounters(msg, counter))
1003 		return -EMSGSIZE;
1004 
1005 	return 0;
1006 }
1007 
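/* Doit handler for RDMA_NLDEV_CMD_GET: reply with the information of a single device. */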
1008 static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1009 			  struct netlink_ext_ack *extack)
1010 {
1011 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1012 	struct ib_device *device;
1013 	struct sk_buff *msg;
1014 	u32 index;
1015 	int err;
1016 
1017 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1018 				     nldev_policy, extack);
1019 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1020 		return -EINVAL;
1021 
1022 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1023 
1024 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1025 	if (!device)
1026 		return -EINVAL;
1027 
1028 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1029 	if (!msg) {
1030 		err = -ENOMEM;
1031 		goto err;
1032 	}
1033 
1034 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1035 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1036 			0, 0);
1037 
1038 	err = fill_dev_info(msg, device);
1039 	if (err)
1040 		goto err_free;
1041 
1042 	nlmsg_end(msg, nlh);
1043 
1044 	ib_device_put(device);
1045 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1046 
1047 err_free:
1048 	nlmsg_free(msg);
1049 err:
1050 	ib_device_put(device);
1051 	return err;
1052 }
1053 
1054 static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1055 			  struct netlink_ext_ack *extack)
1056 {
1057 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1058 	struct ib_device *device;
1059 	u32 index;
1060 	int err;
1061 
1062 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1063 				     nldev_policy, extack);
1064 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1065 		return -EINVAL;
1066 
1067 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1068 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1069 	if (!device)
1070 		return -EINVAL;
1071 
1072 	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
1073 		char name[IB_DEVICE_NAME_MAX] = {};
1074 
1075 		nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1076 			    IB_DEVICE_NAME_MAX);
1077 		if (strlen(name) == 0) {
1078 			err = -EINVAL;
1079 			goto done;
1080 		}
1081 		err = ib_device_rename(device, name);
1082 		goto done;
1083 	}
1084 
1085 	if (tb[RDMA_NLDEV_NET_NS_FD]) {
1086 		u32 ns_fd;
1087 
1088 		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
1089 		err = ib_device_set_netns_put(skb, device, ns_fd);
1090 		goto put_done;
1091 	}
1092 
1093 	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
1094 		u8 use_dim;
1095 
1096 		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
1097 		err = ib_device_set_dim(device,  use_dim);
1098 		goto done;
1099 	}
1100 
1101 done:
1102 	ib_device_put(device);
1103 put_done:
1104 	return err;
1105 }
1106 
1107 static int _nldev_get_dumpit(struct ib_device *device,
1108 			     struct sk_buff *skb,
1109 			     struct netlink_callback *cb,
1110 			     unsigned int idx)
1111 {
1112 	int start = cb->args[0];
1113 	struct nlmsghdr *nlh;
1114 
1115 	if (idx < start)
1116 		return 0;
1117 
1118 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1119 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1120 			0, NLM_F_MULTI);
1121 
1122 	if (fill_dev_info(skb, device)) {
1123 		nlmsg_cancel(skb, nlh);
1124 		goto out;
1125 	}
1126 
1127 	nlmsg_end(skb, nlh);
1128 
1129 	idx++;
1130 
1131 out:	cb->args[0] = idx;
1132 	return skb->len;
1133 }
1134 
1135 static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
1136 {
1137 	/*
1138 	 * There is no need to take a lock here, because
1139 	 * we rely on ib_core's locking.
1140 	 */
1141 	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
1142 }
1143 
1144 static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1145 			       struct netlink_ext_ack *extack)
1146 {
1147 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1148 	struct ib_device *device;
1149 	struct sk_buff *msg;
1150 	u32 index;
1151 	u32 port;
1152 	int err;
1153 
1154 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1155 				     nldev_policy, extack);
1156 	if (err ||
1157 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1158 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1159 		return -EINVAL;
1160 
1161 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1162 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1163 	if (!device)
1164 		return -EINVAL;
1165 
1166 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1167 	if (!rdma_is_port_valid(device, port)) {
1168 		err = -EINVAL;
1169 		goto err;
1170 	}
1171 
1172 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1173 	if (!msg) {
1174 		err = -ENOMEM;
1175 		goto err;
1176 	}
1177 
1178 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1179 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1180 			0, 0);
1181 
1182 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
1183 	if (err)
1184 		goto err_free;
1185 
1186 	nlmsg_end(msg, nlh);
1187 	ib_device_put(device);
1188 
1189 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1190 
1191 err_free:
1192 	nlmsg_free(msg);
1193 err:
1194 	ib_device_put(device);
1195 	return err;
1196 }
1197 
1198 static int nldev_port_get_dumpit(struct sk_buff *skb,
1199 				 struct netlink_callback *cb)
1200 {
1201 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1202 	struct ib_device *device;
1203 	int start = cb->args[0];
1204 	struct nlmsghdr *nlh;
1205 	u32 idx = 0;
1206 	u32 ifindex;
1207 	int err;
1208 	unsigned int p;
1209 
1210 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1211 				     nldev_policy, NULL);
1212 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1213 		return -EINVAL;
1214 
1215 	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1216 	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1217 	if (!device)
1218 		return -EINVAL;
1219 
1220 	rdma_for_each_port (device, p) {
1221 		/*
1222 		 * The dumpit function returns all information from a specific
1223 		 * index onward. This index is taken from the netlink
1224 		 * request sent by the user and is available
1225 		 * in cb->args[0].
1226 		 *
1227 		 * Usually, the user doesn't fill this field, which causes
1228 		 * everything to be returned.
1229 		 *
1230 		 */
1231 		if (idx < start) {
1232 			idx++;
1233 			continue;
1234 		}
1235 
1236 		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1237 				cb->nlh->nlmsg_seq,
1238 				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1239 						 RDMA_NLDEV_CMD_PORT_GET),
1240 				0, NLM_F_MULTI);
1241 
1242 		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
1243 			nlmsg_cancel(skb, nlh);
1244 			goto out;
1245 		}
1246 		idx++;
1247 		nlmsg_end(skb, nlh);
1248 	}
1249 
1250 out:
1251 	ib_device_put(device);
1252 	cb->args[0] = idx;
1253 	return skb->len;
1254 }
1255 
1256 static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1257 			      struct netlink_ext_ack *extack)
1258 {
1259 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1260 	struct ib_device *device;
1261 	struct sk_buff *msg;
1262 	u32 index;
1263 	int ret;
1264 
1265 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1266 				     nldev_policy, extack);
1267 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1268 		return -EINVAL;
1269 
1270 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1271 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1272 	if (!device)
1273 		return -EINVAL;
1274 
1275 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1276 	if (!msg) {
1277 		ret = -ENOMEM;
1278 		goto err;
1279 	}
1280 
1281 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1282 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1283 			0, 0);
1284 
1285 	ret = fill_res_info(msg, device);
1286 	if (ret)
1287 		goto err_free;
1288 
1289 	nlmsg_end(msg, nlh);
1290 	ib_device_put(device);
1291 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1292 
1293 err_free:
1294 	nlmsg_free(msg);
1295 err:
1296 	ib_device_put(device);
1297 	return ret;
1298 }
1299 
1300 static int _nldev_res_get_dumpit(struct ib_device *device,
1301 				 struct sk_buff *skb,
1302 				 struct netlink_callback *cb,
1303 				 unsigned int idx)
1304 {
1305 	int start = cb->args[0];
1306 	struct nlmsghdr *nlh;
1307 
1308 	if (idx < start)
1309 		return 0;
1310 
1311 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1312 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1313 			0, NLM_F_MULTI);
1314 
1315 	if (fill_res_info(skb, device)) {
1316 		nlmsg_cancel(skb, nlh);
1317 		goto out;
1318 	}
1319 	nlmsg_end(skb, nlh);
1320 
1321 	idx++;
1322 
1323 out:
1324 	cb->args[0] = idx;
1325 	return skb->len;
1326 }
1327 
1328 static int nldev_res_get_dumpit(struct sk_buff *skb,
1329 				struct netlink_callback *cb)
1330 {
1331 	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1332 }
1333 
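/* Per-resource-type description of the netlink attributes used to dump it. */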
1334 struct nldev_fill_res_entry {
1335 	enum rdma_nldev_attr nldev_attr;
1336 	u8 flags;
1337 	u32 entry;
1338 	u32 id;
1339 };
1340 
1341 enum nldev_res_flags {
1342 	NLDEV_PER_DEV = 1 << 0,
1343 };
1344 
1345 static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
1346 	[RDMA_RESTRACK_QP] = {
1347 		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
1348 		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
1349 		.id = RDMA_NLDEV_ATTR_RES_LQPN,
1350 	},
1351 	[RDMA_RESTRACK_CM_ID] = {
1352 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
1353 		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
1354 		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
1355 	},
1356 	[RDMA_RESTRACK_CQ] = {
1357 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
1358 		.flags = NLDEV_PER_DEV,
1359 		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
1360 		.id = RDMA_NLDEV_ATTR_RES_CQN,
1361 	},
1362 	[RDMA_RESTRACK_MR] = {
1363 		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
1364 		.flags = NLDEV_PER_DEV,
1365 		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
1366 		.id = RDMA_NLDEV_ATTR_RES_MRN,
1367 	},
1368 	[RDMA_RESTRACK_PD] = {
1369 		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
1370 		.flags = NLDEV_PER_DEV,
1371 		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
1372 		.id = RDMA_NLDEV_ATTR_RES_PDN,
1373 	},
1374 	[RDMA_RESTRACK_COUNTER] = {
1375 		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
1376 		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
1377 		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
1378 	},
1379 	[RDMA_RESTRACK_CTX] = {
1380 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
1381 		.flags = NLDEV_PER_DEV,
1382 		.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
1383 		.id = RDMA_NLDEV_ATTR_RES_CTXN,
1384 	},
1385 	[RDMA_RESTRACK_SRQ] = {
1386 		.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
1387 		.flags = NLDEV_PER_DEV,
1388 		.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
1389 		.id = RDMA_NLDEV_ATTR_RES_SRQN,
1390 	},
1391 
1392 };
1393 
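/* Common doit path: look up one restrack object by its id and dump it. */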
1394 static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1395 			       struct netlink_ext_ack *extack,
1396 			       enum rdma_restrack_type res_type,
1397 			       res_fill_func_t fill_func)
1398 {
1399 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1400 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1401 	struct rdma_restrack_entry *res;
1402 	struct ib_device *device;
1403 	u32 index, id, port = 0;
1404 	bool has_cap_net_admin;
1405 	struct sk_buff *msg;
1406 	int ret;
1407 
1408 	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1409 				     nldev_policy, extack);
1410 	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1411 		return -EINVAL;
1412 
1413 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1414 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1415 	if (!device)
1416 		return -EINVAL;
1417 
1418 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1419 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1420 		if (!rdma_is_port_valid(device, port)) {
1421 			ret = -EINVAL;
1422 			goto err;
1423 		}
1424 	}
1425 
1426 	if ((port && fe->flags & NLDEV_PER_DEV) ||
1427 	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
1428 		ret = -EINVAL;
1429 		goto err;
1430 	}
1431 
1432 	id = nla_get_u32(tb[fe->id]);
1433 	res = rdma_restrack_get_byid(device, res_type, id);
1434 	if (IS_ERR(res)) {
1435 		ret = PTR_ERR(res);
1436 		goto err;
1437 	}
1438 
1439 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1440 	if (!msg) {
1441 		ret = -ENOMEM;
1442 		goto err_get;
1443 	}
1444 
1445 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1446 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1447 					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
1448 			0, 0);
1449 
1450 	if (fill_nldev_handle(msg, device)) {
1451 		ret = -EMSGSIZE;
1452 		goto err_free;
1453 	}
1454 
1455 	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1456 
1457 	ret = fill_func(msg, has_cap_net_admin, res, port);
1458 	if (ret)
1459 		goto err_free;
1460 
1461 	rdma_restrack_put(res);
1462 	nlmsg_end(msg, nlh);
1463 	ib_device_put(device);
1464 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1465 
1466 err_free:
1467 	nlmsg_free(msg);
1468 err_get:
1469 	rdma_restrack_put(res);
1470 err:
1471 	ib_device_put(device);
1472 	return ret;
1473 }
1474 
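/* Common dumpit path: iterate the device's restrack table for this resource type. */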
1475 static int res_get_common_dumpit(struct sk_buff *skb,
1476 				 struct netlink_callback *cb,
1477 				 enum rdma_restrack_type res_type,
1478 				 res_fill_func_t fill_func)
1479 {
1480 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1481 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1482 	struct rdma_restrack_entry *res;
1483 	struct rdma_restrack_root *rt;
1484 	int err, ret = 0, idx = 0;
1485 	struct nlattr *table_attr;
1486 	struct nlattr *entry_attr;
1487 	struct ib_device *device;
1488 	int start = cb->args[0];
1489 	bool has_cap_net_admin;
1490 	struct nlmsghdr *nlh;
1491 	unsigned long id;
1492 	u32 index, port = 0;
1493 	bool filled = false;
1494 
1495 	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1496 				     nldev_policy, NULL);
1497 	/*
1498 	 * Right now, we expect the device index in order to get res information,
1499 	 * but it is possible to extend this code to return all devices in
1500 	 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX:
1501 	 * if it doesn't exist, we would iterate over all devices.
1502 	 *
1503 	 * But that is not needed for now.
1504 	 */
1505 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1506 		return -EINVAL;
1507 
1508 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1509 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1510 	if (!device)
1511 		return -EINVAL;
1512 
1513 	/*
1514 	 * If no PORT_INDEX is supplied, we will return all QPs from that device
1515 	 */
1516 	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1517 		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1518 		if (!rdma_is_port_valid(device, port)) {
1519 			ret = -EINVAL;
1520 			goto err_index;
1521 		}
1522 	}
1523 
1524 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1525 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1526 					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
1527 			0, NLM_F_MULTI);
1528 
1529 	if (fill_nldev_handle(skb, device)) {
1530 		ret = -EMSGSIZE;
1531 		goto err;
1532 	}
1533 
1534 	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1535 	if (!table_attr) {
1536 		ret = -EMSGSIZE;
1537 		goto err;
1538 	}
1539 
1540 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1541 
1542 	rt = &device->res[res_type];
1543 	xa_lock(&rt->xa);
1544 	/*
1545 	 * FIXME: if the skip ahead is something common this loop should
1546 	 * use xas_for_each & xas_pause to optimize, we can have a lot of
1547 	 * objects.
1548 	 */
1549 	xa_for_each(&rt->xa, id, res) {
1550 		if (idx < start || !rdma_restrack_get(res))
1551 			goto next;
1552 
1553 		xa_unlock(&rt->xa);
1554 
1555 		filled = true;
1556 
1557 		entry_attr = nla_nest_start_noflag(skb, fe->entry);
1558 		if (!entry_attr) {
1559 			ret = -EMSGSIZE;
1560 			rdma_restrack_put(res);
1561 			goto msg_full;
1562 		}
1563 
1564 		ret = fill_func(skb, has_cap_net_admin, res, port);
1565 
1566 		rdma_restrack_put(res);
1567 
1568 		if (ret) {
1569 			nla_nest_cancel(skb, entry_attr);
1570 			if (ret == -EMSGSIZE)
1571 				goto msg_full;
1572 			if (ret == -EAGAIN)
1573 				goto again;
1574 			goto res_err;
1575 		}
1576 		nla_nest_end(skb, entry_attr);
1577 again:		xa_lock(&rt->xa);
1578 next:		idx++;
1579 	}
1580 	xa_unlock(&rt->xa);
1581 
1582 msg_full:
1583 	nla_nest_end(skb, table_attr);
1584 	nlmsg_end(skb, nlh);
1585 	cb->args[0] = idx;
1586 
1587 	/*
1588 	 * No more entries to fill, cancel the message and
1589 	 * return 0 to mark end of dumpit.
1590 	 */
1591 	if (!filled)
1592 		goto err;
1593 
1594 	ib_device_put(device);
1595 	return skb->len;
1596 
1597 res_err:
1598 	nla_nest_cancel(skb, table_attr);
1599 
1600 err:
1601 	nlmsg_cancel(skb, nlh);
1602 
1603 err_index:
1604 	ib_device_put(device);
1605 	return ret;
1606 }
1607 
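/* Generate the per-resource-type doit/dumpit wrappers around the common helpers. */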
1608 #define RES_GET_FUNCS(name, type)                                              \
1609 	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
1610 						 struct netlink_callback *cb)  \
1611 	{                                                                      \
1612 		return res_get_common_dumpit(skb, cb, type,                    \
1613 					     fill_res_##name##_entry);         \
1614 	}                                                                      \
1615 	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
1616 					       struct nlmsghdr *nlh,           \
1617 					       struct netlink_ext_ack *extack) \
1618 	{                                                                      \
1619 		return res_get_common_doit(skb, nlh, extack, type,             \
1620 					   fill_res_##name##_entry);           \
1621 	}
1622 
1623 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1624 RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
1625 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1626 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1627 RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
1628 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1629 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1630 RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
1631 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1632 RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
1633 RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
1634 
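/* Registry of "rdma link add" providers, keyed by the link type string. */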
1635 static LIST_HEAD(link_ops);
1636 static DECLARE_RWSEM(link_ops_rwsem);
1637 
1638 static const struct rdma_link_ops *link_ops_get(const char *type)
1639 {
1640 	const struct rdma_link_ops *ops;
1641 
1642 	list_for_each_entry(ops, &link_ops, list) {
1643 		if (!strcmp(ops->type, type))
1644 			goto out;
1645 	}
1646 	ops = NULL;
1647 out:
1648 	return ops;
1649 }
1650 
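/*
 * Soft-RDMA drivers (e.g. rxe, siw) plug into NEWLINK/DELLINK by registering
 * a struct rdma_link_ops keyed by a type string.  A minimal sketch of how a
 * driver such as rxe uses this (names taken from the rxe driver, shown here
 * purely as an illustration):
 *
 *	static struct rdma_link_ops rxe_link_ops = {
 *		.type = "rxe",
 *		.newlink = rxe_newlink,
 *	};
 *
 *	rdma_link_register(&rxe_link_ops);	// at module init
 *	rdma_link_unregister(&rxe_link_ops);	// at module exit
 */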
1651 void rdma_link_register(struct rdma_link_ops *ops)
1652 {
1653 	down_write(&link_ops_rwsem);
1654 	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1655 		goto out;
1656 	list_add(&ops->list, &link_ops);
1657 out:
1658 	up_write(&link_ops_rwsem);
1659 }
1660 EXPORT_SYMBOL(rdma_link_register);
1661 
1662 void rdma_link_unregister(struct rdma_link_ops *ops)
1663 {
1664 	down_write(&link_ops_rwsem);
1665 	list_del(&ops->list);
1666 	up_write(&link_ops_rwsem);
1667 }
1668 EXPORT_SYMBOL(rdma_link_unregister);
1669 
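/*
 * RDMA_NLDEV_CMD_NEWLINK: create a soft RDMA device on top of a netdev.  The
 * "rdma-link-%s" request_module() below matches the alias exported by link
 * drivers, so asking for a not-yet-loaded type (e.g. "rxe") can autoload it.
 * A representative (illustrative) iproute2 invocation:
 *
 *	rdma link add rxe0 type rxe netdev eth0
 *
 * which carries DEV_NAME="rxe0", LINK_TYPE="rxe" and NDEV_NAME="eth0".
 */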
1670 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1671 			  struct netlink_ext_ack *extack)
1672 {
1673 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1674 	char ibdev_name[IB_DEVICE_NAME_MAX];
1675 	const struct rdma_link_ops *ops;
1676 	char ndev_name[IFNAMSIZ];
1677 	struct net_device *ndev;
1678 	char type[IFNAMSIZ];
1679 	int err;
1680 
1681 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1682 				     nldev_policy, extack);
1683 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1684 	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1685 		return -EINVAL;
1686 
1687 	nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1688 		    sizeof(ibdev_name));
1689 	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
1690 		return -EINVAL;
1691 
1692 	nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1693 	nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1694 		    sizeof(ndev_name));
1695 
1696 	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1697 	if (!ndev)
1698 		return -ENODEV;
1699 
1700 	down_read(&link_ops_rwsem);
1701 	ops = link_ops_get(type);
1702 #ifdef CONFIG_MODULES
1703 	if (!ops) {
1704 		up_read(&link_ops_rwsem);
1705 		request_module("rdma-link-%s", type);
1706 		down_read(&link_ops_rwsem);
1707 		ops = link_ops_get(type);
1708 	}
1709 #endif
1710 	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1711 	up_read(&link_ops_rwsem);
1712 	dev_put(ndev);
1713 
1714 	return err;
1715 }
1716 
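/*
 * RDMA_NLDEV_CMD_DELLINK: tear down a user-created device.  Only devices that
 * advertise IB_DEVICE_ALLOW_USER_UNREG (i.e. the soft-RDMA drivers) may be
 * removed this way; an illustrative iproute2 counterpart is
 * "rdma link delete rxe0".
 */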
1717 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1718 			  struct netlink_ext_ack *extack)
1719 {
1720 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1721 	struct ib_device *device;
1722 	u32 index;
1723 	int err;
1724 
1725 	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1726 				     nldev_policy, extack);
1727 	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1728 		return -EINVAL;
1729 
1730 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1731 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1732 	if (!device)
1733 		return -EINVAL;
1734 
1735 	if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
1736 		ib_device_put(device);
1737 		return -EINVAL;
1738 	}
1739 
1740 	ib_unregister_device_and_put(device);
1741 	return 0;
1742 }
1743 
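/*
 * RDMA_NLDEV_CMD_GET_CHARDEV: ask a named kernel client (for example
 * "uverbs") which character device it exposes for an optional device/port,
 * and return the dev_t, ABI version and device name so userspace can locate
 * the matching /dev node.
 */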
1744 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1745 			     struct netlink_ext_ack *extack)
1746 {
1747 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1748 	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1749 	struct ib_client_nl_info data = {};
1750 	struct ib_device *ibdev = NULL;
1751 	struct sk_buff *msg;
1752 	u32 index;
1753 	int err;
1754 
1755 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1756 			  extack);
1757 	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1758 		return -EINVAL;
1759 
1760 	nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1761 		    sizeof(client_name));
1762 
1763 	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1764 		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1765 		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1766 		if (!ibdev)
1767 			return -EINVAL;
1768 
1769 		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1770 			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1771 			if (!rdma_is_port_valid(ibdev, data.port)) {
1772 				err = -EINVAL;
1773 				goto out_put;
1774 			}
1775 		} else {
1776 			data.port = -1;
1777 		}
1778 	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1779 		return -EINVAL;
1780 	}
1781 
1782 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1783 	if (!msg) {
1784 		err = -ENOMEM;
1785 		goto out_put;
1786 	}
1787 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1788 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1789 					 RDMA_NLDEV_CMD_GET_CHARDEV),
1790 			0, 0);
1791 
1792 	data.nl_msg = msg;
1793 	err = ib_get_client_nl_info(ibdev, client_name, &data);
1794 	if (err)
1795 		goto out_nlmsg;
1796 
1797 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1798 				huge_encode_dev(data.cdev->devt),
1799 				RDMA_NLDEV_ATTR_PAD);
1800 	if (err)
1801 		goto out_data;
1802 	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1803 				RDMA_NLDEV_ATTR_PAD);
1804 	if (err)
1805 		goto out_data;
1806 	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1807 			   dev_name(data.cdev))) {
1808 		err = -EMSGSIZE;
1809 		goto out_data;
1810 	}
1811 
1812 	nlmsg_end(msg, nlh);
1813 	put_device(data.cdev);
1814 	if (ibdev)
1815 		ib_device_put(ibdev);
1816 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1817 
1818 out_data:
1819 	put_device(data.cdev);
1820 out_nlmsg:
1821 	nlmsg_free(msg);
1822 out_put:
1823 	if (ibdev)
1824 		ib_device_put(ibdev);
1825 	return err;
1826 }
1827 
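/*
 * RDMA_NLDEV_CMD_SYS_GET: report global (per-namespace) parameters, currently
 * the netns mode and whether copy-on-fork is supported.  Queried from
 * userspace with, for instance, "rdma system show".
 */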
1828 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1829 			      struct netlink_ext_ack *extack)
1830 {
1831 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1832 	struct sk_buff *msg;
1833 	int err;
1834 
1835 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1836 			  nldev_policy, extack);
1837 	if (err)
1838 		return err;
1839 
1840 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1841 	if (!msg)
1842 		return -ENOMEM;
1843 
1844 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1845 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1846 					 RDMA_NLDEV_CMD_SYS_GET),
1847 			0, 0);
1848 
1849 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1850 			 (u8)ib_devices_shared_netns);
1851 	if (err) {
1852 		nlmsg_free(msg);
1853 		return err;
1854 	}
1855 
1856 	/*
1857 	 * Copy-on-fork is supported.
1858 	 * See commits:
1859 	 * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1860 	 * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1861 	 * for more details. Don't backport this without them.
1862 	 *
1863 	 * Return value ignored on purpose, assume copy-on-fork is not
1864 	 * supported in case of failure.
1865 	 */
1866 	nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
1867 
1868 	nlmsg_end(msg, nlh);
1869 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1870 }
1871 
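/*
 * RDMA_NLDEV_CMD_SYS_SET: switch between shared and exclusive netns modes,
 * e.g. (illustrative) "rdma system set netns exclusive".  Gated by
 * RDMA_NL_ADMIN_PERM in nldev_cb_table, so the caller needs CAP_NET_ADMIN.
 */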
1872 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1873 				  struct netlink_ext_ack *extack)
1874 {
1875 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1876 	u8 enable;
1877 	int err;
1878 
1879 	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1880 			  nldev_policy, extack);
1881 	if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1882 		return -EINVAL;
1883 
1884 	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1885 	/* Only 0 and 1 are supported */
1886 	if (enable > 1)
1887 		return -EINVAL;
1888 
1889 	err = rdma_compatdev_set(enable);
1890 	return err;
1891 }
1892 
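/*
 * RDMA_NLDEV_CMD_STAT_SET: configure QP counter binding on a port, either by
 * selecting an auto mode (new QPs are bound by type/PID according to the
 * mask) or by manually binding one LQPN to an existing or newly allocated
 * counter.  Illustrative iproute2 invocations (see rdma-statistic(8) for the
 * exact syntax):
 *
 *	rdma statistic qp set link mlx5_0/1 auto type on
 *	rdma statistic qp bind link mlx5_0/1 lqpn 178
 */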
1893 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1894 			       struct netlink_ext_ack *extack)
1895 {
1896 	u32 index, port, mode, mask = 0, qpn, cntn = 0;
1897 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1898 	struct ib_device *device;
1899 	struct sk_buff *msg;
1900 	int ret;
1901 
1902 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1903 			  nldev_policy, extack);
1904 	/* Currently only counter for QP is supported */
1905 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1906 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1907 	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
1908 		return -EINVAL;
1909 
1910 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1911 		return -EINVAL;
1912 
1913 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1914 	device = ib_device_get_by_index(sock_net(skb->sk), index);
1915 	if (!device)
1916 		return -EINVAL;
1917 
1918 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1919 	if (!rdma_is_port_valid(device, port)) {
1920 		ret = -EINVAL;
1921 		goto err;
1922 	}
1923 
1924 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1925 	if (!msg) {
1926 		ret = -ENOMEM;
1927 		goto err;
1928 	}
1929 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1930 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1931 					 RDMA_NLDEV_CMD_STAT_SET),
1932 			0, 0);
1933 
1934 	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
1935 	if (mode == RDMA_COUNTER_MODE_AUTO) {
1936 		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
1937 			mask = nla_get_u32(
1938 				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
1939 		ret = rdma_counter_set_auto_mode(device, port, mask, extack);
1940 		if (ret)
1941 			goto err_msg;
1942 	} else {
1943 		if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) {
1944 			ret = -EINVAL;
			goto err_msg;
		}
1945 		qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1946 		if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
1947 			cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1948 			ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
1949 		} else {
1950 			ret = rdma_counter_bind_qpn_alloc(device, port,
1951 							  qpn, &cntn);
1952 		}
1953 		if (ret)
1954 			goto err_msg;
1955 
1956 		if (fill_nldev_handle(msg, device) ||
1957 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1958 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1959 		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1960 			ret = -EMSGSIZE;
1961 			goto err_fill;
1962 		}
1963 	}
1964 
1965 	nlmsg_end(msg, nlh);
1966 	ib_device_put(device);
1967 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1968 
1969 err_fill:
1970 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
1971 err_msg:
1972 	nlmsg_free(msg);
1973 err:
1974 	ib_device_put(device);
1975 	return ret;
1976 }
1977 
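/*
 * RDMA_NLDEV_CMD_STAT_DEL: undo a manual binding, releasing the QP from the
 * given counter.  The illustrative iproute2 counterpart is along the lines of
 * "rdma statistic qp unbind link mlx5_0/1 cntn 4 lqpn 178".
 */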
1978 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1979 			       struct netlink_ext_ack *extack)
1980 {
1981 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1982 	struct ib_device *device;
1983 	struct sk_buff *msg;
1984 	u32 index, port, qpn, cntn;
1985 	int ret;
1986 
1987 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1988 			  nldev_policy, extack);
1989 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1990 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
1991 	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
1992 	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
1993 		return -EINVAL;
1994 
1995 	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1996 		return -EINVAL;
1997 
1998 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1999 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2000 	if (!device)
2001 		return -EINVAL;
2002 
2003 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2004 	if (!rdma_is_port_valid(device, port)) {
2005 		ret = -EINVAL;
2006 		goto err;
2007 	}
2008 
2009 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2010 	if (!msg) {
2011 		ret = -ENOMEM;
2012 		goto err;
2013 	}
2014 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2015 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2016 					 RDMA_NLDEV_CMD_STAT_SET),
2017 			0, 0);
2018 
2019 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2020 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2021 	if (fill_nldev_handle(msg, device) ||
2022 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2023 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2024 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2025 		ret = -EMSGSIZE;
2026 		goto err_fill;
2027 	}
2028 
2029 	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
2030 	if (ret)
2031 		goto err_fill;
2032 
2033 	nlmsg_end(msg, nlh);
2034 	ib_device_put(device);
2035 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2036 
2037 err_fill:
2038 	nlmsg_free(msg);
2039 err:
2040 	ib_device_put(device);
2041 	return ret;
2042 }
2043 
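/*
 * With no STAT_RES attribute, STAT_GET returns the port-wide hardware
 * counters: the driver-maintained hw_stats values plus whatever has been
 * accumulated in bound counter objects (rdma_counter_get_hwstat_value()).
 * Roughly what "rdma statistic show link <dev>/<port>" displays.
 */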
2044 static int stat_get_doit_default_counter(struct sk_buff *skb,
2045 					 struct nlmsghdr *nlh,
2046 					 struct netlink_ext_ack *extack,
2047 					 struct nlattr *tb[])
2048 {
2049 	struct rdma_hw_stats *stats;
2050 	struct nlattr *table_attr;
2051 	struct ib_device *device;
2052 	int ret, num_cnts, i;
2053 	struct sk_buff *msg;
2054 	u32 index, port;
2055 	u64 v;
2056 
2057 	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2058 		return -EINVAL;
2059 
2060 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2061 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2062 	if (!device)
2063 		return -EINVAL;
2064 
2065 	if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
2066 		ret = -EINVAL;
2067 		goto err;
2068 	}
2069 
2070 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2071 	stats = ib_get_hw_stats_port(device, port);
2072 	if (!stats) {
2073 		ret = -EINVAL;
2074 		goto err;
2075 	}
2076 
2077 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2078 	if (!msg) {
2079 		ret = -ENOMEM;
2080 		goto err;
2081 	}
2082 
2083 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2084 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2085 					 RDMA_NLDEV_CMD_STAT_GET),
2086 			0, 0);
2087 
2088 	if (fill_nldev_handle(msg, device) ||
2089 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2090 		ret = -EMSGSIZE;
2091 		goto err_msg;
2092 	}
2093 
2094 	mutex_lock(&stats->lock);
2095 
2096 	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
2097 	if (num_cnts < 0) {
2098 		ret = -EINVAL;
2099 		goto err_stats;
2100 	}
2101 
2102 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2103 	if (!table_attr) {
2104 		ret = -EMSGSIZE;
2105 		goto err_stats;
2106 	}
2107 	for (i = 0; i < num_cnts; i++) {
2108 		v = stats->value[i] +
2109 			rdma_counter_get_hwstat_value(device, port, i);
2110 		if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) {
2111 			ret = -EMSGSIZE;
2112 			goto err_table;
2113 		}
2114 	}
2115 	nla_nest_end(msg, table_attr);
2116 
2117 	mutex_unlock(&stats->lock);
2118 	nlmsg_end(msg, nlh);
2119 	ib_device_put(device);
2120 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2121 
2122 err_table:
2123 	nla_nest_cancel(msg, table_attr);
2124 err_stats:
2125 	mutex_unlock(&stats->lock);
2126 err_msg:
2127 	nlmsg_free(msg);
2128 err:
2129 	ib_device_put(device);
2130 	return ret;
2131 }
2132 
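/*
 * STAT_GET for QPs: with a STAT_COUNTER_ID this defers to the counter
 * resource doit; otherwise it reports the port's current counter mode and,
 * for auto mode, the active auto-mode mask.
 */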
2133 static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2134 			    struct netlink_ext_ack *extack, struct nlattr *tb[])
2135 
2136 {
2137 	enum rdma_nl_counter_mode mode;
2138 	enum rdma_nl_counter_mask mask;
2139 	struct ib_device *device;
2140 	struct sk_buff *msg;
2141 	u32 index, port;
2142 	int ret;
2143 
2144 	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2145 		return nldev_res_get_counter_doit(skb, nlh, extack);
2146 
2147 	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2148 	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2149 		return -EINVAL;
2150 
2151 	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2152 	device = ib_device_get_by_index(sock_net(skb->sk), index);
2153 	if (!device)
2154 		return -EINVAL;
2155 
2156 	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2157 	if (!rdma_is_port_valid(device, port)) {
2158 		ret = -EINVAL;
2159 		goto err;
2160 	}
2161 
2162 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2163 	if (!msg) {
2164 		ret = -ENOMEM;
2165 		goto err;
2166 	}
2167 
2168 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2169 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2170 					 RDMA_NLDEV_CMD_STAT_GET),
2171 			0, 0);
2172 
2173 	ret = rdma_counter_get_mode(device, port, &mode, &mask);
2174 	if (ret)
2175 		goto err_msg;
2176 
2177 	if (fill_nldev_handle(msg, device) ||
2178 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2179 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2180 		ret = -EMSGSIZE;
2181 		goto err_msg;
2182 	}
2183 
2184 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2185 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2186 		ret = -EMSGSIZE;
2187 		goto err_msg;
2188 	}
2189 
2190 	nlmsg_end(msg, nlh);
2191 	ib_device_put(device);
2192 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2193 
2194 err_msg:
2195 	nlmsg_free(msg);
2196 err:
2197 	ib_device_put(device);
2198 	return ret;
2199 }
2200 
2201 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2202 			       struct netlink_ext_ack *extack)
2203 {
2204 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2205 	int ret;
2206 
2207 	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2208 			  nldev_policy, extack);
2209 	if (ret)
2210 		return -EINVAL;
2211 
2212 	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2213 		return stat_get_doit_default_counter(skb, nlh, extack, tb);
2214 
2215 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2216 	case RDMA_NLDEV_ATTR_RES_QP:
2217 		ret = stat_get_doit_qp(skb, nlh, extack, tb);
2218 		break;
2219 	case RDMA_NLDEV_ATTR_RES_MR:
2220 		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2221 					  fill_stat_mr_entry);
2222 		break;
2223 	default:
2224 		ret = -EINVAL;
2225 		break;
2226 	}
2227 
2228 	return ret;
2229 }
2230 
2231 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2232 				 struct netlink_callback *cb)
2233 {
2234 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2235 	int ret;
2236 
2237 	ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2238 			  nldev_policy, NULL);
2239 	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2240 		return -EINVAL;
2241 
2242 	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2243 	case RDMA_NLDEV_ATTR_RES_QP:
2244 		ret = nldev_res_get_counter_dumpit(skb, cb);
2245 		break;
2246 	case RDMA_NLDEV_ATTR_RES_MR:
2247 		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2248 					    fill_stat_mr_entry);
2249 		break;
2250 	default:
2251 		ret = -EINVAL;
2252 		break;
2253 	}
2254 
2255 	return ret;
2256 }
2257 
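/*
 * Dispatch table for the RDMA_NL_NLDEV netlink family, indexed by
 * RDMA_NLDEV_CMD_*: .doit serves targeted requests, .dump serves NLM_F_DUMP
 * iteration, and RDMA_NL_ADMIN_PERM restricts the command to callers with
 * CAP_NET_ADMIN.
 */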
2258 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2259 	[RDMA_NLDEV_CMD_GET] = {
2260 		.doit = nldev_get_doit,
2261 		.dump = nldev_get_dumpit,
2262 	},
2263 	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
2264 		.doit = nldev_get_chardev,
2265 	},
2266 	[RDMA_NLDEV_CMD_SET] = {
2267 		.doit = nldev_set_doit,
2268 		.flags = RDMA_NL_ADMIN_PERM,
2269 	},
2270 	[RDMA_NLDEV_CMD_NEWLINK] = {
2271 		.doit = nldev_newlink,
2272 		.flags = RDMA_NL_ADMIN_PERM,
2273 	},
2274 	[RDMA_NLDEV_CMD_DELLINK] = {
2275 		.doit = nldev_dellink,
2276 		.flags = RDMA_NL_ADMIN_PERM,
2277 	},
2278 	[RDMA_NLDEV_CMD_PORT_GET] = {
2279 		.doit = nldev_port_get_doit,
2280 		.dump = nldev_port_get_dumpit,
2281 	},
2282 	[RDMA_NLDEV_CMD_RES_GET] = {
2283 		.doit = nldev_res_get_doit,
2284 		.dump = nldev_res_get_dumpit,
2285 	},
2286 	[RDMA_NLDEV_CMD_RES_QP_GET] = {
2287 		.doit = nldev_res_get_qp_doit,
2288 		.dump = nldev_res_get_qp_dumpit,
2289 	},
2290 	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2291 		.doit = nldev_res_get_cm_id_doit,
2292 		.dump = nldev_res_get_cm_id_dumpit,
2293 	},
2294 	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
2295 		.doit = nldev_res_get_cq_doit,
2296 		.dump = nldev_res_get_cq_dumpit,
2297 	},
2298 	[RDMA_NLDEV_CMD_RES_MR_GET] = {
2299 		.doit = nldev_res_get_mr_doit,
2300 		.dump = nldev_res_get_mr_dumpit,
2301 	},
2302 	[RDMA_NLDEV_CMD_RES_PD_GET] = {
2303 		.doit = nldev_res_get_pd_doit,
2304 		.dump = nldev_res_get_pd_dumpit,
2305 	},
2306 	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
2307 		.doit = nldev_res_get_ctx_doit,
2308 		.dump = nldev_res_get_ctx_dumpit,
2309 	},
2310 	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
2311 		.doit = nldev_res_get_srq_doit,
2312 		.dump = nldev_res_get_srq_dumpit,
2313 	},
2314 	[RDMA_NLDEV_CMD_SYS_GET] = {
2315 		.doit = nldev_sys_get_doit,
2316 	},
2317 	[RDMA_NLDEV_CMD_SYS_SET] = {
2318 		.doit = nldev_set_sys_set_doit,
2319 		.flags = RDMA_NL_ADMIN_PERM,
2320 	},
2321 	[RDMA_NLDEV_CMD_STAT_SET] = {
2322 		.doit = nldev_stat_set_doit,
2323 		.flags = RDMA_NL_ADMIN_PERM,
2324 	},
2325 	[RDMA_NLDEV_CMD_STAT_GET] = {
2326 		.doit = nldev_stat_get_doit,
2327 		.dump = nldev_stat_get_dumpit,
2328 	},
2329 	[RDMA_NLDEV_CMD_STAT_DEL] = {
2330 		.doit = nldev_stat_del_doit,
2331 		.flags = RDMA_NL_ADMIN_PERM,
2332 	},
2333 	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
2334 		.doit = nldev_res_get_qp_raw_doit,
2335 		.dump = nldev_res_get_qp_raw_dumpit,
2336 		.flags = RDMA_NL_ADMIN_PERM,
2337 	},
2338 	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
2339 		.doit = nldev_res_get_cq_raw_doit,
2340 		.dump = nldev_res_get_cq_raw_dumpit,
2341 		.flags = RDMA_NL_ADMIN_PERM,
2342 	},
2343 	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
2344 		.doit = nldev_res_get_mr_raw_doit,
2345 		.dump = nldev_res_get_mr_raw_dumpit,
2346 		.flags = RDMA_NL_ADMIN_PERM,
2347 	},
2348 };
2349 
2350 void __init nldev_init(void)
2351 {
2352 	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2353 }
2354 
2355 void nldev_exit(void)
2356 {
2357 	rdma_nl_unregister(RDMA_NL_NLDEV);
2358 }
2359 
2360 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
2361