// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2018 Mellanox Technologies

#include <linux/mlx5/driver.h>

#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/events.h"

struct mlx5_event_nb {
	struct mlx5_nb  nb;
	void           *ctx;
};

/* General event handlers for the low-level mlx5_core driver.
 *
 * Other major feature-specific events, such as clock/eswitch/fpga/FW
 * trace events and many others, are handled elsewhere by the respective
 * mlx5 components through their own notifier callbacks.
 */
static int any_notifier(struct notifier_block *, unsigned long, void *);
static int temp_warn(struct notifier_block *, unsigned long, void *);
static int port_module(struct notifier_block *, unsigned long, void *);
static int pcie_core(struct notifier_block *, unsigned long, void *);

/* Handler that forwards the event to the events->fw_nh driver notifier chain */
static int forward_event(struct notifier_block *, unsigned long, void *);

static struct mlx5_nb events_nbs_ref[] = {
	/* Events to be processed by mlx5_core */
	{.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
	{.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
	{.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
	{.nb.notifier_call = pcie_core,     .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },

	/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE },
	/* QP/WQ resource events to forward */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
	/* SRQ events */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
};

struct mlx5_events {
	struct mlx5_core_dev *dev;
	struct workqueue_struct *wq;
	struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
	/* driver notifier chain for fw events */
	struct atomic_notifier_head fw_nh;
	/* port module events stats */
	struct mlx5_pme_stats pme_stats;
	/* pcie_core */
	struct work_struct pcie_core_work;
	/* driver notifier chain for sw events */
	struct blocking_notifier_head sw_nh;
};

static const char *eqe_type_str(u8 type)
{
	switch (type) {
	case MLX5_EVENT_TYPE_COMP:
		return "MLX5_EVENT_TYPE_COMP";
	case MLX5_EVENT_TYPE_PATH_MIG:
		return "MLX5_EVENT_TYPE_PATH_MIG";
	case MLX5_EVENT_TYPE_COMM_EST:
		return "MLX5_EVENT_TYPE_COMM_EST";
	case MLX5_EVENT_TYPE_SQ_DRAINED:
		return "MLX5_EVENT_TYPE_SQ_DRAINED";
	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
	case MLX5_EVENT_TYPE_CQ_ERROR:
		return "MLX5_EVENT_TYPE_CQ_ERROR";
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
	case MLX5_EVENT_TYPE_PORT_CHANGE:
		return "MLX5_EVENT_TYPE_PORT_CHANGE";
	case MLX5_EVENT_TYPE_GPIO_EVENT:
		return "MLX5_EVENT_TYPE_GPIO_EVENT";
	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
	case MLX5_EVENT_TYPE_STALL_EVENT:
		return "MLX5_EVENT_TYPE_STALL_EVENT";
	case MLX5_EVENT_TYPE_CMD:
		return "MLX5_EVENT_TYPE_CMD";
	case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
		return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
	case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
		return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
	case MLX5_EVENT_TYPE_PAGE_REQUEST:
		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
	case MLX5_EVENT_TYPE_PAGE_FAULT:
		return "MLX5_EVENT_TYPE_PAGE_FAULT";
	case MLX5_EVENT_TYPE_PPS_EVENT:
		return "MLX5_EVENT_TYPE_PPS_EVENT";
	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
	case MLX5_EVENT_TYPE_FPGA_ERROR:
		return "MLX5_EVENT_TYPE_FPGA_ERROR";
	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
	case MLX5_EVENT_TYPE_GENERAL_EVENT:
		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
	case MLX5_EVENT_TYPE_MONITOR_COUNTER:
		return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
	case MLX5_EVENT_TYPE_DEVICE_TRACER:
		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
	case MLX5_EVENT_TYPE_OBJECT_CHANGE:
		return "MLX5_EVENT_TYPE_OBJECT_CHANGE";
	default:
		return "Unrecognized event";
	}
}

/* handles all FW events, type == eqe->type */
static int any_notifier(struct notifier_block *nb,
			unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events   *events   = event_nb->ctx;
	struct mlx5_eqe      *eqe      = data;

	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
		      eqe_type_str(eqe->type), eqe->sub_type);
	return NOTIFY_OK;
}

/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events   *events   = event_nb->ctx;
	struct mlx5_eqe      *eqe      = data;
	u64 value_lsb;
	u64 value_msb;

	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
	/* bits 1-63 are not supported for NICs,
	 * hence read only bit 0 (asic) from the lsb.
	 */
	value_lsb &= 0x1;
	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);

	if (net_ratelimit())
		mlx5_core_warn(events->dev,
			       "High temperature on sensors with bit set %llx %llx\n",
			       value_msb, value_lsb);

	return NOTIFY_OK;
}

/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
{
	switch (status) {
	case MLX5_MODULE_STATUS_PLUGGED:
		return "Cable plugged";
	case MLX5_MODULE_STATUS_UNPLUGGED:
		return "Cable unplugged";
	case MLX5_MODULE_STATUS_ERROR:
		return "Cable error";
	case MLX5_MODULE_STATUS_DISABLED:
		return "Cable disabled";
	default:
		return "Unknown status";
	}
}

static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
{
	switch (error) {
	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
		return "Power budget exceeded";
	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
		return "Long Range for non MLNX cable";
	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
		return "Bus stuck (I2C or data shorted)";
	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
		return "No EEPROM/retry timeout";
	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
		return "Enforce part number list";
	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
		return "Unknown identifier";
	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
		return "High Temperature";
	case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
		return "Bad or shorted cable/module";
	case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
		return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
	default:
		return "Unknown error";
	}
}

/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
static int port_module(struct notifier_block *nb, unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events   *events   = event_nb->ctx;
	struct mlx5_eqe      *eqe      = data;

	enum port_module_event_status_type module_status;
	enum port_module_event_error_type error_type;
	struct mlx5_eqe_port_module *module_event_eqe;
	const char *status_str;
	u8 module_num;

	module_event_eqe = &eqe->data.port_module;
	module_status = module_event_eqe->module_status &
			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
	error_type = module_event_eqe->error_type &
		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;

	if (module_status < MLX5_MODULE_STATUS_NUM)
		events->pme_stats.status_counters[module_status]++;

	if (module_status == MLX5_MODULE_STATUS_ERROR)
		if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
			events->pme_stats.error_counters[error_type]++;

	if (!printk_ratelimit())
		return NOTIFY_OK;

	module_num = module_event_eqe->module;
	status_str = mlx5_pme_status_to_string(module_status);
	if (module_status == MLX5_MODULE_STATUS_ERROR) {
		const char *error_str = mlx5_pme_error_to_string(error_type);

		mlx5_core_err(events->dev,
			      "Port module event[error]: module %u, %s, %s\n",
			      module_num, status_str, error_str);
	} else {
		mlx5_core_info(events->dev,
			       "Port module event: module %u, %s\n",
			       module_num, status_str);
	}

	return NOTIFY_OK;
}

enum {
	MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
	MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
	MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
};

static void mlx5_pcie_event(struct work_struct *work)
{
	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
	struct mlx5_events *events;
	struct mlx5_core_dev *dev;
	u8 power_status;
	u16 pci_power;

	events = container_of(work, struct mlx5_events, pcie_core_work);
	dev = events->dev;

	if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
		return;

	mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
			     MLX5_REG_MPEIN, 0, 0);
	power_status = MLX5_GET(mpein_reg, out, pwr_status);
	pci_power = MLX5_GET(mpein_reg, out, pci_power);

	switch (power_status) {
	case MLX5_PCI_POWER_COULD_NOT_BE_READ:
		mlx5_core_info_rl(dev,
				  "PCIe slot power capability was not advertised.\n");
		break;
	case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
		mlx5_core_warn_rl(dev,
				  "Detected insufficient power on the PCIe slot (%uW).\n",
				  pci_power);
		break;
	case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
		mlx5_core_info_rl(dev,
				  "PCIe slot advertised sufficient power (%uW).\n",
				  pci_power);
		break;
	}
}

static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
{
	struct mlx5_event_nb    *event_nb = mlx5_nb_cof(nb,
							struct mlx5_event_nb,
							nb);
	struct mlx5_events      *events   = event_nb->ctx;
	struct mlx5_eqe         *eqe      = data;

	switch (eqe->sub_type) {
	case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
		queue_work(events->wq, &events->pcie_core_work);
		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}

void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
{
	*stats = dev->priv.events->pme_stats;
}

/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events   *events   = event_nb->ctx;
	struct mlx5_eqe      *eqe      = data;

	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
		      eqe_type_str(eqe->type), eqe->sub_type);
	atomic_notifier_call_chain(&events->fw_nh, event, data);
	return NOTIFY_OK;
}

int mlx5_events_init(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);

	if (!events)
		return -ENOMEM;

	ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
	events->dev = dev;
	dev->priv.events = events;
	events->wq = create_singlethread_workqueue("mlx5_events");
	if (!events->wq) {
		kfree(events);
		return -ENOMEM;
	}
	INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
	BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);

	return 0;
}

void mlx5_events_cleanup(struct mlx5_core_dev *dev)
{
	destroy_workqueue(dev->priv.events->wq);
	kvfree(dev->priv.events);
}

void mlx5_events_start(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events = dev->priv.events;
	int i;

	for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
		events->notifiers[i].nb  = events_nbs_ref[i];
		events->notifiers[i].ctx = events;
		mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
	}
}

void mlx5_events_stop(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events = dev->priv.events;
	int i;

	for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0; i--)
		mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
	flush_workqueue(events->wq);
}

/* This API is used only for processing and forwarding firmware
 * events to mlx5 consumers.
 */
int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
	struct mlx5_events *events = dev->priv.events;

	return atomic_notifier_chain_register(&events->fw_nh, nb);
}
EXPORT_SYMBOL(mlx5_notifier_register);
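
/* Usage sketch (illustrative, not part of this file): a consumer such as
 * mlx5e embeds a struct notifier_block and registers it against the FW
 * event chain above. Since fw_nh is an atomic notifier chain, the callback
 * runs in atomic context and must not sleep. The callback name and the
 * handle_port_change() helper below are hypothetical:
 *
 *	static int my_async_event(struct notifier_block *nb,
 *				  unsigned long event, void *data)
 *	{
 *		struct mlx5_eqe *eqe = data;
 *
 *		if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
 *			return NOTIFY_DONE;
 *		handle_port_change(eqe);	// hypothetical, must not sleep
 *		return NOTIFY_OK;
 *	}
 *
 *	my_nb.notifier_call = my_async_event;
 *	mlx5_notifier_register(mdev, &my_nb);
 *	...
 *	mlx5_notifier_unregister(mdev, &my_nb);
 */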

int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
	struct mlx5_events *events = dev->priv.events;

	return atomic_notifier_chain_unregister(&events->fw_nh, nb);
}
EXPORT_SYMBOL(mlx5_notifier_unregister);

int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
{
	return atomic_notifier_call_chain(&events->fw_nh, event, data);
}

/* This API is used only for processing and forwarding driver-specific
 * events to mlx5 consumers.
 */
int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
	struct mlx5_events *events = dev->priv.events;

	return blocking_notifier_chain_register(&events->sw_nh, nb);
}
EXPORT_SYMBOL(mlx5_blocking_notifier_register);

int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
	struct mlx5_events *events = dev->priv.events;

	return blocking_notifier_chain_unregister(&events->sw_nh, nb);
}
EXPORT_SYMBOL(mlx5_blocking_notifier_unregister);

int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
				      void *data)
{
	struct mlx5_events *events = dev->priv.events;

	return blocking_notifier_call_chain(&events->sw_nh, event, data);
}
449