1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 // Copyright (c) 2018 Mellanox Technologies
3
4 #include <linux/mlx5/driver.h>
5
6 #include "mlx5_core.h"
7 #include "lib/eq.h"
8 #include "lib/events.h"
9
/* A notifier block bundled with the context pointer its handler reads back.
 * Handlers in this file recover this wrapper from the notifier_block they are
 * called with (via mlx5_nb_cof()) and then read @ctx.
 */
struct mlx5_event_nb {
	/* notifier block handed to mlx5_eq_notifier_register() */
	struct mlx5_nb nb;
	/* set to the owning struct mlx5_events in mlx5_events_start() */
	void *ctx;
};
14
/* General event handlers for the low-level mlx5_core driver.
 *
 * Other major feature-specific events, such as clock/eswitch/fpga/FW trace
 * and many others, are handled elsewhere by separate notifier callbacks
 * owned by those mlx5 components.
 */
/* Handlers processed by mlx5_core itself; defined further down in this file. */
static int any_notifier(struct notifier_block *nb, unsigned long type,
			void *data);
static int temp_warn(struct notifier_block *nb, unsigned long type,
		     void *data);
static int port_module(struct notifier_block *nb, unsigned long type,
		       void *data);
static int pcie_core(struct notifier_block *nb, unsigned long type,
		     void *data);

/* Handler that passes the event on to the events->fw_nh driver notifier chain. */
static int forward_event(struct notifier_block *nb, unsigned long event,
			 void *data);
28
29 static struct mlx5_nb events_nbs_ref[] = {
30 /* Events to be processed by mlx5_core */
31 {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
32 {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
33 {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
34 {.nb.notifier_call = pcie_core, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
35
36 /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
37 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
38 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
39 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE },
40 /* QP/WQ resource events to forward */
41 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
42 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG },
43 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST },
44 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
45 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
46 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
47 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
48 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
49 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
50 /* SRQ events */
51 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
52 {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
53 };
54
/* Per-device event state: allocated in mlx5_events_init() and released in
 * mlx5_events_cleanup().
 */
struct mlx5_events {
	/* owning device, set in mlx5_events_init() */
	struct mlx5_core_dev *dev;
	/* single-threaded workqueue created in mlx5_events_init(); used for
	 * pcie_core_work and for work passed to mlx5_events_work_enqueue()
	 */
	struct workqueue_struct *wq;
	/* one slot per events_nbs_ref entry, filled in mlx5_events_start() */
	struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)];
	/* driver notifier chain for fw events */
	struct atomic_notifier_head fw_nh;
	/* port module events stats, updated by port_module() */
	struct mlx5_pme_stats pme_stats;
	/* work item running mlx5_pcie_event(), queued by pcie_core() */
	struct work_struct pcie_core_work;
	/* driver notifier chain for sw events */
	struct blocking_notifier_head sw_nh;
};
68
/* Translate an EQE type code into the name of the matching MLX5_EVENT_TYPE_*
 * constant, for use in log messages.
 *
 * Returns a pointer to a string literal; codes with no case below yield
 * "Unrecognized event".
 */
static const char *eqe_type_str(u8 type)
{
	const char *name;

	switch (type) {
	case MLX5_EVENT_TYPE_COMP:
		name = "MLX5_EVENT_TYPE_COMP";
		break;
	case MLX5_EVENT_TYPE_PATH_MIG:
		name = "MLX5_EVENT_TYPE_PATH_MIG";
		break;
	case MLX5_EVENT_TYPE_COMM_EST:
		name = "MLX5_EVENT_TYPE_COMM_EST";
		break;
	case MLX5_EVENT_TYPE_SQ_DRAINED:
		name = "MLX5_EVENT_TYPE_SQ_DRAINED";
		break;
	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
		name = "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
		break;
	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
		name = "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
		break;
	case MLX5_EVENT_TYPE_CQ_ERROR:
		name = "MLX5_EVENT_TYPE_CQ_ERROR";
		break;
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
		name = "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
		break;
	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
		name = "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
		break;
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		name = "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
		break;
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
		name = "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
		break;
	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
		name = "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
		break;
	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
		name = "MLX5_EVENT_TYPE_INTERNAL_ERROR";
		break;
	case MLX5_EVENT_TYPE_PORT_CHANGE:
		name = "MLX5_EVENT_TYPE_PORT_CHANGE";
		break;
	case MLX5_EVENT_TYPE_GPIO_EVENT:
		name = "MLX5_EVENT_TYPE_GPIO_EVENT";
		break;
	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
		name = "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
		break;
	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
		name = "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
		break;
	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
		name = "MLX5_EVENT_TYPE_REMOTE_CONFIG";
		break;
	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
		name = "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
		break;
	case MLX5_EVENT_TYPE_STALL_EVENT:
		name = "MLX5_EVENT_TYPE_STALL_EVENT";
		break;
	case MLX5_EVENT_TYPE_CMD:
		name = "MLX5_EVENT_TYPE_CMD";
		break;
	case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
		name = "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
		break;
	case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
		name = "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
		break;
	case MLX5_EVENT_TYPE_PAGE_REQUEST:
		name = "MLX5_EVENT_TYPE_PAGE_REQUEST";
		break;
	case MLX5_EVENT_TYPE_PAGE_FAULT:
		name = "MLX5_EVENT_TYPE_PAGE_FAULT";
		break;
	case MLX5_EVENT_TYPE_PPS_EVENT:
		name = "MLX5_EVENT_TYPE_PPS_EVENT";
		break;
	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
		name = "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
		break;
	case MLX5_EVENT_TYPE_FPGA_ERROR:
		name = "MLX5_EVENT_TYPE_FPGA_ERROR";
		break;
	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
		name = "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
		break;
	case MLX5_EVENT_TYPE_GENERAL_EVENT:
		name = "MLX5_EVENT_TYPE_GENERAL_EVENT";
		break;
	case MLX5_EVENT_TYPE_MONITOR_COUNTER:
		name = "MLX5_EVENT_TYPE_MONITOR_COUNTER";
		break;
	case MLX5_EVENT_TYPE_DEVICE_TRACER:
		name = "MLX5_EVENT_TYPE_DEVICE_TRACER";
		break;
	case MLX5_EVENT_TYPE_OBJECT_CHANGE:
		name = "MLX5_EVENT_TYPE_OBJECT_CHANGE";
		break;
	default:
		name = "Unrecognized event";
		break;
	}

	return name;
}
142
143 /* handles all FW events, type == eqe->type */
any_notifier(struct notifier_block * nb,unsigned long type,void * data)144 static int any_notifier(struct notifier_block *nb,
145 unsigned long type, void *data)
146 {
147 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
148 struct mlx5_events *events = event_nb->ctx;
149 struct mlx5_eqe *eqe = data;
150
151 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
152 eqe_type_str(eqe->type), eqe->sub_type);
153 return NOTIFY_OK;
154 }
155
156 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
temp_warn(struct notifier_block * nb,unsigned long type,void * data)157 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
158 {
159 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
160 struct mlx5_events *events = event_nb->ctx;
161 struct mlx5_eqe *eqe = data;
162 u64 value_lsb;
163 u64 value_msb;
164
165 value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
166 value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
167
168 mlx5_core_warn(events->dev,
169 "High temperature on sensors with bit set %llx %llx",
170 value_msb, value_lsb);
171
172 return NOTIFY_OK;
173 }
174
175 /* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
mlx5_pme_status_to_string(enum port_module_event_status_type status)176 static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
177 {
178 switch (status) {
179 case MLX5_MODULE_STATUS_PLUGGED:
180 return "Cable plugged";
181 case MLX5_MODULE_STATUS_UNPLUGGED:
182 return "Cable unplugged";
183 case MLX5_MODULE_STATUS_ERROR:
184 return "Cable error";
185 case MLX5_MODULE_STATUS_DISABLED:
186 return "Cable disabled";
187 default:
188 return "Unknown status";
189 }
190 }
191
mlx5_pme_error_to_string(enum port_module_event_error_type error)192 static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
193 {
194 switch (error) {
195 case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
196 return "Power budget exceeded";
197 case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
198 return "Long Range for non MLNX cable";
199 case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
200 return "Bus stuck (I2C or data shorted)";
201 case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
202 return "No EEPROM/retry timeout";
203 case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
204 return "Enforce part number list";
205 case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
206 return "Unknown identifier";
207 case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
208 return "High Temperature";
209 case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
210 return "Bad or shorted cable/module";
211 case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
212 return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
213 default:
214 return "Unknown error";
215 }
216 }
217
218 /* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
port_module(struct notifier_block * nb,unsigned long type,void * data)219 static int port_module(struct notifier_block *nb, unsigned long type, void *data)
220 {
221 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
222 struct mlx5_events *events = event_nb->ctx;
223 struct mlx5_eqe *eqe = data;
224
225 enum port_module_event_status_type module_status;
226 enum port_module_event_error_type error_type;
227 struct mlx5_eqe_port_module *module_event_eqe;
228 const char *status_str;
229 u8 module_num;
230
231 module_event_eqe = &eqe->data.port_module;
232 module_status = module_event_eqe->module_status &
233 PORT_MODULE_EVENT_MODULE_STATUS_MASK;
234 error_type = module_event_eqe->error_type &
235 PORT_MODULE_EVENT_ERROR_TYPE_MASK;
236
237 if (module_status < MLX5_MODULE_STATUS_NUM)
238 events->pme_stats.status_counters[module_status]++;
239
240 if (module_status == MLX5_MODULE_STATUS_ERROR)
241 if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
242 events->pme_stats.error_counters[error_type]++;
243
244 if (!printk_ratelimit())
245 return NOTIFY_OK;
246
247 module_num = module_event_eqe->module;
248 status_str = mlx5_pme_status_to_string(module_status);
249 if (module_status == MLX5_MODULE_STATUS_ERROR) {
250 const char *error_str = mlx5_pme_error_to_string(error_type);
251
252 mlx5_core_err(events->dev,
253 "Port module event[error]: module %u, %s, %s\n",
254 module_num, status_str, error_str);
255 } else {
256 mlx5_core_info(events->dev,
257 "Port module event: module %u, %s\n",
258 module_num, status_str);
259 }
260
261 return NOTIFY_OK;
262 }
263
/* Values of the pwr_status field that mlx5_pcie_event() reads from the MPEIN
 * register and switches on.
 */
enum {
	MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
	MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
	MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
};
269
mlx5_pcie_event(struct work_struct * work)270 static void mlx5_pcie_event(struct work_struct *work)
271 {
272 u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
273 u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
274 struct mlx5_events *events;
275 struct mlx5_core_dev *dev;
276 u8 power_status;
277 u16 pci_power;
278
279 events = container_of(work, struct mlx5_events, pcie_core_work);
280 dev = events->dev;
281
282 if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
283 return;
284
285 mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
286 MLX5_REG_MPEIN, 0, 0);
287 power_status = MLX5_GET(mpein_reg, out, pwr_status);
288 pci_power = MLX5_GET(mpein_reg, out, pci_power);
289
290 switch (power_status) {
291 case MLX5_PCI_POWER_COULD_NOT_BE_READ:
292 mlx5_core_info_rl(dev,
293 "PCIe slot power capability was not advertised.\n");
294 break;
295 case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
296 mlx5_core_warn_rl(dev,
297 "Detected insufficient power on the PCIe slot (%uW).\n",
298 pci_power);
299 break;
300 case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
301 mlx5_core_info_rl(dev,
302 "PCIe slot advertised sufficient power (%uW).\n",
303 pci_power);
304 break;
305 }
306 }
307
pcie_core(struct notifier_block * nb,unsigned long type,void * data)308 static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
309 {
310 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb,
311 struct mlx5_event_nb,
312 nb);
313 struct mlx5_events *events = event_nb->ctx;
314 struct mlx5_eqe *eqe = data;
315
316 switch (eqe->sub_type) {
317 case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
318 queue_work(events->wq, &events->pcie_core_work);
319 break;
320 default:
321 return NOTIFY_DONE;
322 }
323
324 return NOTIFY_OK;
325 }
326
mlx5_get_pme_stats(struct mlx5_core_dev * dev,struct mlx5_pme_stats * stats)327 void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
328 {
329 *stats = dev->priv.events->pme_stats;
330 }
331
332 /* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
forward_event(struct notifier_block * nb,unsigned long event,void * data)333 static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
334 {
335 struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
336 struct mlx5_events *events = event_nb->ctx;
337 struct mlx5_eqe *eqe = data;
338
339 mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
340 eqe_type_str(eqe->type), eqe->sub_type);
341 atomic_notifier_call_chain(&events->fw_nh, event, data);
342 return NOTIFY_OK;
343 }
344
mlx5_events_init(struct mlx5_core_dev * dev)345 int mlx5_events_init(struct mlx5_core_dev *dev)
346 {
347 struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
348
349 if (!events)
350 return -ENOMEM;
351
352 ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
353 events->dev = dev;
354 dev->priv.events = events;
355 events->wq = create_singlethread_workqueue("mlx5_events");
356 if (!events->wq) {
357 kfree(events);
358 return -ENOMEM;
359 }
360 INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
361 BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
362
363 return 0;
364 }
365
mlx5_events_cleanup(struct mlx5_core_dev * dev)366 void mlx5_events_cleanup(struct mlx5_core_dev *dev)
367 {
368 destroy_workqueue(dev->priv.events->wq);
369 kvfree(dev->priv.events);
370 }
371
mlx5_events_start(struct mlx5_core_dev * dev)372 void mlx5_events_start(struct mlx5_core_dev *dev)
373 {
374 struct mlx5_events *events = dev->priv.events;
375 int i;
376
377 for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
378 events->notifiers[i].nb = events_nbs_ref[i];
379 events->notifiers[i].ctx = events;
380 mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
381 }
382 }
383
mlx5_events_stop(struct mlx5_core_dev * dev)384 void mlx5_events_stop(struct mlx5_core_dev *dev)
385 {
386 struct mlx5_events *events = dev->priv.events;
387 int i;
388
389 for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
390 mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
391 flush_workqueue(events->wq);
392 }
393
394 /* This API is used only for processing and forwarding firmware
395 * events to mlx5 consumer.
396 */
mlx5_notifier_register(struct mlx5_core_dev * dev,struct notifier_block * nb)397 int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
398 {
399 struct mlx5_events *events = dev->priv.events;
400
401 return atomic_notifier_chain_register(&events->fw_nh, nb);
402 }
403 EXPORT_SYMBOL(mlx5_notifier_register);
404
mlx5_notifier_unregister(struct mlx5_core_dev * dev,struct notifier_block * nb)405 int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
406 {
407 struct mlx5_events *events = dev->priv.events;
408
409 return atomic_notifier_chain_unregister(&events->fw_nh, nb);
410 }
411 EXPORT_SYMBOL(mlx5_notifier_unregister);
412
mlx5_notifier_call_chain(struct mlx5_events * events,unsigned int event,void * data)413 int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
414 {
415 return atomic_notifier_call_chain(&events->fw_nh, event, data);
416 }
417
418 /* This API is used only for processing and forwarding driver-specific
419 * events to mlx5 consumers.
420 */
mlx5_blocking_notifier_register(struct mlx5_core_dev * dev,struct notifier_block * nb)421 int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
422 {
423 struct mlx5_events *events = dev->priv.events;
424
425 return blocking_notifier_chain_register(&events->sw_nh, nb);
426 }
427 EXPORT_SYMBOL(mlx5_blocking_notifier_register);
428
mlx5_blocking_notifier_unregister(struct mlx5_core_dev * dev,struct notifier_block * nb)429 int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
430 {
431 struct mlx5_events *events = dev->priv.events;
432
433 return blocking_notifier_chain_unregister(&events->sw_nh, nb);
434 }
435 EXPORT_SYMBOL(mlx5_blocking_notifier_unregister);
436
mlx5_blocking_notifier_call_chain(struct mlx5_core_dev * dev,unsigned int event,void * data)437 int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
438 void *data)
439 {
440 struct mlx5_events *events = dev->priv.events;
441
442 return blocking_notifier_call_chain(&events->sw_nh, event, data);
443 }
444
mlx5_events_work_enqueue(struct mlx5_core_dev * dev,struct work_struct * work)445 void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work)
446 {
447 queue_work(dev->priv.events->wq, work);
448 }
449