1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3 *
4 * Copyright (C) 2015-2019 Google, Inc.
5 */
6
7 #include <linux/cpumask.h>
8 #include <linux/etherdevice.h>
9 #include <linux/interrupt.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
12 #include <linux/sched.h>
13 #include <linux/timer.h>
14 #include <linux/workqueue.h>
15 #include <net/sch_generic.h>
16 #include "gve.h"
17 #include "gve_adminq.h"
18 #include "gve_register.h"
19
20 #define GVE_DEFAULT_RX_COPYBREAK (256)
21
22 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
23 #define GVE_VERSION "1.0.0"
24 #define GVE_VERSION_PREFIX "GVE-"
25
26 // Minimum amount of time between queue kicks in msec (10 seconds)
27 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
28
29 const char gve_version_str[] = GVE_VERSION;
30 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
31
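/* ndo_get_stats64 handler: aggregate per-ring RX/TX packet and byte counters,
 * sampling each ring consistently via u64_stats_fetch_begin/retry.
 */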
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
33 {
34 struct gve_priv *priv = netdev_priv(dev);
35 unsigned int start;
36 u64 packets, bytes;
37 int ring;
38
39 if (priv->rx) {
40 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
41 do {
42 start =
43 u64_stats_fetch_begin(&priv->rx[ring].statss);
44 packets = priv->rx[ring].rpackets;
45 bytes = priv->rx[ring].rbytes;
46 } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
47 start));
48 s->rx_packets += packets;
49 s->rx_bytes += bytes;
50 }
51 }
52 if (priv->tx) {
53 for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
54 do {
55 start =
56 u64_stats_fetch_begin(&priv->tx[ring].statss);
57 packets = priv->tx[ring].pkt_done;
58 bytes = priv->tx[ring].bytes_done;
59 } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
60 start));
61 s->tx_packets += packets;
62 s->tx_bytes += bytes;
63 }
64 }
65 }
66
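/* Allocate the DMA-coherent array of event counters the device writes to. */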
static int gve_alloc_counter_array(struct gve_priv *priv)
68 {
69 priv->counter_array =
70 dma_alloc_coherent(&priv->pdev->dev,
71 priv->num_event_counters *
72 sizeof(*priv->counter_array),
73 &priv->counter_array_bus, GFP_KERNEL);
74 if (!priv->counter_array)
75 return -ENOMEM;
76
77 return 0;
78 }
79
static void gve_free_counter_array(struct gve_priv *priv)
81 {
82 if (!priv->counter_array)
83 return;
84
85 dma_free_coherent(&priv->pdev->dev,
86 priv->num_event_counters *
87 sizeof(*priv->counter_array),
88 priv->counter_array, priv->counter_array_bus);
89 priv->counter_array = NULL;
90 }
91
92 /* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
94 {
95 struct gve_priv *priv = container_of(work, struct gve_priv,
96 stats_report_task);
97 if (gve_get_do_report_stats(priv)) {
98 gve_handle_report_stats(priv);
99 gve_clear_do_report_stats(priv);
100 }
101 }
102
static void gve_stats_report_schedule(struct gve_priv *priv)
104 {
105 if (!gve_get_probe_in_progress(priv) &&
106 !gve_get_reset_in_progress(priv)) {
107 gve_set_do_report_stats(priv);
108 queue_work(priv->gve_wq, &priv->stats_report_task);
109 }
110 }
111
static void gve_stats_report_timer(struct timer_list *t)
113 {
114 struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
115
116 mod_timer(&priv->stats_report_timer,
117 round_jiffies(jiffies +
118 msecs_to_jiffies(priv->stats_report_timer_period)));
119 gve_stats_report_schedule(priv);
120 }
121
static int gve_alloc_stats_report(struct gve_priv *priv)
123 {
124 int tx_stats_num, rx_stats_num;
125
126 tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
127 priv->tx_cfg.num_queues;
128 rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
129 priv->rx_cfg.num_queues;
130 priv->stats_report_len = struct_size(priv->stats_report, stats,
131 tx_stats_num + rx_stats_num);
132 priv->stats_report =
133 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
134 &priv->stats_report_bus, GFP_KERNEL);
135 if (!priv->stats_report)
136 return -ENOMEM;
137 /* Set up timer for the report-stats task */
138 timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
139 priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
140 return 0;
141 }
142
static void gve_free_stats_report(struct gve_priv *priv)
144 {
145 if (!priv->stats_report)
146 return;
147
148 del_timer_sync(&priv->stats_report_timer);
149 dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
150 priv->stats_report, priv->stats_report_bus);
151 priv->stats_report = NULL;
152 }
153
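/* Management interrupt handler: defer all work to the service task. */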
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
155 {
156 struct gve_priv *priv = arg;
157
158 queue_work(priv->gve_wq, &priv->service_task);
159 return IRQ_HANDLED;
160 }
161
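/* Per-queue notification interrupt: mask the block's IRQ and hand off to NAPI. */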
static irqreturn_t gve_intr(int irq, void *arg)
163 {
164 struct gve_notify_block *block = arg;
165 struct gve_priv *priv = block->priv;
166
167 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
168 napi_schedule_irqoff(&block->napi);
169 return IRQ_HANDLED;
170 }
171
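/* NAPI poll: service TX and RX work for this notification block, then ack and
 * re-arm the IRQ doorbell once no work remains.
 */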
static int gve_napi_poll(struct napi_struct *napi, int budget)
173 {
174 struct gve_notify_block *block;
175 __be32 __iomem *irq_doorbell;
176 bool reschedule = false;
177 struct gve_priv *priv;
178
179 block = container_of(napi, struct gve_notify_block, napi);
180 priv = block->priv;
181
182 if (block->tx)
183 reschedule |= gve_tx_poll(block, budget);
184 if (block->rx)
185 reschedule |= gve_rx_poll(block, budget);
186
187 if (reschedule)
188 return budget;
189
190 napi_complete(napi);
191 irq_doorbell = gve_irq_doorbell(priv, block);
192 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
193
194 /* Double check we have no extra work.
195 * Ensure unmask synchronizes with checking for work.
196 */
197 mb();
198 if (block->tx)
199 reschedule |= gve_tx_poll(block, -1);
200 if (block->rx)
201 reschedule |= gve_rx_poll(block, -1);
202 if (reschedule && napi_reschedule(napi))
203 iowrite32be(GVE_IRQ_MASK, irq_doorbell);
204
205 return 0;
206 }
207
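/* Allocate MSI-X vectors and notification blocks: one vector per block plus a
 * management vector, shrinking the TX/RX queue maxima if fewer vectors are
 * granted than requested.
 */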
static int gve_alloc_notify_blocks(struct gve_priv *priv)
209 {
210 int num_vecs_requested = priv->num_ntfy_blks + 1;
211 char *name = priv->dev->name;
212 unsigned int active_cpus;
213 int vecs_enabled;
214 int i, j;
215 int err;
216
217 priv->msix_vectors = kvzalloc(num_vecs_requested *
218 sizeof(*priv->msix_vectors), GFP_KERNEL);
219 if (!priv->msix_vectors)
220 return -ENOMEM;
221 for (i = 0; i < num_vecs_requested; i++)
222 priv->msix_vectors[i].entry = i;
223 vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
224 GVE_MIN_MSIX, num_vecs_requested);
225 if (vecs_enabled < 0) {
226 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
227 GVE_MIN_MSIX, vecs_enabled);
228 err = vecs_enabled;
229 goto abort_with_msix_vectors;
230 }
231 if (vecs_enabled != num_vecs_requested) {
232 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
233 int vecs_per_type = new_num_ntfy_blks / 2;
234 int vecs_left = new_num_ntfy_blks % 2;
235
236 priv->num_ntfy_blks = new_num_ntfy_blks;
237 priv->mgmt_msix_idx = priv->num_ntfy_blks;
238 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
239 vecs_per_type);
240 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
241 vecs_per_type + vecs_left);
242 dev_err(&priv->pdev->dev,
243 "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
244 vecs_enabled, priv->tx_cfg.max_queues,
245 priv->rx_cfg.max_queues);
246 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
247 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
248 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
249 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
250 }
251 /* Half the notification blocks go to TX and half to RX */
252 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
253
254 /* Setup Management Vector - the last vector */
255 snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
256 name);
257 err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
258 gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
259 if (err) {
260 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
261 goto abort_with_msix_enabled;
262 }
263 priv->ntfy_blocks =
264 dma_alloc_coherent(&priv->pdev->dev,
265 priv->num_ntfy_blks *
266 sizeof(*priv->ntfy_blocks),
267 &priv->ntfy_block_bus, GFP_KERNEL);
268 if (!priv->ntfy_blocks) {
269 err = -ENOMEM;
270 goto abort_with_mgmt_vector;
271 }
272 /* Setup the other blocks - the first n-1 vectors */
273 for (i = 0; i < priv->num_ntfy_blks; i++) {
274 struct gve_notify_block *block = &priv->ntfy_blocks[i];
275 int msix_idx = i;
276
277 snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
278 name, i);
279 block->priv = priv;
280 err = request_irq(priv->msix_vectors[msix_idx].vector,
281 gve_intr, 0, block->name, block);
282 if (err) {
283 dev_err(&priv->pdev->dev,
284 "Failed to receive msix vector %d\n", i);
285 goto abort_with_some_ntfy_blocks;
286 }
287 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
288 get_cpu_mask(i % active_cpus));
289 }
290 return 0;
291 abort_with_some_ntfy_blocks:
292 for (j = 0; j < i; j++) {
293 struct gve_notify_block *block = &priv->ntfy_blocks[j];
294 int msix_idx = j;
295
296 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
297 NULL);
298 free_irq(priv->msix_vectors[msix_idx].vector, block);
299 }
300 dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
301 sizeof(*priv->ntfy_blocks),
302 priv->ntfy_blocks, priv->ntfy_block_bus);
303 priv->ntfy_blocks = NULL;
304 abort_with_mgmt_vector:
305 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
306 abort_with_msix_enabled:
307 pci_disable_msix(priv->pdev);
308 abort_with_msix_vectors:
309 kvfree(priv->msix_vectors);
310 priv->msix_vectors = NULL;
311 return err;
312 }
313
static void gve_free_notify_blocks(struct gve_priv *priv)
315 {
316 int i;
317
318 if (!priv->msix_vectors)
319 return;
320
321 /* Free the irqs */
322 for (i = 0; i < priv->num_ntfy_blks; i++) {
323 struct gve_notify_block *block = &priv->ntfy_blocks[i];
324 int msix_idx = i;
325
326 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
327 NULL);
328 free_irq(priv->msix_vectors[msix_idx].vector, block);
329 }
330 free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
331 dma_free_coherent(&priv->pdev->dev,
332 priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
333 priv->ntfy_blocks, priv->ntfy_block_bus);
334 priv->ntfy_blocks = NULL;
335 pci_disable_msix(priv->pdev);
336 kvfree(priv->msix_vectors);
337 priv->msix_vectors = NULL;
338 }
339
static int gve_setup_device_resources(struct gve_priv *priv)
341 {
342 int err;
343
344 err = gve_alloc_counter_array(priv);
345 if (err)
346 return err;
347 err = gve_alloc_notify_blocks(priv);
348 if (err)
349 goto abort_with_counter;
350 err = gve_alloc_stats_report(priv);
351 if (err)
352 goto abort_with_ntfy_blocks;
353 err = gve_adminq_configure_device_resources(priv,
354 priv->counter_array_bus,
355 priv->num_event_counters,
356 priv->ntfy_block_bus,
357 priv->num_ntfy_blks);
358 if (unlikely(err)) {
359 dev_err(&priv->pdev->dev,
360 "could not setup device_resources: err=%d\n", err);
361 err = -ENXIO;
362 goto abort_with_stats_report;
363 }
364 err = gve_adminq_report_stats(priv, priv->stats_report_len,
365 priv->stats_report_bus,
366 GVE_STATS_REPORT_TIMER_PERIOD);
367 if (err)
368 dev_err(&priv->pdev->dev,
369 "Failed to report stats: err=%d\n", err);
370 gve_set_device_resources_ok(priv);
371 return 0;
372 abort_with_stats_report:
373 gve_free_stats_report(priv);
374 abort_with_ntfy_blocks:
375 gve_free_notify_blocks(priv);
376 abort_with_counter:
377 gve_free_counter_array(priv);
378 return err;
379 }
380
381 static void gve_trigger_reset(struct gve_priv *priv);
382
static void gve_teardown_device_resources(struct gve_priv *priv)
384 {
385 int err;
386
387 /* Tell device its resources are being freed */
388 if (gve_get_device_resources_ok(priv)) {
389 /* detach the stats report */
390 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
391 if (err) {
392 dev_err(&priv->pdev->dev,
393 "Failed to detach stats report: err=%d\n", err);
394 gve_trigger_reset(priv);
395 }
396 err = gve_adminq_deconfigure_device_resources(priv);
397 if (err) {
398 dev_err(&priv->pdev->dev,
399 "Could not deconfigure device resources: err=%d\n",
400 err);
401 gve_trigger_reset(priv);
402 }
403 }
404 gve_free_counter_array(priv);
405 gve_free_notify_blocks(priv);
406 gve_free_stats_report(priv);
407 gve_clear_device_resources_ok(priv);
408 }
409
static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
411 {
412 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
413
414 netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
415 NAPI_POLL_WEIGHT);
416 }
417
static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
419 {
420 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
421
422 netif_napi_del(&block->napi);
423 }
424
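/* Register all TX and RX queue page lists with the device. */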
static int gve_register_qpls(struct gve_priv *priv)
426 {
427 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
428 int err;
429 int i;
430
431 for (i = 0; i < num_qpls; i++) {
432 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
433 if (err) {
434 netif_err(priv, drv, priv->dev,
435 "failed to register queue page list %d\n",
436 priv->qpls[i].id);
437 /* This failure will trigger a reset - no need to clean
438 * up
439 */
440 return err;
441 }
442 }
443 return 0;
444 }
445
static int gve_unregister_qpls(struct gve_priv *priv)
447 {
448 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
449 int err;
450 int i;
451
452 for (i = 0; i < num_qpls; i++) {
453 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
454 /* This failure will trigger a reset - no need to clean up */
455 if (err) {
456 netif_err(priv, drv, priv->dev,
457 "Failed to unregister queue page list %d\n",
458 priv->qpls[i].id);
459 return err;
460 }
461 }
462 return 0;
463 }
464
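/* Ask the device to create the configured TX and RX queues. */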
static int gve_create_rings(struct gve_priv *priv)
466 {
467 int err;
468 int i;
469
470 err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
471 if (err) {
472 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
473 priv->tx_cfg.num_queues);
474 /* This failure will trigger a reset - no need to clean
475 * up
476 */
477 return err;
478 }
479 netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
480 priv->tx_cfg.num_queues);
481
482 err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
483 if (err) {
484 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
485 priv->rx_cfg.num_queues);
486 /* This failure will trigger a reset - no need to clean
487 * up
488 */
489 return err;
490 }
491 netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
492 priv->rx_cfg.num_queues);
493
494 /* Rx data ring has been prefilled with packet buffers at queue
495 * allocation time.
496 * Write the doorbell to provide descriptor slots and packet buffers
497 * to the NIC.
498 */
499 for (i = 0; i < priv->rx_cfg.num_queues; i++)
500 gve_rx_write_doorbell(priv, &priv->rx[i]);
501
502 return 0;
503 }
504
static int gve_alloc_rings(struct gve_priv *priv)
506 {
507 int ntfy_idx;
508 int err;
509 int i;
510
511 /* Setup tx rings */
512 priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
513 GFP_KERNEL);
514 if (!priv->tx)
515 return -ENOMEM;
516 err = gve_tx_alloc_rings(priv);
517 if (err)
518 goto free_tx;
519 /* Setup rx rings */
520 priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
521 GFP_KERNEL);
522 if (!priv->rx) {
523 err = -ENOMEM;
524 goto free_tx_queue;
525 }
526 err = gve_rx_alloc_rings(priv);
527 if (err)
528 goto free_rx;
529 /* Add tx napi & init sync stats*/
530 for (i = 0; i < priv->tx_cfg.num_queues; i++) {
531 u64_stats_init(&priv->tx[i].statss);
532 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
533 gve_add_napi(priv, ntfy_idx);
534 }
535 /* Add rx napi & init sync stats*/
536 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
537 u64_stats_init(&priv->rx[i].statss);
538 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
539 gve_add_napi(priv, ntfy_idx);
540 }
541
542 return 0;
543
544 free_rx:
545 kvfree(priv->rx);
546 priv->rx = NULL;
547 free_tx_queue:
548 gve_tx_free_rings(priv);
549 free_tx:
550 kvfree(priv->tx);
551 priv->tx = NULL;
552 return err;
553 }
554
static int gve_destroy_rings(struct gve_priv *priv)
556 {
557 int err;
558
559 err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
560 if (err) {
561 netif_err(priv, drv, priv->dev,
562 "failed to destroy tx queues\n");
563 /* This failure will trigger a reset - no need to clean up */
564 return err;
565 }
566 netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
567 err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
568 if (err) {
569 netif_err(priv, drv, priv->dev,
570 "failed to destroy rx queues\n");
571 /* This failure will trigger a reset - no need to clean up */
572 return err;
573 }
574 netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
575 return 0;
576 }
577
static void gve_free_rings(struct gve_priv *priv)
579 {
580 int ntfy_idx;
581 int i;
582
583 if (priv->tx) {
584 for (i = 0; i < priv->tx_cfg.num_queues; i++) {
585 ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
586 gve_remove_napi(priv, ntfy_idx);
587 }
588 gve_tx_free_rings(priv);
589 kvfree(priv->tx);
590 priv->tx = NULL;
591 }
592 if (priv->rx) {
593 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
594 ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
595 gve_remove_napi(priv, ntfy_idx);
596 }
597 gve_rx_free_rings(priv);
598 kvfree(priv->rx);
599 priv->rx = NULL;
600 }
601 }
602
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
604 struct page **page, dma_addr_t *dma,
605 enum dma_data_direction dir)
606 {
607 *page = alloc_page(GFP_KERNEL);
608 if (!*page) {
609 priv->page_alloc_fail++;
610 return -ENOMEM;
611 }
612 *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
613 if (dma_mapping_error(dev, *dma)) {
614 priv->dma_mapping_error++;
615 put_page(*page);
616 return -ENOMEM;
617 }
618 return 0;
619 }
620
static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
622 int pages)
623 {
624 struct gve_queue_page_list *qpl = &priv->qpls[id];
625 int err;
626 int i;
627
628 if (pages + priv->num_registered_pages > priv->max_registered_pages) {
629 netif_err(priv, drv, priv->dev,
630 "Reached max number of registered pages %llu > %llu\n",
631 pages + priv->num_registered_pages,
632 priv->max_registered_pages);
633 return -EINVAL;
634 }
635
636 qpl->id = id;
637 qpl->num_entries = 0;
638 qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
639 /* caller handles clean up */
640 if (!qpl->pages)
641 return -ENOMEM;
642 qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
643 GFP_KERNEL);
644 /* caller handles clean up */
645 if (!qpl->page_buses)
646 return -ENOMEM;
647
648 for (i = 0; i < pages; i++) {
649 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
650 &qpl->page_buses[i],
651 gve_qpl_dma_dir(priv, id));
652 /* caller handles clean up */
653 if (err)
654 return -ENOMEM;
655 qpl->num_entries++;
656 }
657 priv->num_registered_pages += pages;
658
659 return 0;
660 }
661
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
663 enum dma_data_direction dir)
664 {
665 if (!dma_mapping_error(dev, dma))
666 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
667 if (page)
668 put_page(page);
669 }
670
static void gve_free_queue_page_list(struct gve_priv *priv,
672 int id)
673 {
674 struct gve_queue_page_list *qpl = &priv->qpls[id];
675 int i;
676
677 if (!qpl->pages)
678 return;
679 if (!qpl->page_buses)
680 goto free_pages;
681
682 for (i = 0; i < qpl->num_entries; i++)
683 gve_free_page(&priv->pdev->dev, qpl->pages[i],
684 qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
685
686 kvfree(qpl->page_buses);
687 free_pages:
688 kvfree(qpl->pages);
689 priv->num_registered_pages -= qpl->num_entries;
690 }
691
static int gve_alloc_qpls(struct gve_priv *priv)
693 {
694 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
695 int i, j;
696 int err;
697
698 priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
699 if (!priv->qpls)
700 return -ENOMEM;
701
702 for (i = 0; i < gve_num_tx_qpls(priv); i++) {
703 err = gve_alloc_queue_page_list(priv, i,
704 priv->tx_pages_per_qpl);
705 if (err)
706 goto free_qpls;
707 }
708 for (; i < num_qpls; i++) {
709 err = gve_alloc_queue_page_list(priv, i,
710 priv->rx_pages_per_qpl);
711 if (err)
712 goto free_qpls;
713 }
714
715 priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
716 sizeof(unsigned long) * BITS_PER_BYTE;
717 priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
718 sizeof(unsigned long), GFP_KERNEL);
719 if (!priv->qpl_cfg.qpl_id_map) {
720 err = -ENOMEM;
721 goto free_qpls;
722 }
723
724 return 0;
725
726 free_qpls:
727 for (j = 0; j <= i; j++)
728 gve_free_queue_page_list(priv, j);
729 kvfree(priv->qpls);
730 return err;
731 }
732
static void gve_free_qpls(struct gve_priv *priv)
734 {
735 int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
736 int i;
737
738 kvfree(priv->qpl_cfg.qpl_id_map);
739
740 for (i = 0; i < num_qpls; i++)
741 gve_free_queue_page_list(priv, i);
742
743 kvfree(priv->qpls);
744 }
745
746 /* Use this to schedule a reset when the device is capable of continuing
747 * to handle other requests in its current state. If it is not, do a reset
748 * in thread instead.
749 */
void gve_schedule_reset(struct gve_priv *priv)
751 {
752 gve_set_do_reset(priv);
753 queue_work(priv->gve_wq, &priv->service_task);
754 }
755
756 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
757 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
758 static void gve_turndown(struct gve_priv *priv);
759 static void gve_turnup(struct gve_priv *priv);
760
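/* ndo_open: allocate queue page lists and rings, register them with the
 * device, and bring the data path up.
 */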
static int gve_open(struct net_device *dev)
762 {
763 struct gve_priv *priv = netdev_priv(dev);
764 int err;
765
766 err = gve_alloc_qpls(priv);
767 if (err)
768 return err;
769 err = gve_alloc_rings(priv);
770 if (err)
771 goto free_qpls;
772
773 err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
774 if (err)
775 goto free_rings;
776 err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
777 if (err)
778 goto free_rings;
779
780 err = gve_register_qpls(priv);
781 if (err)
782 goto reset;
783 err = gve_create_rings(priv);
784 if (err)
785 goto reset;
786 gve_set_device_rings_ok(priv);
787
788 if (gve_get_report_stats(priv))
789 mod_timer(&priv->stats_report_timer,
790 round_jiffies(jiffies +
791 msecs_to_jiffies(priv->stats_report_timer_period)));
792
793 gve_turnup(priv);
794 queue_work(priv->gve_wq, &priv->service_task);
795 priv->interface_up_cnt++;
796 return 0;
797
798 free_rings:
799 gve_free_rings(priv);
800 free_qpls:
801 gve_free_qpls(priv);
802 return err;
803
804 reset:
805 /* This must have been called from a reset due to the rtnl lock
806 * so just return at this point.
807 */
808 if (gve_get_reset_in_progress(priv))
809 return err;
810 /* Otherwise reset before returning */
811 gve_reset_and_teardown(priv, true);
812 /* if this fails there is nothing we can do so just ignore the return */
813 gve_reset_recovery(priv, false);
814 /* return the original error */
815 return err;
816 }
817
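/* ndo_stop: quiesce the data path, destroy the device queues and free the
 * host-side rings and queue page lists.
 */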
static int gve_close(struct net_device *dev)
819 {
820 struct gve_priv *priv = netdev_priv(dev);
821 int err;
822
823 netif_carrier_off(dev);
824 if (gve_get_device_rings_ok(priv)) {
825 gve_turndown(priv);
826 err = gve_destroy_rings(priv);
827 if (err)
828 goto err;
829 err = gve_unregister_qpls(priv);
830 if (err)
831 goto err;
832 gve_clear_device_rings_ok(priv);
833 }
834 del_timer_sync(&priv->stats_report_timer);
835
836 gve_free_rings(priv);
837 gve_free_qpls(priv);
838 priv->interface_down_cnt++;
839 return 0;
840
841 err:
842 /* This must have been called from a reset due to the rtnl lock
843 * so just return at this point.
844 */
845 if (gve_get_reset_in_progress(priv))
846 return err;
847 /* Otherwise reset before returning */
848 gve_reset_and_teardown(priv, true);
849 return gve_reset_recovery(priv, false);
850 }
851
int gve_adjust_queues(struct gve_priv *priv,
853 struct gve_queue_config new_rx_config,
854 struct gve_queue_config new_tx_config)
855 {
856 int err;
857
858 if (netif_carrier_ok(priv->dev)) {
859 /* To make this process as simple as possible we teardown the
860 * device, set the new configuration, and then bring the device
861 * up again.
862 */
863 err = gve_close(priv->dev);
864 /* we have already tried to reset in close,
865 * just fail at this point
866 */
867 if (err)
868 return err;
869 priv->tx_cfg = new_tx_config;
870 priv->rx_cfg = new_rx_config;
871
872 err = gve_open(priv->dev);
873 if (err)
874 goto err;
875
876 return 0;
877 }
878 /* Set the config for the next up. */
879 priv->tx_cfg = new_tx_config;
880 priv->rx_cfg = new_rx_config;
881
882 return 0;
883 err:
884 netif_err(priv, drv, priv->dev,
885 "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
886 gve_turndown(priv);
887 return err;
888 }
889
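/* Quiesce the data path: drop the carrier, disable NAPI and stop TX queues. */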
static void gve_turndown(struct gve_priv *priv)
891 {
892 int idx;
893
894 if (netif_carrier_ok(priv->dev))
895 netif_carrier_off(priv->dev);
896
897 if (!gve_get_napi_enabled(priv))
898 return;
899
900 /* Disable napi to prevent more work from coming in */
901 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
902 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
903 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
904
905 napi_disable(&block->napi);
906 }
907 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
908 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
909 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
910
911 napi_disable(&block->napi);
912 }
913
914 /* Stop tx queues */
915 netif_tx_disable(priv->dev);
916
917 gve_clear_napi_enabled(priv);
918 gve_clear_report_stats(priv);
919 }
920
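/* Restart the data path: start TX queues, enable NAPI and unmask all queue
 * interrupts.
 */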
static void gve_turnup(struct gve_priv *priv)
922 {
923 int idx;
924
925 /* Start the tx queues */
926 netif_tx_start_all_queues(priv->dev);
927
928 /* Enable napi and unmask interrupts for all queues */
929 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
930 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
931 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
932
933 napi_enable(&block->napi);
934 iowrite32be(0, gve_irq_doorbell(priv, block));
935 }
936 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
937 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
938 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
939
940 napi_enable(&block->napi);
941 iowrite32be(0, gve_irq_doorbell(priv, block));
942 }
943
944 gve_set_napi_enabled(priv);
945 }
946
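/* ndo_tx_timeout: kick the stalled queue if the NIC has unprocessed
 * completions, otherwise schedule a device reset.
 */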
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
948 {
949 struct gve_notify_block *block;
950 struct gve_tx_ring *tx = NULL;
951 struct gve_priv *priv;
952 u32 last_nic_done;
953 u32 current_time;
954 u32 ntfy_idx;
955
956 netdev_info(dev, "Timeout on tx queue, %d", txqueue);
957 priv = netdev_priv(dev);
958 if (txqueue > priv->tx_cfg.num_queues)
959 goto reset;
960
961 ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
962 if (ntfy_idx >= priv->num_ntfy_blks)
963 goto reset;
964
965 block = &priv->ntfy_blocks[ntfy_idx];
966 tx = block->tx;
967
968 current_time = jiffies_to_msecs(jiffies);
969 if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
970 goto reset;
971
972 /* Check to see if there are missed completions, which will allow us to
973 * kick the queue.
974 */
975 last_nic_done = gve_tx_load_event_counter(priv, tx);
976 if (last_nic_done - tx->done) {
977 netdev_info(dev, "Kicking queue %d", txqueue);
978 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
979 napi_schedule(&block->napi);
980 tx->last_kick_msec = current_time;
981 goto out;
982 } // Else reset.
983
984 reset:
985 gve_schedule_reset(priv);
986
987 out:
988 if (tx)
989 tx->queue_timeout++;
990 priv->tx_timeo_cnt++;
991 }
992
993 static const struct net_device_ops gve_netdev_ops = {
994 .ndo_start_xmit = gve_tx,
995 .ndo_open = gve_open,
996 .ndo_stop = gve_close,
997 .ndo_get_stats64 = gve_get_stats,
998 .ndo_tx_timeout = gve_tx_timeout,
999 };
1000
static void gve_handle_status(struct gve_priv *priv, u32 status)
1002 {
1003 if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1004 dev_info(&priv->pdev->dev, "Device requested reset.\n");
1005 gve_set_do_reset(priv);
1006 }
1007 if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1008 priv->stats_report_trigger_cnt++;
1009 gve_set_do_report_stats(priv);
1010 }
1011 }
1012
static void gve_handle_reset(struct gve_priv *priv)
1014 {
1015 /* A service task will be scheduled at the end of probe to catch any
1016 * resets that need to happen, and we don't want to reset until
1017 * probe is done.
1018 */
1019 if (gve_get_probe_in_progress(priv))
1020 return;
1021
1022 if (gve_get_do_reset(priv)) {
1023 rtnl_lock();
1024 gve_reset(priv, false);
1025 rtnl_unlock();
1026 }
1027 }
1028
void gve_handle_report_stats(struct gve_priv *priv)
1030 {
1031 struct stats *stats = priv->stats_report->stats;
1032 int idx, stats_idx = 0;
1033 unsigned int start = 0;
1034 u64 tx_bytes;
1035
1036 if (!gve_get_report_stats(priv))
1037 return;
1038
1039 be64_add_cpu(&priv->stats_report->written_count, 1);
1040 /* tx stats */
1041 if (priv->tx) {
1042 for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1043 do {
1044 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1045 tx_bytes = priv->tx[idx].bytes_done;
1046 } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1047 stats[stats_idx++] = (struct stats) {
1048 .stat_name = cpu_to_be32(TX_WAKE_CNT),
1049 .value = cpu_to_be64(priv->tx[idx].wake_queue),
1050 .queue_id = cpu_to_be32(idx),
1051 };
1052 stats[stats_idx++] = (struct stats) {
1053 .stat_name = cpu_to_be32(TX_STOP_CNT),
1054 .value = cpu_to_be64(priv->tx[idx].stop_queue),
1055 .queue_id = cpu_to_be32(idx),
1056 };
1057 stats[stats_idx++] = (struct stats) {
1058 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1059 .value = cpu_to_be64(priv->tx[idx].req),
1060 .queue_id = cpu_to_be32(idx),
1061 };
1062 stats[stats_idx++] = (struct stats) {
1063 .stat_name = cpu_to_be32(TX_BYTES_SENT),
1064 .value = cpu_to_be64(tx_bytes),
1065 .queue_id = cpu_to_be32(idx),
1066 };
1067 stats[stats_idx++] = (struct stats) {
1068 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1069 .value = cpu_to_be64(priv->tx[idx].done),
1070 .queue_id = cpu_to_be32(idx),
1071 };
1072 stats[stats_idx++] = (struct stats) {
1073 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1074 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
1075 .queue_id = cpu_to_be32(idx),
1076 };
1077 }
1078 }
1079 /* rx stats */
1080 if (priv->rx) {
1081 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1082 stats[stats_idx++] = (struct stats) {
1083 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1084 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1085 .queue_id = cpu_to_be32(idx),
1086 };
1087 stats[stats_idx++] = (struct stats) {
1088 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
.value = cpu_to_be64(priv->rx[idx].fill_cnt),
1090 .queue_id = cpu_to_be32(idx),
1091 };
1092 }
1093 }
1094 }
1095
static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1097 {
1098 if (!gve_get_napi_enabled(priv))
1099 return;
1100
1101 if (link_status == netif_carrier_ok(priv->dev))
1102 return;
1103
1104 if (link_status) {
1105 netdev_info(priv->dev, "Device link is up.\n");
1106 netif_carrier_on(priv->dev);
1107 } else {
1108 netdev_info(priv->dev, "Device link is down.\n");
1109 netif_carrier_off(priv->dev);
1110 }
1111 }
1112
1113 /* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
1115 {
1116 struct gve_priv *priv = container_of(work, struct gve_priv,
1117 service_task);
1118 u32 status = ioread32be(&priv->reg_bar0->device_status);
1119
1120 gve_handle_status(priv, status);
1121
1122 gve_handle_reset(priv);
1123 gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1124 }
1125
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
1127 {
1128 int num_ntfy;
1129 int err;
1130
1131 /* Set up the adminq */
1132 err = gve_adminq_alloc(&priv->pdev->dev, priv);
1133 if (err) {
1134 dev_err(&priv->pdev->dev,
1135 "Failed to alloc admin queue: err=%d\n", err);
1136 return err;
1137 }
1138
1139 if (skip_describe_device)
1140 goto setup_device;
1141
1142 /* Get the initial information we need from the device */
1143 err = gve_adminq_describe_device(priv);
1144 if (err) {
1145 dev_err(&priv->pdev->dev,
1146 "Could not get device information: err=%d\n", err);
1147 goto err;
1148 }
1149 if (priv->dev->max_mtu > PAGE_SIZE) {
1150 priv->dev->max_mtu = PAGE_SIZE;
1151 err = gve_adminq_set_mtu(priv, priv->dev->mtu);
1152 if (err) {
1153 dev_err(&priv->pdev->dev, "Could not set mtu");
1154 goto err;
1155 }
1156 }
1157 priv->dev->mtu = priv->dev->max_mtu;
1158 num_ntfy = pci_msix_vec_count(priv->pdev);
1159 if (num_ntfy <= 0) {
1160 dev_err(&priv->pdev->dev,
1161 "could not count MSI-x vectors: err=%d\n", num_ntfy);
1162 err = num_ntfy;
1163 goto err;
1164 } else if (num_ntfy < GVE_MIN_MSIX) {
1165 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
1166 GVE_MIN_MSIX, num_ntfy);
1167 err = -EINVAL;
1168 goto err;
1169 }
1170
1171 priv->num_registered_pages = 0;
1172 priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
1173 /* gvnic has one Notification Block per MSI-x vector, except for the
1174 * management vector
1175 */
1176 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
1177 priv->mgmt_msix_idx = priv->num_ntfy_blks;
1178
1179 priv->tx_cfg.max_queues =
1180 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
1181 priv->rx_cfg.max_queues =
1182 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
1183
1184 priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
1185 priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
1186 if (priv->default_num_queues > 0) {
1187 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
1188 priv->tx_cfg.num_queues);
1189 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
1190 priv->rx_cfg.num_queues);
1191 }
1192
1193 dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
1194 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
1195 dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
1196 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
1197
1198 setup_device:
1199 err = gve_setup_device_resources(priv);
1200 if (!err)
1201 return 0;
1202 err:
1203 gve_adminq_free(&priv->pdev->dev, priv);
1204 return err;
1205 }
1206
static void gve_teardown_priv_resources(struct gve_priv *priv)
1208 {
1209 gve_teardown_device_resources(priv);
1210 gve_adminq_free(&priv->pdev->dev, priv);
1211 }
1212
static void gve_trigger_reset(struct gve_priv *priv)
1214 {
1215 /* Reset the device by releasing the AQ */
1216 gve_adminq_release(priv);
1217 }
1218
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
1220 {
1221 gve_trigger_reset(priv);
1222 /* With the reset having already happened, close cannot fail */
1223 if (was_up)
1224 gve_close(priv->dev);
1225 gve_teardown_priv_resources(priv);
1226 }
1227
static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
1229 {
1230 int err;
1231
1232 err = gve_init_priv(priv, true);
1233 if (err)
1234 goto err;
1235 if (was_up) {
1236 err = gve_open(priv->dev);
1237 if (err)
1238 goto err;
1239 }
1240 return 0;
1241 err:
1242 dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1243 gve_turndown(priv);
1244 return err;
1245 }
1246
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
1248 {
1249 bool was_up = netif_carrier_ok(priv->dev);
1250 int err;
1251
1252 dev_info(&priv->pdev->dev, "Performing reset\n");
1253 gve_clear_do_reset(priv);
1254 gve_set_reset_in_progress(priv);
1255 /* If we aren't attempting to teardown normally, just go turndown and
1256 * reset right away.
1257 */
1258 if (!attempt_teardown) {
1259 gve_turndown(priv);
1260 gve_reset_and_teardown(priv, was_up);
1261 } else {
1262 /* Otherwise attempt to close normally */
1263 if (was_up) {
1264 err = gve_close(priv->dev);
1265 /* If that fails reset as we did above */
1266 if (err)
1267 gve_reset_and_teardown(priv, was_up);
1268 }
1269 /* Clean up any remaining resources */
1270 gve_teardown_priv_resources(priv);
1271 }
1272
1273 /* Set it all back up */
1274 err = gve_reset_recovery(priv, was_up);
1275 gve_clear_reset_in_progress(priv);
1276 priv->reset_cnt++;
1277 priv->interface_up_cnt = 0;
1278 priv->interface_down_cnt = 0;
1279 priv->stats_report_trigger_cnt = 0;
1280 return err;
1281 }
1282
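/* Write the driver version string byte by byte to the version register. */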
static void gve_write_version(u8 __iomem *driver_version_register)
1284 {
1285 const char *c = gve_version_prefix;
1286
1287 while (*c) {
1288 writeb(*c, driver_version_register);
1289 c++;
1290 }
1291
1292 c = gve_version_str;
1293 while (*c) {
1294 writeb(*c, driver_version_register);
1295 c++;
1296 }
1297 writeb('\n', driver_version_register);
1298 }
1299
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1301 {
1302 int max_tx_queues, max_rx_queues;
1303 struct net_device *dev;
1304 __be32 __iomem *db_bar;
1305 struct gve_registers __iomem *reg_bar;
1306 struct gve_priv *priv;
1307 int err;
1308
1309 err = pci_enable_device(pdev);
1310 if (err)
1311 return -ENXIO;
1312
1313 err = pci_request_regions(pdev, "gvnic-cfg");
1314 if (err)
1315 goto abort_with_enabled;
1316
1317 pci_set_master(pdev);
1318
1319 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1320 if (err) {
1321 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1322 goto abort_with_pci_region;
1323 }
1324
1325 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1326 if (err) {
1327 dev_err(&pdev->dev,
1328 "Failed to set consistent dma mask: err=%d\n", err);
1329 goto abort_with_pci_region;
1330 }
1331
1332 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1333 if (!reg_bar) {
1334 dev_err(&pdev->dev, "Failed to map pci bar!\n");
1335 err = -ENOMEM;
1336 goto abort_with_pci_region;
1337 }
1338
1339 db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1340 if (!db_bar) {
1341 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1342 err = -ENOMEM;
1343 goto abort_with_reg_bar;
1344 }
1345
gve_write_version(&reg_bar->driver_version);
/* Get max queues to alloc etherdev */
max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
1350 /* Alloc and setup the netdev and priv */
1351 dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1352 if (!dev) {
1353 dev_err(&pdev->dev, "could not allocate netdev\n");
1354 goto abort_with_db_bar;
1355 }
1356 SET_NETDEV_DEV(dev, &pdev->dev);
1357 pci_set_drvdata(pdev, dev);
1358 dev->ethtool_ops = &gve_ethtool_ops;
1359 dev->netdev_ops = &gve_netdev_ops;
1360 /* advertise features */
1361 dev->hw_features = NETIF_F_HIGHDMA;
1362 dev->hw_features |= NETIF_F_SG;
1363 dev->hw_features |= NETIF_F_HW_CSUM;
1364 dev->hw_features |= NETIF_F_TSO;
1365 dev->hw_features |= NETIF_F_TSO6;
1366 dev->hw_features |= NETIF_F_TSO_ECN;
1367 dev->hw_features |= NETIF_F_RXCSUM;
1368 dev->hw_features |= NETIF_F_RXHASH;
1369 dev->features = dev->hw_features;
1370 dev->watchdog_timeo = 5 * HZ;
1371 dev->min_mtu = ETH_MIN_MTU;
1372 netif_carrier_off(dev);
1373
1374 priv = netdev_priv(dev);
1375 priv->dev = dev;
1376 priv->pdev = pdev;
1377 priv->msg_enable = DEFAULT_MSG_LEVEL;
1378 priv->reg_bar0 = reg_bar;
1379 priv->db_bar2 = db_bar;
1380 priv->service_task_flags = 0x0;
1381 priv->state_flags = 0x0;
1382 priv->ethtool_flags = 0x0;
1383
1384 gve_set_probe_in_progress(priv);
1385 priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1386 if (!priv->gve_wq) {
1387 dev_err(&pdev->dev, "Could not allocate workqueue");
1388 err = -ENOMEM;
1389 goto abort_with_netdev;
1390 }
1391 INIT_WORK(&priv->service_task, gve_service_task);
1392 INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
1393 priv->tx_cfg.max_queues = max_tx_queues;
1394 priv->rx_cfg.max_queues = max_rx_queues;
1395
1396 err = gve_init_priv(priv, false);
1397 if (err)
1398 goto abort_with_wq;
1399
1400 err = register_netdev(dev);
1401 if (err)
1402 goto abort_with_gve_init;
1403
1404 dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1405 gve_clear_probe_in_progress(priv);
1406 queue_work(priv->gve_wq, &priv->service_task);
1407 return 0;
1408
1409 abort_with_gve_init:
1410 gve_teardown_priv_resources(priv);
1411
1412 abort_with_wq:
1413 destroy_workqueue(priv->gve_wq);
1414
1415 abort_with_netdev:
1416 free_netdev(dev);
1417
1418 abort_with_db_bar:
1419 pci_iounmap(pdev, db_bar);
1420
1421 abort_with_reg_bar:
1422 pci_iounmap(pdev, reg_bar);
1423
1424 abort_with_pci_region:
1425 pci_release_regions(pdev);
1426
1427 abort_with_enabled:
1428 pci_disable_device(pdev);
1429 return -ENXIO;
1430 }
1431
static void gve_remove(struct pci_dev *pdev)
1433 {
1434 struct net_device *netdev = pci_get_drvdata(pdev);
1435 struct gve_priv *priv = netdev_priv(netdev);
1436 __be32 __iomem *db_bar = priv->db_bar2;
1437 void __iomem *reg_bar = priv->reg_bar0;
1438
1439 unregister_netdev(netdev);
1440 gve_teardown_priv_resources(priv);
1441 destroy_workqueue(priv->gve_wq);
1442 free_netdev(netdev);
1443 pci_iounmap(pdev, db_bar);
1444 pci_iounmap(pdev, reg_bar);
1445 pci_release_regions(pdev);
1446 pci_disable_device(pdev);
1447 }
1448
1449 static const struct pci_device_id gve_id_table[] = {
1450 { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
1451 { }
1452 };
1453
1454 static struct pci_driver gvnic_driver = {
1455 .name = "gvnic",
1456 .id_table = gve_id_table,
1457 .probe = gve_probe,
1458 .remove = gve_remove,
1459 };
1460
1461 module_pci_driver(gvnic_driver);
1462
1463 MODULE_DEVICE_TABLE(pci, gve_id_table);
1464 MODULE_AUTHOR("Google, Inc.");
1465 MODULE_DESCRIPTION("gVNIC Driver");
1466 MODULE_LICENSE("Dual MIT/GPL");
1467 MODULE_VERSION(GVE_VERSION);
1468