1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2019 Google, Inc.
5  */
6 
7 #include <linux/cpumask.h>
8 #include <linux/etherdevice.h>
9 #include <linux/interrupt.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
12 #include <linux/sched.h>
13 #include <linux/timer.h>
14 #include <linux/workqueue.h>
15 #include <net/sch_generic.h>
16 #include "gve.h"
17 #include "gve_adminq.h"
18 #include "gve_register.h"
19 
20 #define GVE_DEFAULT_RX_COPYBREAK	(256)
21 
22 #define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
23 #define GVE_VERSION		"1.0.0"
24 #define GVE_VERSION_PREFIX	"GVE-"
25 
26 // Minimum amount of time between queue kicks in msec (10 seconds)
27 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
28 
29 const char gve_version_str[] = GVE_VERSION;
30 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
31 
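/* ndo_get_stats64 handler: aggregate per-ring RX/TX packet and byte counters,
 * re-reading each ring's counters until the u64_stats sequence is stable.
 */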
32 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
33 {
34 	struct gve_priv *priv = netdev_priv(dev);
35 	unsigned int start;
36 	u64 packets, bytes;
37 	int ring;
38 
39 	if (priv->rx) {
40 		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
41 			do {
42 				start =
43 				  u64_stats_fetch_begin(&priv->rx[ring].statss);
44 				packets = priv->rx[ring].rpackets;
45 				bytes = priv->rx[ring].rbytes;
46 			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
47 						       start));
48 			s->rx_packets += packets;
49 			s->rx_bytes += bytes;
50 		}
51 	}
52 	if (priv->tx) {
53 		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
54 			do {
55 				start =
56 				  u64_stats_fetch_begin(&priv->tx[ring].statss);
57 				packets = priv->tx[ring].pkt_done;
58 				bytes = priv->tx[ring].bytes_done;
59 			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
60 						       start));
61 			s->tx_packets += packets;
62 			s->tx_bytes += bytes;
63 		}
64 	}
65 }
66 
67 static int gve_alloc_counter_array(struct gve_priv *priv)
68 {
69 	priv->counter_array =
70 		dma_alloc_coherent(&priv->pdev->dev,
71 				   priv->num_event_counters *
72 				   sizeof(*priv->counter_array),
73 				   &priv->counter_array_bus, GFP_KERNEL);
74 	if (!priv->counter_array)
75 		return -ENOMEM;
76 
77 	return 0;
78 }
79 
80 static void gve_free_counter_array(struct gve_priv *priv)
81 {
82 	if (!priv->counter_array)
83 		return;
84 
85 	dma_free_coherent(&priv->pdev->dev,
86 			  priv->num_event_counters *
87 			  sizeof(*priv->counter_array),
88 			  priv->counter_array, priv->counter_array_bus);
89 	priv->counter_array = NULL;
90 }
91 
92 /* NIC requests to report stats */
93 static void gve_stats_report_task(struct work_struct *work)
94 {
95 	struct gve_priv *priv = container_of(work, struct gve_priv,
96 					     stats_report_task);
97 	if (gve_get_do_report_stats(priv)) {
98 		gve_handle_report_stats(priv);
99 		gve_clear_do_report_stats(priv);
100 	}
101 }
102 
103 static void gve_stats_report_schedule(struct gve_priv *priv)
104 {
105 	if (!gve_get_probe_in_progress(priv) &&
106 	    !gve_get_reset_in_progress(priv)) {
107 		gve_set_do_report_stats(priv);
108 		queue_work(priv->gve_wq, &priv->stats_report_task);
109 	}
110 }
111 
112 static void gve_stats_report_timer(struct timer_list *t)
113 {
114 	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
115 
116 	mod_timer(&priv->stats_report_timer,
117 		  round_jiffies(jiffies +
118 		  msecs_to_jiffies(priv->stats_report_timer_period)));
119 	gve_stats_report_schedule(priv);
120 }
121 
122 static int gve_alloc_stats_report(struct gve_priv *priv)
123 {
124 	int tx_stats_num, rx_stats_num;
125 
126 	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
127 		       priv->tx_cfg.num_queues;
128 	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
129 		       priv->rx_cfg.num_queues;
130 	priv->stats_report_len = struct_size(priv->stats_report, stats,
131 					     tx_stats_num + rx_stats_num);
132 	priv->stats_report =
133 		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
134 				   &priv->stats_report_bus, GFP_KERNEL);
135 	if (!priv->stats_report)
136 		return -ENOMEM;
137 	/* Set up timer for the report-stats task */
138 	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
139 	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
140 	return 0;
141 }
142 
143 static void gve_free_stats_report(struct gve_priv *priv)
144 {
145 	if (!priv->stats_report)
146 		return;
147 
148 	del_timer_sync(&priv->stats_report_timer);
149 	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
150 			  priv->stats_report, priv->stats_report_bus);
151 	priv->stats_report = NULL;
152 }
153 
154 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
155 {
156 	struct gve_priv *priv = arg;
157 
158 	queue_work(priv->gve_wq, &priv->service_task);
159 	return IRQ_HANDLED;
160 }
161 
162 static irqreturn_t gve_intr(int irq, void *arg)
163 {
164 	struct gve_notify_block *block = arg;
165 	struct gve_priv *priv = block->priv;
166 
167 	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
168 	napi_schedule_irqoff(&block->napi);
169 	return IRQ_HANDLED;
170 }
171 
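/* NAPI poll handler: service TX and RX work for this notification block,
 * then re-arm the interrupt and double check for work that raced with the
 * unmask before completing.
 */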
172 static int gve_napi_poll(struct napi_struct *napi, int budget)
173 {
174 	struct gve_notify_block *block;
175 	__be32 __iomem *irq_doorbell;
176 	bool reschedule = false;
177 	struct gve_priv *priv;
178 
179 	block = container_of(napi, struct gve_notify_block, napi);
180 	priv = block->priv;
181 
182 	if (block->tx)
183 		reschedule |= gve_tx_poll(block, budget);
184 	if (block->rx)
185 		reschedule |= gve_rx_poll(block, budget);
186 
187 	if (reschedule)
188 		return budget;
189 
190 	napi_complete(napi);
191 	irq_doorbell = gve_irq_doorbell(priv, block);
192 	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
193 
194 	/* Double check we have no extra work.
195 	 * Ensure unmask synchronizes with checking for work.
196 	 */
197 	mb();
198 	if (block->tx)
199 		reschedule |= gve_tx_poll(block, -1);
200 	if (block->rx)
201 		reschedule |= gve_rx_poll(block, -1);
202 	if (reschedule && napi_reschedule(napi))
203 		iowrite32be(GVE_IRQ_MASK, irq_doorbell);
204 
205 	return 0;
206 }
207 
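/* Enable MSI-X and allocate the notification blocks: one vector per block
 * plus one management vector. If fewer vectors are granted than requested,
 * the TX/RX queue limits are scaled down to fit.
 */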
208 static int gve_alloc_notify_blocks(struct gve_priv *priv)
209 {
210 	int num_vecs_requested = priv->num_ntfy_blks + 1;
211 	char *name = priv->dev->name;
212 	unsigned int active_cpus;
213 	int vecs_enabled;
214 	int i, j;
215 	int err;
216 
217 	priv->msix_vectors = kvzalloc(num_vecs_requested *
218 				      sizeof(*priv->msix_vectors), GFP_KERNEL);
219 	if (!priv->msix_vectors)
220 		return -ENOMEM;
221 	for (i = 0; i < num_vecs_requested; i++)
222 		priv->msix_vectors[i].entry = i;
223 	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
224 					     GVE_MIN_MSIX, num_vecs_requested);
225 	if (vecs_enabled < 0) {
226 		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
227 			GVE_MIN_MSIX, vecs_enabled);
228 		err = vecs_enabled;
229 		goto abort_with_msix_vectors;
230 	}
231 	if (vecs_enabled != num_vecs_requested) {
232 		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
233 		int vecs_per_type = new_num_ntfy_blks / 2;
234 		int vecs_left = new_num_ntfy_blks % 2;
235 
236 		priv->num_ntfy_blks = new_num_ntfy_blks;
237 		priv->mgmt_msix_idx = priv->num_ntfy_blks;
238 		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
239 						vecs_per_type);
240 		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
241 						vecs_per_type + vecs_left);
242 		dev_err(&priv->pdev->dev,
243 			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
244 			vecs_enabled, priv->tx_cfg.max_queues,
245 			priv->rx_cfg.max_queues);
246 		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
247 			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
248 		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
249 			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
250 	}
251 	/* Half the notification blocks go to TX and half to RX */
252 	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
253 
254 	/* Setup Management Vector  - the last vector */
255 	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
256 		 name);
257 	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
258 			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
259 	if (err) {
260 		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
261 		goto abort_with_msix_enabled;
262 	}
263 	priv->ntfy_blocks =
264 		dma_alloc_coherent(&priv->pdev->dev,
265 				   priv->num_ntfy_blks *
266 				   sizeof(*priv->ntfy_blocks),
267 				   &priv->ntfy_block_bus, GFP_KERNEL);
268 	if (!priv->ntfy_blocks) {
269 		err = -ENOMEM;
270 		goto abort_with_mgmt_vector;
271 	}
272 	/* Setup the other blocks - the first n-1 vectors */
273 	for (i = 0; i < priv->num_ntfy_blks; i++) {
274 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
275 		int msix_idx = i;
276 
277 		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
278 			 name, i);
279 		block->priv = priv;
280 		err = request_irq(priv->msix_vectors[msix_idx].vector,
281 				  gve_intr, 0, block->name, block);
282 		if (err) {
283 			dev_err(&priv->pdev->dev,
284 				"Failed to receive msix vector %d\n", i);
285 			goto abort_with_some_ntfy_blocks;
286 		}
287 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
288 				      get_cpu_mask(i % active_cpus));
289 	}
290 	return 0;
291 abort_with_some_ntfy_blocks:
292 	for (j = 0; j < i; j++) {
293 		struct gve_notify_block *block = &priv->ntfy_blocks[j];
294 		int msix_idx = j;
295 
296 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
297 				      NULL);
298 		free_irq(priv->msix_vectors[msix_idx].vector, block);
299 	}
300 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
301 			  sizeof(*priv->ntfy_blocks),
302 			  priv->ntfy_blocks, priv->ntfy_block_bus);
303 	priv->ntfy_blocks = NULL;
304 abort_with_mgmt_vector:
305 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
306 abort_with_msix_enabled:
307 	pci_disable_msix(priv->pdev);
308 abort_with_msix_vectors:
309 	kvfree(priv->msix_vectors);
310 	priv->msix_vectors = NULL;
311 	return err;
312 }
313 
314 static void gve_free_notify_blocks(struct gve_priv *priv)
315 {
316 	int i;
317 
318 	if (!priv->msix_vectors)
319 		return;
320 
321 	/* Free the irqs */
322 	for (i = 0; i < priv->num_ntfy_blks; i++) {
323 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
324 		int msix_idx = i;
325 
326 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
327 				      NULL);
328 		free_irq(priv->msix_vectors[msix_idx].vector, block);
329 	}
330 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
331 	dma_free_coherent(&priv->pdev->dev,
332 			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
333 			  priv->ntfy_blocks, priv->ntfy_block_bus);
334 	priv->ntfy_blocks = NULL;
335 	pci_disable_msix(priv->pdev);
336 	kvfree(priv->msix_vectors);
337 	priv->msix_vectors = NULL;
338 }
339 
340 static int gve_setup_device_resources(struct gve_priv *priv)
341 {
342 	int err;
343 
344 	err = gve_alloc_counter_array(priv);
345 	if (err)
346 		return err;
347 	err = gve_alloc_notify_blocks(priv);
348 	if (err)
349 		goto abort_with_counter;
350 	err = gve_alloc_stats_report(priv);
351 	if (err)
352 		goto abort_with_ntfy_blocks;
353 	err = gve_adminq_configure_device_resources(priv,
354 						    priv->counter_array_bus,
355 						    priv->num_event_counters,
356 						    priv->ntfy_block_bus,
357 						    priv->num_ntfy_blks);
358 	if (unlikely(err)) {
359 		dev_err(&priv->pdev->dev,
360 			"could not setup device_resources: err=%d\n", err);
361 		err = -ENXIO;
362 		goto abort_with_stats_report;
363 	}
364 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
365 				      priv->stats_report_bus,
366 				      GVE_STATS_REPORT_TIMER_PERIOD);
367 	if (err)
368 		dev_err(&priv->pdev->dev,
369 			"Failed to report stats: err=%d\n", err);
370 	gve_set_device_resources_ok(priv);
371 	return 0;
372 abort_with_stats_report:
373 	gve_free_stats_report(priv);
374 abort_with_ntfy_blocks:
375 	gve_free_notify_blocks(priv);
376 abort_with_counter:
377 	gve_free_counter_array(priv);
378 	return err;
379 }
380 
381 static void gve_trigger_reset(struct gve_priv *priv);
382 
383 static void gve_teardown_device_resources(struct gve_priv *priv)
384 {
385 	int err;
386 
387 	/* Tell device its resources are being freed */
388 	if (gve_get_device_resources_ok(priv)) {
389 		/* detach the stats report */
390 		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
391 		if (err) {
392 			dev_err(&priv->pdev->dev,
393 				"Failed to detach stats report: err=%d\n", err);
394 			gve_trigger_reset(priv);
395 		}
396 		err = gve_adminq_deconfigure_device_resources(priv);
397 		if (err) {
398 			dev_err(&priv->pdev->dev,
399 				"Could not deconfigure device resources: err=%d\n",
400 				err);
401 			gve_trigger_reset(priv);
402 		}
403 	}
404 	gve_free_counter_array(priv);
405 	gve_free_notify_blocks(priv);
406 	gve_free_stats_report(priv);
407 	gve_clear_device_resources_ok(priv);
408 }
409 
410 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
411 {
412 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
413 
414 	netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
415 		       NAPI_POLL_WEIGHT);
416 }
417 
418 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
419 {
420 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
421 
422 	netif_napi_del(&block->napi);
423 }
424 
425 static int gve_register_qpls(struct gve_priv *priv)
426 {
427 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
428 	int err;
429 	int i;
430 
431 	for (i = 0; i < num_qpls; i++) {
432 		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
433 		if (err) {
434 			netif_err(priv, drv, priv->dev,
435 				  "failed to register queue page list %d\n",
436 				  priv->qpls[i].id);
437 			/* This failure will trigger a reset - no need to clean
438 			 * up
439 			 */
440 			return err;
441 		}
442 	}
443 	return 0;
444 }
445 
446 static int gve_unregister_qpls(struct gve_priv *priv)
447 {
448 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
449 	int err;
450 	int i;
451 
452 	for (i = 0; i < num_qpls; i++) {
453 		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
454 		/* This failure will trigger a reset - no need to clean up */
455 		if (err) {
456 			netif_err(priv, drv, priv->dev,
457 				  "Failed to unregister queue page list %d\n",
458 				  priv->qpls[i].id);
459 			return err;
460 		}
461 	}
462 	return 0;
463 }
464 
465 static int gve_create_rings(struct gve_priv *priv)
466 {
467 	int err;
468 	int i;
469 
470 	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
471 	if (err) {
472 		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
473 			  priv->tx_cfg.num_queues);
474 		/* This failure will trigger a reset - no need to clean
475 		 * up
476 		 */
477 		return err;
478 	}
479 	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
480 		  priv->tx_cfg.num_queues);
481 
482 	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
483 	if (err) {
484 		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
485 			  priv->rx_cfg.num_queues);
486 		/* This failure will trigger a reset - no need to clean
487 		 * up
488 		 */
489 		return err;
490 	}
491 	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
492 		  priv->rx_cfg.num_queues);
493 
494 	/* Rx data ring has been prefilled with packet buffers at queue
495 	 * allocation time.
496 	 * Write the doorbell to provide descriptor slots and packet buffers
497 	 * to the NIC.
498 	 */
499 	for (i = 0; i < priv->rx_cfg.num_queues; i++)
500 		gve_rx_write_doorbell(priv, &priv->rx[i]);
501 
502 	return 0;
503 }
504 
505 static int gve_alloc_rings(struct gve_priv *priv)
506 {
507 	int ntfy_idx;
508 	int err;
509 	int i;
510 
511 	/* Setup tx rings */
512 	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
513 			    GFP_KERNEL);
514 	if (!priv->tx)
515 		return -ENOMEM;
516 	err = gve_tx_alloc_rings(priv);
517 	if (err)
518 		goto free_tx;
519 	/* Setup rx rings */
520 	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
521 			    GFP_KERNEL);
522 	if (!priv->rx) {
523 		err = -ENOMEM;
524 		goto free_tx_queue;
525 	}
526 	err = gve_rx_alloc_rings(priv);
527 	if (err)
528 		goto free_rx;
529 	/* Add tx napi & init sync stats*/
530 	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
531 		u64_stats_init(&priv->tx[i].statss);
532 		ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
533 		gve_add_napi(priv, ntfy_idx);
534 	}
535 	/* Add rx napi  & init sync stats*/
536 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
537 		u64_stats_init(&priv->rx[i].statss);
538 		ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
539 		gve_add_napi(priv, ntfy_idx);
540 	}
541 
542 	return 0;
543 
544 free_rx:
545 	kvfree(priv->rx);
546 	priv->rx = NULL;
547 free_tx_queue:
548 	gve_tx_free_rings(priv);
549 free_tx:
550 	kvfree(priv->tx);
551 	priv->tx = NULL;
552 	return err;
553 }
554 
555 static int gve_destroy_rings(struct gve_priv *priv)
556 {
557 	int err;
558 
559 	err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
560 	if (err) {
561 		netif_err(priv, drv, priv->dev,
562 			  "failed to destroy tx queues\n");
563 		/* This failure will trigger a reset - no need to clean up */
564 		return err;
565 	}
566 	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
567 	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
568 	if (err) {
569 		netif_err(priv, drv, priv->dev,
570 			  "failed to destroy rx queues\n");
571 		/* This failure will trigger a reset - no need to clean up */
572 		return err;
573 	}
574 	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
575 	return 0;
576 }
577 
578 static void gve_free_rings(struct gve_priv *priv)
579 {
580 	int ntfy_idx;
581 	int i;
582 
583 	if (priv->tx) {
584 		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
585 			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
586 			gve_remove_napi(priv, ntfy_idx);
587 		}
588 		gve_tx_free_rings(priv);
589 		kvfree(priv->tx);
590 		priv->tx = NULL;
591 	}
592 	if (priv->rx) {
593 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
594 			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
595 			gve_remove_napi(priv, ntfy_idx);
596 		}
597 		gve_rx_free_rings(priv);
598 		kvfree(priv->rx);
599 		priv->rx = NULL;
600 	}
601 }
602 
603 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
604 		   struct page **page, dma_addr_t *dma,
605 		   enum dma_data_direction dir)
606 {
607 	*page = alloc_page(GFP_KERNEL);
608 	if (!*page) {
609 		priv->page_alloc_fail++;
610 		return -ENOMEM;
611 	}
612 	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
613 	if (dma_mapping_error(dev, *dma)) {
614 		priv->dma_mapping_error++;
615 		put_page(*page);
616 		return -ENOMEM;
617 	}
618 	return 0;
619 }
620 
621 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
622 				     int pages)
623 {
624 	struct gve_queue_page_list *qpl = &priv->qpls[id];
625 	int err;
626 	int i;
627 
628 	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
629 		netif_err(priv, drv, priv->dev,
630 			  "Reached max number of registered pages %llu > %llu\n",
631 			  pages + priv->num_registered_pages,
632 			  priv->max_registered_pages);
633 		return -EINVAL;
634 	}
635 
636 	qpl->id = id;
637 	qpl->num_entries = 0;
638 	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
639 	/* caller handles clean up */
640 	if (!qpl->pages)
641 		return -ENOMEM;
642 	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
643 				   GFP_KERNEL);
644 	/* caller handles clean up */
645 	if (!qpl->page_buses)
646 		return -ENOMEM;
647 
648 	for (i = 0; i < pages; i++) {
649 		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
650 				     &qpl->page_buses[i],
651 				     gve_qpl_dma_dir(priv, id));
652 		/* caller handles clean up */
653 		if (err)
654 			return -ENOMEM;
655 		qpl->num_entries++;
656 	}
657 	priv->num_registered_pages += pages;
658 
659 	return 0;
660 }
661 
662 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
663 		   enum dma_data_direction dir)
664 {
665 	if (!dma_mapping_error(dev, dma))
666 		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
667 	if (page)
668 		put_page(page);
669 }
670 
671 static void gve_free_queue_page_list(struct gve_priv *priv,
672 				     int id)
673 {
674 	struct gve_queue_page_list *qpl = &priv->qpls[id];
675 	int i;
676 
677 	if (!qpl->pages)
678 		return;
679 	if (!qpl->page_buses)
680 		goto free_pages;
681 
682 	for (i = 0; i < qpl->num_entries; i++)
683 		gve_free_page(&priv->pdev->dev, qpl->pages[i],
684 			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
685 
686 	kvfree(qpl->page_buses);
687 free_pages:
688 	kvfree(qpl->pages);
689 	priv->num_registered_pages -= qpl->num_entries;
690 }
691 
692 static int gve_alloc_qpls(struct gve_priv *priv)
693 {
694 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
695 	int i, j;
696 	int err;
697 
698 	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
699 	if (!priv->qpls)
700 		return -ENOMEM;
701 
702 	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
703 		err = gve_alloc_queue_page_list(priv, i,
704 						priv->tx_pages_per_qpl);
705 		if (err)
706 			goto free_qpls;
707 	}
708 	for (; i < num_qpls; i++) {
709 		err = gve_alloc_queue_page_list(priv, i,
710 						priv->rx_pages_per_qpl);
711 		if (err)
712 			goto free_qpls;
713 	}
714 
715 	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
716 				     sizeof(unsigned long) * BITS_PER_BYTE;
717 	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
718 					    sizeof(unsigned long), GFP_KERNEL);
719 	if (!priv->qpl_cfg.qpl_id_map) {
720 		err = -ENOMEM;
721 		goto free_qpls;
722 	}
723 
724 	return 0;
725 
726 free_qpls:
727 	for (j = 0; j <= i; j++)
728 		gve_free_queue_page_list(priv, j);
729 	kvfree(priv->qpls);
730 	return err;
731 }
732 
733 static void gve_free_qpls(struct gve_priv *priv)
734 {
735 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
736 	int i;
737 
738 	kvfree(priv->qpl_cfg.qpl_id_map);
739 
740 	for (i = 0; i < num_qpls; i++)
741 		gve_free_queue_page_list(priv, i);
742 
743 	kvfree(priv->qpls);
744 }
745 
746 /* Use this to schedule a reset when the device is capable of continuing
747  * to handle other requests in its current state. If it is not, do a reset
748  * in thread instead.
749  */
750 void gve_schedule_reset(struct gve_priv *priv)
751 {
752 	gve_set_do_reset(priv);
753 	queue_work(priv->gve_wq, &priv->service_task);
754 }
755 
756 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
757 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
758 static void gve_turndown(struct gve_priv *priv);
759 static void gve_turnup(struct gve_priv *priv);
760 
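/* ndo_open handler: allocate queue page lists and rings, register them with
 * the device, and bring the data path up. Failures after the device has been
 * touched fall through to a reset.
 */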
761 static int gve_open(struct net_device *dev)
762 {
763 	struct gve_priv *priv = netdev_priv(dev);
764 	int err;
765 
766 	err = gve_alloc_qpls(priv);
767 	if (err)
768 		return err;
769 	err = gve_alloc_rings(priv);
770 	if (err)
771 		goto free_qpls;
772 
773 	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
774 	if (err)
775 		goto free_rings;
776 	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
777 	if (err)
778 		goto free_rings;
779 
780 	err = gve_register_qpls(priv);
781 	if (err)
782 		goto reset;
783 	err = gve_create_rings(priv);
784 	if (err)
785 		goto reset;
786 	gve_set_device_rings_ok(priv);
787 
788 	if (gve_get_report_stats(priv))
789 		mod_timer(&priv->stats_report_timer,
790 			  round_jiffies(jiffies +
791 				msecs_to_jiffies(priv->stats_report_timer_period)));
792 
793 	gve_turnup(priv);
794 	queue_work(priv->gve_wq, &priv->service_task);
795 	priv->interface_up_cnt++;
796 	return 0;
797 
798 free_rings:
799 	gve_free_rings(priv);
800 free_qpls:
801 	gve_free_qpls(priv);
802 	return err;
803 
804 reset:
805 	/* This must have been called from a reset due to the rtnl lock
806 	 * so just return at this point.
807 	 */
808 	if (gve_get_reset_in_progress(priv))
809 		return err;
810 	/* Otherwise reset before returning */
811 	gve_reset_and_teardown(priv, true);
812 	/* if this fails there is nothing we can do so just ignore the return */
813 	gve_reset_recovery(priv, false);
814 	/* return the original error */
815 	return err;
816 }
817 
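/* ndo_stop handler: quiesce the data path, destroy the rings and unregister
 * the queue page lists, falling back to a reset if teardown fails.
 */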
818 static int gve_close(struct net_device *dev)
819 {
820 	struct gve_priv *priv = netdev_priv(dev);
821 	int err;
822 
823 	netif_carrier_off(dev);
824 	if (gve_get_device_rings_ok(priv)) {
825 		gve_turndown(priv);
826 		err = gve_destroy_rings(priv);
827 		if (err)
828 			goto err;
829 		err = gve_unregister_qpls(priv);
830 		if (err)
831 			goto err;
832 		gve_clear_device_rings_ok(priv);
833 	}
834 	del_timer_sync(&priv->stats_report_timer);
835 
836 	gve_free_rings(priv);
837 	gve_free_qpls(priv);
838 	priv->interface_down_cnt++;
839 	return 0;
840 
841 err:
842 	/* This must have been called from a reset due to the rtnl lock
843 	 * so just return at this point.
844 	 */
845 	if (gve_get_reset_in_progress(priv))
846 		return err;
847 	/* Otherwise reset before returning */
848 	gve_reset_and_teardown(priv, true);
849 	return gve_reset_recovery(priv, false);
850 }
851 
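/* Apply a new TX/RX queue configuration. If the interface is up, it is
 * closed and reopened with the new configuration; otherwise the config is
 * simply recorded for the next open.
 */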
852 int gve_adjust_queues(struct gve_priv *priv,
853 		      struct gve_queue_config new_rx_config,
854 		      struct gve_queue_config new_tx_config)
855 {
856 	int err;
857 
858 	if (netif_carrier_ok(priv->dev)) {
859 		/* To make this process as simple as possible we teardown the
860 		 * device, set the new configuration, and then bring the device
861 		 * up again.
862 		 */
863 		err = gve_close(priv->dev);
864 		/* we have already tried to reset in close,
865 		 * just fail at this point
866 		 */
867 		if (err)
868 			return err;
869 		priv->tx_cfg = new_tx_config;
870 		priv->rx_cfg = new_rx_config;
871 
872 		err = gve_open(priv->dev);
873 		if (err)
874 			goto err;
875 
876 		return 0;
877 	}
878 	/* Set the config for the next up. */
879 	priv->tx_cfg = new_tx_config;
880 	priv->rx_cfg = new_rx_config;
881 
882 	return 0;
883 err:
884 	netif_err(priv, drv, priv->dev,
885 		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
886 	gve_turndown(priv);
887 	return err;
888 }
889 
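/* Quiesce the data path: disable NAPI on every notification block and stop
 * the TX queues.
 */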
890 static void gve_turndown(struct gve_priv *priv)
891 {
892 	int idx;
893 
894 	if (netif_carrier_ok(priv->dev))
895 		netif_carrier_off(priv->dev);
896 
897 	if (!gve_get_napi_enabled(priv))
898 		return;
899 
900 	/* Disable napi to prevent more work from coming in */
901 	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
902 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
903 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
904 
905 		napi_disable(&block->napi);
906 	}
907 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
908 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
909 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
910 
911 		napi_disable(&block->napi);
912 	}
913 
914 	/* Stop tx queues */
915 	netif_tx_disable(priv->dev);
916 
917 	gve_clear_napi_enabled(priv);
918 	gve_clear_report_stats(priv);
919 }
920 
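/* Restart the data path: start the TX queues, re-enable NAPI and unmask the
 * interrupt for every notification block.
 */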
921 static void gve_turnup(struct gve_priv *priv)
922 {
923 	int idx;
924 
925 	/* Start the tx queues */
926 	netif_tx_start_all_queues(priv->dev);
927 
928 	/* Enable napi and unmask interrupts for all queues */
929 	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
930 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
931 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
932 
933 		napi_enable(&block->napi);
934 		iowrite32be(0, gve_irq_doorbell(priv, block));
935 	}
936 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
937 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
938 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
939 
940 		napi_enable(&block->napi);
941 		iowrite32be(0, gve_irq_doorbell(priv, block));
942 	}
943 
944 	gve_set_napi_enabled(priv);
945 }
946 
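/* ndo_tx_timeout handler: if the NIC has completions the driver has not yet
 * processed, kick the stalled queue by rescheduling its NAPI (rate limited by
 * MIN_TX_TIMEOUT_GAP); otherwise schedule a device reset.
 */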
947 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
948 {
949 	struct gve_notify_block *block;
950 	struct gve_tx_ring *tx = NULL;
951 	struct gve_priv *priv;
952 	u32 last_nic_done;
953 	u32 current_time;
954 	u32 ntfy_idx;
955 
956 	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
957 	priv = netdev_priv(dev);
958 	if (txqueue > priv->tx_cfg.num_queues)
959 		goto reset;
960 
961 	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
962 	if (ntfy_idx >= priv->num_ntfy_blks)
963 		goto reset;
964 
965 	block = &priv->ntfy_blocks[ntfy_idx];
966 	tx = block->tx;
967 
968 	current_time = jiffies_to_msecs(jiffies);
969 	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
970 		goto reset;
971 
972 	/* Check to see if there are missed completions, which will allow us to
973 	 * kick the queue.
974 	 */
975 	last_nic_done = gve_tx_load_event_counter(priv, tx);
976 	if (last_nic_done - tx->done) {
977 		netdev_info(dev, "Kicking queue %d", txqueue);
978 		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
979 		napi_schedule(&block->napi);
980 		tx->last_kick_msec = current_time;
981 		goto out;
982 	} // Else reset.
983 
984 reset:
985 	gve_schedule_reset(priv);
986 
987 out:
988 	if (tx)
989 		tx->queue_timeout++;
990 	priv->tx_timeo_cnt++;
991 }
992 
993 static const struct net_device_ops gve_netdev_ops = {
994 	.ndo_start_xmit		=	gve_tx,
995 	.ndo_open		=	gve_open,
996 	.ndo_stop		=	gve_close,
997 	.ndo_get_stats64	=	gve_get_stats,
998 	.ndo_tx_timeout         =       gve_tx_timeout,
999 };
1000 
1001 static void gve_handle_status(struct gve_priv *priv, u32 status)
1002 {
1003 	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1004 		dev_info(&priv->pdev->dev, "Device requested reset.\n");
1005 		gve_set_do_reset(priv);
1006 	}
1007 	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1008 		priv->stats_report_trigger_cnt++;
1009 		gve_set_do_report_stats(priv);
1010 	}
1011 }
1012 
1013 static void gve_handle_reset(struct gve_priv *priv)
1014 {
1015 	/* A service task will be scheduled at the end of probe to catch any
1016 	 * resets that need to happen, and we don't want to reset until
1017 	 * probe is done.
1018 	 */
1019 	if (gve_get_probe_in_progress(priv))
1020 		return;
1021 
1022 	if (gve_get_do_reset(priv)) {
1023 		rtnl_lock();
1024 		gve_reset(priv, false);
1025 		rtnl_unlock();
1026 	}
1027 }
1028 
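/* Populate the stats report page shared with the NIC with per-queue TX and
 * RX counters and bump its written_count.
 */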
1029 void gve_handle_report_stats(struct gve_priv *priv)
1030 {
1031 	struct stats *stats = priv->stats_report->stats;
1032 	int idx, stats_idx = 0;
1033 	unsigned int start = 0;
1034 	u64 tx_bytes;
1035 
1036 	if (!gve_get_report_stats(priv))
1037 		return;
1038 
1039 	be64_add_cpu(&priv->stats_report->written_count, 1);
1040 	/* tx stats */
1041 	if (priv->tx) {
1042 		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1043 			do {
1044 				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1045 				tx_bytes = priv->tx[idx].bytes_done;
1046 			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1047 			stats[stats_idx++] = (struct stats) {
1048 				.stat_name = cpu_to_be32(TX_WAKE_CNT),
1049 				.value = cpu_to_be64(priv->tx[idx].wake_queue),
1050 				.queue_id = cpu_to_be32(idx),
1051 			};
1052 			stats[stats_idx++] = (struct stats) {
1053 				.stat_name = cpu_to_be32(TX_STOP_CNT),
1054 				.value = cpu_to_be64(priv->tx[idx].stop_queue),
1055 				.queue_id = cpu_to_be32(idx),
1056 			};
1057 			stats[stats_idx++] = (struct stats) {
1058 				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
1059 				.value = cpu_to_be64(priv->tx[idx].req),
1060 				.queue_id = cpu_to_be32(idx),
1061 			};
1062 			stats[stats_idx++] = (struct stats) {
1063 				.stat_name = cpu_to_be32(TX_BYTES_SENT),
1064 				.value = cpu_to_be64(tx_bytes),
1065 				.queue_id = cpu_to_be32(idx),
1066 			};
1067 			stats[stats_idx++] = (struct stats) {
1068 				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1069 				.value = cpu_to_be64(priv->tx[idx].done),
1070 				.queue_id = cpu_to_be32(idx),
1071 			};
1072 			stats[stats_idx++] = (struct stats) {
1073 				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1074 				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
1075 				.queue_id = cpu_to_be32(idx),
1076 			};
1077 		}
1078 	}
1079 	/* rx stats */
1080 	if (priv->rx) {
1081 		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1082 			stats[stats_idx++] = (struct stats) {
1083 				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1084 				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
1085 				.queue_id = cpu_to_be32(idx),
1086 			};
1087 			stats[stats_idx++] = (struct stats) {
1088 				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1089 				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
1090 				.queue_id = cpu_to_be32(idx),
1091 			};
1092 		}
1093 	}
1094 }
1095 
1096 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1097 {
1098 	if (!gve_get_napi_enabled(priv))
1099 		return;
1100 
1101 	if (link_status == netif_carrier_ok(priv->dev))
1102 		return;
1103 
1104 	if (link_status) {
1105 		netdev_info(priv->dev, "Device link is up.\n");
1106 		netif_carrier_on(priv->dev);
1107 	} else {
1108 		netdev_info(priv->dev, "Device link is down.\n");
1109 		netif_carrier_off(priv->dev);
1110 	}
1111 }
1112 
1113 /* Handle NIC status register changes, reset requests and report stats */
1114 static void gve_service_task(struct work_struct *work)
1115 {
1116 	struct gve_priv *priv = container_of(work, struct gve_priv,
1117 					     service_task);
1118 	u32 status = ioread32be(&priv->reg_bar0->device_status);
1119 
1120 	gve_handle_status(priv, status);
1121 
1122 	gve_handle_reset(priv);
1123 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1124 }
1125 
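/* Set up the admin queue, optionally query the device description, and size
 * the MSI-X vector and queue configuration before allocating device
 * resources.
 */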
1126 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
1127 {
1128 	int num_ntfy;
1129 	int err;
1130 
1131 	/* Set up the adminq */
1132 	err = gve_adminq_alloc(&priv->pdev->dev, priv);
1133 	if (err) {
1134 		dev_err(&priv->pdev->dev,
1135 			"Failed to alloc admin queue: err=%d\n", err);
1136 		return err;
1137 	}
1138 
1139 	if (skip_describe_device)
1140 		goto setup_device;
1141 
1142 	/* Get the initial information we need from the device */
1143 	err = gve_adminq_describe_device(priv);
1144 	if (err) {
1145 		dev_err(&priv->pdev->dev,
1146 			"Could not get device information: err=%d\n", err);
1147 		goto err;
1148 	}
1149 	if (priv->dev->max_mtu > PAGE_SIZE) {
1150 		priv->dev->max_mtu = PAGE_SIZE;
1151 		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
1152 		if (err) {
1153 			dev_err(&priv->pdev->dev, "Could not set mtu");
1154 			goto err;
1155 		}
1156 	}
1157 	priv->dev->mtu = priv->dev->max_mtu;
1158 	num_ntfy = pci_msix_vec_count(priv->pdev);
1159 	if (num_ntfy <= 0) {
1160 		dev_err(&priv->pdev->dev,
1161 			"could not count MSI-x vectors: err=%d\n", num_ntfy);
1162 		err = num_ntfy;
1163 		goto err;
1164 	} else if (num_ntfy < GVE_MIN_MSIX) {
1165 		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
1166 			GVE_MIN_MSIX, num_ntfy);
1167 		err = -EINVAL;
1168 		goto err;
1169 	}
1170 
1171 	priv->num_registered_pages = 0;
1172 	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
1173 	/* gvnic has one Notification Block per MSI-x vector, except for the
1174 	 * management vector
1175 	 */
1176 	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
1177 	priv->mgmt_msix_idx = priv->num_ntfy_blks;
1178 
1179 	priv->tx_cfg.max_queues =
1180 		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
1181 	priv->rx_cfg.max_queues =
1182 		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
1183 
1184 	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
1185 	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
1186 	if (priv->default_num_queues > 0) {
1187 		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
1188 						priv->tx_cfg.num_queues);
1189 		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
1190 						priv->rx_cfg.num_queues);
1191 	}
1192 
1193 	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
1194 		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
1195 	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
1196 		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
1197 
1198 setup_device:
1199 	err = gve_setup_device_resources(priv);
1200 	if (!err)
1201 		return 0;
1202 err:
1203 	gve_adminq_free(&priv->pdev->dev, priv);
1204 	return err;
1205 }
1206 
1207 static void gve_teardown_priv_resources(struct gve_priv *priv)
1208 {
1209 	gve_teardown_device_resources(priv);
1210 	gve_adminq_free(&priv->pdev->dev, priv);
1211 }
1212 
1213 static void gve_trigger_reset(struct gve_priv *priv)
1214 {
1215 	/* Reset the device by releasing the AQ */
1216 	gve_adminq_release(priv);
1217 }
1218 
1219 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
1220 {
1221 	gve_trigger_reset(priv);
1222 	/* With the reset having already happened, close cannot fail */
1223 	if (was_up)
1224 		gve_close(priv->dev);
1225 	gve_teardown_priv_resources(priv);
1226 }
1227 
1228 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
1229 {
1230 	int err;
1231 
1232 	err = gve_init_priv(priv, true);
1233 	if (err)
1234 		goto err;
1235 	if (was_up) {
1236 		err = gve_open(priv->dev);
1237 		if (err)
1238 			goto err;
1239 	}
1240 	return 0;
1241 err:
1242 	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1243 	gve_turndown(priv);
1244 	return err;
1245 }
1246 
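/* Perform a full device reset: tear down (cleanly if attempt_teardown is set,
 * otherwise by immediately turning the device down), then bring everything
 * back up.
 */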
1247 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
1248 {
1249 	bool was_up = netif_carrier_ok(priv->dev);
1250 	int err;
1251 
1252 	dev_info(&priv->pdev->dev, "Performing reset\n");
1253 	gve_clear_do_reset(priv);
1254 	gve_set_reset_in_progress(priv);
1255 	/* If we aren't attempting to teardown normally, just go turndown and
1256 	 * reset right away.
1257 	 */
1258 	if (!attempt_teardown) {
1259 		gve_turndown(priv);
1260 		gve_reset_and_teardown(priv, was_up);
1261 	} else {
1262 		/* Otherwise attempt to close normally */
1263 		if (was_up) {
1264 			err = gve_close(priv->dev);
1265 			/* If that fails reset as we did above */
1266 			if (err)
1267 				gve_reset_and_teardown(priv, was_up);
1268 		}
1269 		/* Clean up any remaining resources */
1270 		gve_teardown_priv_resources(priv);
1271 	}
1272 
1273 	/* Set it all back up */
1274 	err = gve_reset_recovery(priv, was_up);
1275 	gve_clear_reset_in_progress(priv);
1276 	priv->reset_cnt++;
1277 	priv->interface_up_cnt = 0;
1278 	priv->interface_down_cnt = 0;
1279 	priv->stats_report_trigger_cnt = 0;
1280 	return err;
1281 }
1282 
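/* Report the driver version to the device by writing the prefixed version
 * string, byte by byte, to the driver version register.
 */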
1283 static void gve_write_version(u8 __iomem *driver_version_register)
1284 {
1285 	const char *c = gve_version_prefix;
1286 
1287 	while (*c) {
1288 		writeb(*c, driver_version_register);
1289 		c++;
1290 	}
1291 
1292 	c = gve_version_str;
1293 	while (*c) {
1294 		writeb(*c, driver_version_register);
1295 		c++;
1296 	}
1297 	writeb('\n', driver_version_register);
1298 }
1299 
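/* PCI probe: enable the device, map the register and doorbell BARs, allocate
 * and configure the netdev, and initialize driver resources.
 */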
1300 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1301 {
1302 	int max_tx_queues, max_rx_queues;
1303 	struct net_device *dev;
1304 	__be32 __iomem *db_bar;
1305 	struct gve_registers __iomem *reg_bar;
1306 	struct gve_priv *priv;
1307 	int err;
1308 
1309 	err = pci_enable_device(pdev);
1310 	if (err)
1311 		return -ENXIO;
1312 
1313 	err = pci_request_regions(pdev, "gvnic-cfg");
1314 	if (err)
1315 		goto abort_with_enabled;
1316 
1317 	pci_set_master(pdev);
1318 
1319 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1320 	if (err) {
1321 		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1322 		goto abort_with_pci_region;
1323 	}
1324 
1325 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1326 	if (err) {
1327 		dev_err(&pdev->dev,
1328 			"Failed to set consistent dma mask: err=%d\n", err);
1329 		goto abort_with_pci_region;
1330 	}
1331 
1332 	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1333 	if (!reg_bar) {
1334 		dev_err(&pdev->dev, "Failed to map pci bar!\n");
1335 		err = -ENOMEM;
1336 		goto abort_with_pci_region;
1337 	}
1338 
1339 	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1340 	if (!db_bar) {
1341 		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1342 		err = -ENOMEM;
1343 		goto abort_with_reg_bar;
1344 	}
1345 
1346 	gve_write_version(&reg_bar->driver_version);
1347 	/* Get max queues to alloc etherdev */
1348 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
1349 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
1350 	/* Alloc and setup the netdev and priv */
1351 	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1352 	if (!dev) {
1353 		dev_err(&pdev->dev, "could not allocate netdev\n");
1354 		goto abort_with_db_bar;
1355 	}
1356 	SET_NETDEV_DEV(dev, &pdev->dev);
1357 	pci_set_drvdata(pdev, dev);
1358 	dev->ethtool_ops = &gve_ethtool_ops;
1359 	dev->netdev_ops = &gve_netdev_ops;
1360 	/* advertise features */
1361 	dev->hw_features = NETIF_F_HIGHDMA;
1362 	dev->hw_features |= NETIF_F_SG;
1363 	dev->hw_features |= NETIF_F_HW_CSUM;
1364 	dev->hw_features |= NETIF_F_TSO;
1365 	dev->hw_features |= NETIF_F_TSO6;
1366 	dev->hw_features |= NETIF_F_TSO_ECN;
1367 	dev->hw_features |= NETIF_F_RXCSUM;
1368 	dev->hw_features |= NETIF_F_RXHASH;
1369 	dev->features = dev->hw_features;
1370 	dev->watchdog_timeo = 5 * HZ;
1371 	dev->min_mtu = ETH_MIN_MTU;
1372 	netif_carrier_off(dev);
1373 
1374 	priv = netdev_priv(dev);
1375 	priv->dev = dev;
1376 	priv->pdev = pdev;
1377 	priv->msg_enable = DEFAULT_MSG_LEVEL;
1378 	priv->reg_bar0 = reg_bar;
1379 	priv->db_bar2 = db_bar;
1380 	priv->service_task_flags = 0x0;
1381 	priv->state_flags = 0x0;
1382 	priv->ethtool_flags = 0x0;
1383 
1384 	gve_set_probe_in_progress(priv);
1385 	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1386 	if (!priv->gve_wq) {
1387 		dev_err(&pdev->dev, "Could not allocate workqueue");
1388 		err = -ENOMEM;
1389 		goto abort_with_netdev;
1390 	}
1391 	INIT_WORK(&priv->service_task, gve_service_task);
1392 	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
1393 	priv->tx_cfg.max_queues = max_tx_queues;
1394 	priv->rx_cfg.max_queues = max_rx_queues;
1395 
1396 	err = gve_init_priv(priv, false);
1397 	if (err)
1398 		goto abort_with_wq;
1399 
1400 	err = register_netdev(dev);
1401 	if (err)
1402 		goto abort_with_gve_init;
1403 
1404 	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1405 	gve_clear_probe_in_progress(priv);
1406 	queue_work(priv->gve_wq, &priv->service_task);
1407 	return 0;
1408 
1409 abort_with_gve_init:
1410 	gve_teardown_priv_resources(priv);
1411 
1412 abort_with_wq:
1413 	destroy_workqueue(priv->gve_wq);
1414 
1415 abort_with_netdev:
1416 	free_netdev(dev);
1417 
1418 abort_with_db_bar:
1419 	pci_iounmap(pdev, db_bar);
1420 
1421 abort_with_reg_bar:
1422 	pci_iounmap(pdev, reg_bar);
1423 
1424 abort_with_pci_region:
1425 	pci_release_regions(pdev);
1426 
1427 abort_with_enabled:
1428 	pci_disable_device(pdev);
1429 	return -ENXIO;
1430 }
1431 
1432 static void gve_remove(struct pci_dev *pdev)
1433 {
1434 	struct net_device *netdev = pci_get_drvdata(pdev);
1435 	struct gve_priv *priv = netdev_priv(netdev);
1436 	__be32 __iomem *db_bar = priv->db_bar2;
1437 	void __iomem *reg_bar = priv->reg_bar0;
1438 
1439 	unregister_netdev(netdev);
1440 	gve_teardown_priv_resources(priv);
1441 	destroy_workqueue(priv->gve_wq);
1442 	free_netdev(netdev);
1443 	pci_iounmap(pdev, db_bar);
1444 	pci_iounmap(pdev, reg_bar);
1445 	pci_release_regions(pdev);
1446 	pci_disable_device(pdev);
1447 }
1448 
1449 static const struct pci_device_id gve_id_table[] = {
1450 	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
1451 	{ }
1452 };
1453 
1454 static struct pci_driver gvnic_driver = {
1455 	.name		= "gvnic",
1456 	.id_table	= gve_id_table,
1457 	.probe		= gve_probe,
1458 	.remove		= gve_remove,
1459 };
1460 
1461 module_pci_driver(gvnic_driver);
1462 
1463 MODULE_DEVICE_TABLE(pci, gve_id_table);
1464 MODULE_AUTHOR("Google, Inc.");
1465 MODULE_DESCRIPTION("gVNIC Driver");
1466 MODULE_LICENSE("Dual MIT/GPL");
1467 MODULE_VERSION(GVE_VERSION);
1468