1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2021 Google, Inc.
5  */
6 
7 #include <linux/cpumask.h>
8 #include <linux/etherdevice.h>
9 #include <linux/interrupt.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
12 #include <linux/sched.h>
13 #include <linux/timer.h>
14 #include <linux/workqueue.h>
15 #include <net/sch_generic.h>
16 #include "gve.h"
17 #include "gve_dqo.h"
18 #include "gve_adminq.h"
19 #include "gve_register.h"
20 
21 #define GVE_DEFAULT_RX_COPYBREAK	(256)
22 
23 #define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
24 #define GVE_VERSION		"1.0.0"
25 #define GVE_VERSION_PREFIX	"GVE-"
26 
27 // Minimum amount of time between queue kicks in msec (10 seconds)
28 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
29 
30 const char gve_version_str[] = GVE_VERSION;
31 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
32 
33 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
34 {
35 	struct gve_priv *priv = netdev_priv(dev);
36 
37 	if (gve_is_gqi(priv))
38 		return gve_tx(skb, dev);
39 	else
40 		return gve_tx_dqo(skb, dev);
41 }
42 
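/* Aggregate per-ring counters into rtnl_link_stats64. Each ring's counters
 * are read under a u64_stats retry loop so the 64-bit values are consistent
 * even on 32-bit hosts.
 */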
43 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
44 {
45 	struct gve_priv *priv = netdev_priv(dev);
46 	unsigned int start;
47 	u64 packets, bytes;
48 	int ring;
49 
50 	if (priv->rx) {
51 		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
52 			do {
53 				start =
54 				  u64_stats_fetch_begin_irq(&priv->rx[ring].statss);
55 				packets = priv->rx[ring].rpackets;
56 				bytes = priv->rx[ring].rbytes;
57 			} while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss,
58 						       start));
59 			s->rx_packets += packets;
60 			s->rx_bytes += bytes;
61 		}
62 	}
63 	if (priv->tx) {
64 		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
65 			do {
66 				start =
67 				  u64_stats_fetch_begin_irq(&priv->tx[ring].statss);
68 				packets = priv->tx[ring].pkt_done;
69 				bytes = priv->tx[ring].bytes_done;
70 			} while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss,
71 						       start));
72 			s->tx_packets += packets;
73 			s->tx_bytes += bytes;
74 		}
75 	}
76 }
77 
78 static int gve_alloc_counter_array(struct gve_priv *priv)
79 {
80 	priv->counter_array =
81 		dma_alloc_coherent(&priv->pdev->dev,
82 				   priv->num_event_counters *
83 				   sizeof(*priv->counter_array),
84 				   &priv->counter_array_bus, GFP_KERNEL);
85 	if (!priv->counter_array)
86 		return -ENOMEM;
87 
88 	return 0;
89 }
90 
91 static void gve_free_counter_array(struct gve_priv *priv)
92 {
93 	if (!priv->counter_array)
94 		return;
95 
96 	dma_free_coherent(&priv->pdev->dev,
97 			  priv->num_event_counters *
98 			  sizeof(*priv->counter_array),
99 			  priv->counter_array, priv->counter_array_bus);
100 	priv->counter_array = NULL;
101 }
102 
103 /* NIC requests to report stats */
104 static void gve_stats_report_task(struct work_struct *work)
105 {
106 	struct gve_priv *priv = container_of(work, struct gve_priv,
107 					     stats_report_task);
108 	if (gve_get_do_report_stats(priv)) {
109 		gve_handle_report_stats(priv);
110 		gve_clear_do_report_stats(priv);
111 	}
112 }
113 
114 static void gve_stats_report_schedule(struct gve_priv *priv)
115 {
116 	if (!gve_get_probe_in_progress(priv) &&
117 	    !gve_get_reset_in_progress(priv)) {
118 		gve_set_do_report_stats(priv);
119 		queue_work(priv->gve_wq, &priv->stats_report_task);
120 	}
121 }
122 
123 static void gve_stats_report_timer(struct timer_list *t)
124 {
125 	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
126 
127 	mod_timer(&priv->stats_report_timer,
128 		  round_jiffies(jiffies +
129 		  msecs_to_jiffies(priv->stats_report_timer_period)));
130 	gve_stats_report_schedule(priv);
131 }
132 
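/* Allocate the DMA-coherent stats report shared with the device. The report
 * is sized for both driver- and NIC-reported stats across all configured TX
 * and RX queues, and the periodic report timer is initialized here.
 */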
133 static int gve_alloc_stats_report(struct gve_priv *priv)
134 {
135 	int tx_stats_num, rx_stats_num;
136 
137 	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
138 		       priv->tx_cfg.num_queues;
139 	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
140 		       priv->rx_cfg.num_queues;
141 	priv->stats_report_len = struct_size(priv->stats_report, stats,
142 					     size_add(tx_stats_num, rx_stats_num));
143 	priv->stats_report =
144 		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
145 				   &priv->stats_report_bus, GFP_KERNEL);
146 	if (!priv->stats_report)
147 		return -ENOMEM;
148 	/* Set up timer for the report-stats task */
149 	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
150 	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
151 	return 0;
152 }
153 
154 static void gve_free_stats_report(struct gve_priv *priv)
155 {
156 	if (!priv->stats_report)
157 		return;
158 
159 	del_timer_sync(&priv->stats_report_timer);
160 	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
161 			  priv->stats_report, priv->stats_report_bus);
162 	priv->stats_report = NULL;
163 }
164 
165 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
166 {
167 	struct gve_priv *priv = arg;
168 
169 	queue_work(priv->gve_wq, &priv->service_task);
170 	return IRQ_HANDLED;
171 }
172 
173 static irqreturn_t gve_intr(int irq, void *arg)
174 {
175 	struct gve_notify_block *block = arg;
176 	struct gve_priv *priv = block->priv;
177 
178 	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
179 	napi_schedule_irqoff(&block->napi);
180 	return IRQ_HANDLED;
181 }
182 
183 static irqreturn_t gve_intr_dqo(int irq, void *arg)
184 {
185 	struct gve_notify_block *block = arg;
186 
187 	/* Interrupts are automatically masked */
188 	napi_schedule_irqoff(&block->napi);
189 	return IRQ_HANDLED;
190 }
191 
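/* NAPI poll for the GQI queue formats. TX and RX work is processed up to the
 * budget; returning the full budget keeps the poll scheduled. Once work drops
 * below budget, NAPI is completed and the IRQ is ACKed via the doorbell, then
 * pending work is re-checked to close the race with the unmask and the IRQ is
 * re-masked if polling must resume.
 */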
192 static int gve_napi_poll(struct napi_struct *napi, int budget)
193 {
194 	struct gve_notify_block *block;
195 	__be32 __iomem *irq_doorbell;
196 	bool reschedule = false;
197 	struct gve_priv *priv;
198 	int work_done = 0;
199 
200 	block = container_of(napi, struct gve_notify_block, napi);
201 	priv = block->priv;
202 
203 	if (block->tx)
204 		reschedule |= gve_tx_poll(block, budget);
205 	if (block->rx) {
206 		work_done = gve_rx_poll(block, budget);
207 		reschedule |= work_done == budget;
208 	}
209 
210 	if (reschedule)
211 		return budget;
212 
213 	/* Complete processing - don't unmask irq if busy polling is enabled */
214 	if (likely(napi_complete_done(napi, work_done))) {
215 		irq_doorbell = gve_irq_doorbell(priv, block);
216 		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
217 
218 		/* Ensure the IRQ ACK is visible before we check for pending work;
219 		 * updates the queue posted before the ACK will then be visible.
220 		 */
221 		mb();
222 
223 		if (block->tx)
224 			reschedule |= gve_tx_clean_pending(priv, block->tx);
225 		if (block->rx)
226 			reschedule |= gve_rx_work_pending(block->rx);
227 
228 		if (reschedule && napi_reschedule(napi))
229 			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
230 	}
231 	return work_done;
232 }
233 
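/* NAPI poll for the DQO queue format. The interrupt was masked automatically
 * when it fired (see gve_intr_dqo()), so completion only needs to re-enable
 * it through the ITR doorbell; per the comment below, the hardware raises a
 * new interrupt if events arrived in the meantime.
 */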
234 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
235 {
236 	struct gve_notify_block *block =
237 		container_of(napi, struct gve_notify_block, napi);
238 	struct gve_priv *priv = block->priv;
239 	bool reschedule = false;
240 	int work_done = 0;
241 
242 	if (block->tx)
243 		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
244 
245 	if (block->rx) {
246 		work_done = gve_rx_poll_dqo(block, budget);
247 		reschedule |= work_done == budget;
248 	}
249 
250 	if (reschedule)
251 		return budget;
252 
253 	if (likely(napi_complete_done(napi, work_done))) {
254 		/* Enable interrupts again.
255 		 *
256 		 * We don't need to repoll afterwards because HW supports the
257 		 * PCI MSI-X PBA feature.
258 		 *
259 		 * Another interrupt would be triggered if a new event came in
260 		 * since the last one.
261 		 */
262 		gve_write_irq_doorbell_dqo(priv, block,
263 					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
264 	}
265 
266 	return work_done;
267 }
268 
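/* Allocate MSI-X vectors and notification blocks: one vector per notify block
 * plus one management vector (the last one). If fewer vectors are granted
 * than requested, the notify blocks are split evenly between TX and RX and
 * the max/current queue counts are clamped to match. Each block's IRQ
 * affinity hint is spread across the active CPUs.
 */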
269 static int gve_alloc_notify_blocks(struct gve_priv *priv)
270 {
271 	int num_vecs_requested = priv->num_ntfy_blks + 1;
272 	char *name = priv->dev->name;
273 	unsigned int active_cpus;
274 	int vecs_enabled;
275 	int i, j;
276 	int err;
277 
278 	priv->msix_vectors = kvcalloc(num_vecs_requested,
279 				      sizeof(*priv->msix_vectors), GFP_KERNEL);
280 	if (!priv->msix_vectors)
281 		return -ENOMEM;
282 	for (i = 0; i < num_vecs_requested; i++)
283 		priv->msix_vectors[i].entry = i;
284 	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
285 					     GVE_MIN_MSIX, num_vecs_requested);
286 	if (vecs_enabled < 0) {
287 		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
288 			GVE_MIN_MSIX, vecs_enabled);
289 		err = vecs_enabled;
290 		goto abort_with_msix_vectors;
291 	}
292 	if (vecs_enabled != num_vecs_requested) {
293 		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
294 		int vecs_per_type = new_num_ntfy_blks / 2;
295 		int vecs_left = new_num_ntfy_blks % 2;
296 
297 		priv->num_ntfy_blks = new_num_ntfy_blks;
298 		priv->mgmt_msix_idx = priv->num_ntfy_blks;
299 		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
300 						vecs_per_type);
301 		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
302 						vecs_per_type + vecs_left);
303 		dev_err(&priv->pdev->dev,
304 			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
305 			vecs_enabled, priv->tx_cfg.max_queues,
306 			priv->rx_cfg.max_queues);
307 		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
308 			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
309 		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
310 			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
311 	}
312 	/* Half the notification blocks go to TX and half to RX */
313 	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
314 
315 	/* Setup Management Vector  - the last vector */
316 	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
317 		 name);
318 	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
319 			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
320 	if (err) {
321 		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
322 		goto abort_with_msix_enabled;
323 	}
324 	priv->irq_db_indices =
325 		dma_alloc_coherent(&priv->pdev->dev,
326 				   priv->num_ntfy_blks *
327 				   sizeof(*priv->irq_db_indices),
328 				   &priv->irq_db_indices_bus, GFP_KERNEL);
329 	if (!priv->irq_db_indices) {
330 		err = -ENOMEM;
331 		goto abort_with_mgmt_vector;
332 	}
333 
334 	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
335 				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
336 	if (!priv->ntfy_blocks) {
337 		err = -ENOMEM;
338 		goto abort_with_irq_db_indices;
339 	}
340 
341 	/* Setup the other blocks - the first n-1 vectors */
342 	for (i = 0; i < priv->num_ntfy_blks; i++) {
343 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
344 		int msix_idx = i;
345 
346 		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
347 			 name, i);
348 		block->priv = priv;
349 		err = request_irq(priv->msix_vectors[msix_idx].vector,
350 				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
351 				  0, block->name, block);
352 		if (err) {
353 			dev_err(&priv->pdev->dev,
354 				"Failed to receive msix vector %d\n", i);
355 			goto abort_with_some_ntfy_blocks;
356 		}
357 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
358 				      get_cpu_mask(i % active_cpus));
359 		block->irq_db_index = &priv->irq_db_indices[i].index;
360 	}
361 	return 0;
362 abort_with_some_ntfy_blocks:
363 	for (j = 0; j < i; j++) {
364 		struct gve_notify_block *block = &priv->ntfy_blocks[j];
365 		int msix_idx = j;
366 
367 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
368 				      NULL);
369 		free_irq(priv->msix_vectors[msix_idx].vector, block);
370 	}
371 	kvfree(priv->ntfy_blocks);
372 	priv->ntfy_blocks = NULL;
373 abort_with_irq_db_indices:
374 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
375 			  sizeof(*priv->irq_db_indices),
376 			  priv->irq_db_indices, priv->irq_db_indices_bus);
377 	priv->irq_db_indices = NULL;
378 abort_with_mgmt_vector:
379 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
380 abort_with_msix_enabled:
381 	pci_disable_msix(priv->pdev);
382 abort_with_msix_vectors:
383 	kvfree(priv->msix_vectors);
384 	priv->msix_vectors = NULL;
385 	return err;
386 }
387 
388 static void gve_free_notify_blocks(struct gve_priv *priv)
389 {
390 	int i;
391 
392 	if (!priv->msix_vectors)
393 		return;
394 
395 	/* Free the irqs */
396 	for (i = 0; i < priv->num_ntfy_blks; i++) {
397 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
398 		int msix_idx = i;
399 
400 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
401 				      NULL);
402 		free_irq(priv->msix_vectors[msix_idx].vector, block);
403 	}
404 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
405 	kvfree(priv->ntfy_blocks);
406 	priv->ntfy_blocks = NULL;
407 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
408 			  sizeof(*priv->irq_db_indices),
409 			  priv->irq_db_indices, priv->irq_db_indices_bus);
410 	priv->irq_db_indices = NULL;
411 	pci_disable_msix(priv->pdev);
412 	kvfree(priv->msix_vectors);
413 	priv->msix_vectors = NULL;
414 }
415 
416 static int gve_setup_device_resources(struct gve_priv *priv)
417 {
418 	int err;
419 
420 	err = gve_alloc_counter_array(priv);
421 	if (err)
422 		return err;
423 	err = gve_alloc_notify_blocks(priv);
424 	if (err)
425 		goto abort_with_counter;
426 	err = gve_alloc_stats_report(priv);
427 	if (err)
428 		goto abort_with_ntfy_blocks;
429 	err = gve_adminq_configure_device_resources(priv,
430 						    priv->counter_array_bus,
431 						    priv->num_event_counters,
432 						    priv->irq_db_indices_bus,
433 						    priv->num_ntfy_blks);
434 	if (unlikely(err)) {
435 		dev_err(&priv->pdev->dev,
436 			"could not setup device_resources: err=%d\n", err);
437 		err = -ENXIO;
438 		goto abort_with_stats_report;
439 	}
440 
441 	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
442 		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
443 					       GFP_KERNEL);
444 		if (!priv->ptype_lut_dqo) {
445 			err = -ENOMEM;
446 			goto abort_with_stats_report;
447 		}
448 		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
449 		if (err) {
450 			dev_err(&priv->pdev->dev,
451 				"Failed to get ptype map: err=%d\n", err);
452 			goto abort_with_ptype_lut;
453 		}
454 	}
455 
456 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
457 				      priv->stats_report_bus,
458 				      GVE_STATS_REPORT_TIMER_PERIOD);
459 	if (err)
460 		dev_err(&priv->pdev->dev,
461 			"Failed to report stats: err=%d\n", err);
462 	gve_set_device_resources_ok(priv);
463 	return 0;
464 
465 abort_with_ptype_lut:
466 	kvfree(priv->ptype_lut_dqo);
467 	priv->ptype_lut_dqo = NULL;
468 abort_with_stats_report:
469 	gve_free_stats_report(priv);
470 abort_with_ntfy_blocks:
471 	gve_free_notify_blocks(priv);
472 abort_with_counter:
473 	gve_free_counter_array(priv);
474 
475 	return err;
476 }
477 
478 static void gve_trigger_reset(struct gve_priv *priv);
479 
480 static void gve_teardown_device_resources(struct gve_priv *priv)
481 {
482 	int err;
483 
484 	/* Tell device its resources are being freed */
485 	if (gve_get_device_resources_ok(priv)) {
486 		/* detach the stats report */
487 		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
488 		if (err) {
489 			dev_err(&priv->pdev->dev,
490 				"Failed to detach stats report: err=%d\n", err);
491 			gve_trigger_reset(priv);
492 		}
493 		err = gve_adminq_deconfigure_device_resources(priv);
494 		if (err) {
495 			dev_err(&priv->pdev->dev,
496 				"Could not deconfigure device resources: err=%d\n",
497 				err);
498 			gve_trigger_reset(priv);
499 		}
500 	}
501 
502 	kvfree(priv->ptype_lut_dqo);
503 	priv->ptype_lut_dqo = NULL;
504 
505 	gve_free_counter_array(priv);
506 	gve_free_notify_blocks(priv);
507 	gve_free_stats_report(priv);
508 	gve_clear_device_resources_ok(priv);
509 }
510 
511 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
512 			 int (*gve_poll)(struct napi_struct *, int))
513 {
514 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
515 
516 	netif_napi_add(priv->dev, &block->napi, gve_poll);
517 }
518 
519 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
520 {
521 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
522 
523 	netif_napi_del(&block->napi);
524 }
525 
526 static int gve_register_qpls(struct gve_priv *priv)
527 {
528 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
529 	int err;
530 	int i;
531 
532 	for (i = 0; i < num_qpls; i++) {
533 		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
534 		if (err) {
535 			netif_err(priv, drv, priv->dev,
536 				  "failed to register queue page list %d\n",
537 				  priv->qpls[i].id);
538 			/* This failure will trigger a reset - no need to clean
539 			 * up
540 			 */
541 			return err;
542 		}
543 	}
544 	return 0;
545 }
546 
547 static int gve_unregister_qpls(struct gve_priv *priv)
548 {
549 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
550 	int err;
551 	int i;
552 
553 	for (i = 0; i < num_qpls; i++) {
554 		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
555 		/* This failure will trigger a reset - no need to clean up */
556 		if (err) {
557 			netif_err(priv, drv, priv->dev,
558 				  "Failed to unregister queue page list %d\n",
559 				  priv->qpls[i].id);
560 			return err;
561 		}
562 	}
563 	return 0;
564 }
565 
566 static int gve_create_rings(struct gve_priv *priv)
567 {
568 	int err;
569 	int i;
570 
571 	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
572 	if (err) {
573 		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
574 			  priv->tx_cfg.num_queues);
575 		/* This failure will trigger a reset - no need to clean
576 		 * up
577 		 */
578 		return err;
579 	}
580 	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
581 		  priv->tx_cfg.num_queues);
582 
583 	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
584 	if (err) {
585 		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
586 			  priv->rx_cfg.num_queues);
587 		/* This failure will trigger a reset - no need to clean
588 		 * up
589 		 */
590 		return err;
591 	}
592 	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
593 		  priv->rx_cfg.num_queues);
594 
595 	if (gve_is_gqi(priv)) {
596 		/* Rx data ring has been prefilled with packet buffers at queue
597 		 * allocation time.
598 		 *
599 		 * Write the doorbell to provide descriptor slots and packet
600 		 * buffers to the NIC.
601 		 */
602 		for (i = 0; i < priv->rx_cfg.num_queues; i++)
603 			gve_rx_write_doorbell(priv, &priv->rx[i]);
604 	} else {
605 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
606 			/* Post buffers and ring doorbell. */
607 			gve_rx_post_buffers_dqo(&priv->rx[i]);
608 		}
609 	}
610 
611 	return 0;
612 }
613 
614 static void add_napi_init_sync_stats(struct gve_priv *priv,
615 				     int (*napi_poll)(struct napi_struct *napi,
616 						      int budget))
617 {
618 	int i;
619 
620 	/* Add tx napi & init sync stats*/
621 	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
622 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
623 
624 		u64_stats_init(&priv->tx[i].statss);
625 		priv->tx[i].ntfy_id = ntfy_idx;
626 		gve_add_napi(priv, ntfy_idx, napi_poll);
627 	}
628 	/* Add rx napi  & init sync stats*/
629 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
630 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
631 
632 		u64_stats_init(&priv->rx[i].statss);
633 		priv->rx[i].ntfy_id = ntfy_idx;
634 		gve_add_napi(priv, ntfy_idx, napi_poll);
635 	}
636 }
637 
638 static void gve_tx_free_rings(struct gve_priv *priv)
639 {
640 	if (gve_is_gqi(priv)) {
641 		gve_tx_free_rings_gqi(priv);
642 	} else {
643 		gve_tx_free_rings_dqo(priv);
644 	}
645 }
646 
647 static int gve_alloc_rings(struct gve_priv *priv)
648 {
649 	int err;
650 
651 	/* Setup tx rings */
652 	priv->tx = kvcalloc(priv->tx_cfg.num_queues, sizeof(*priv->tx),
653 			    GFP_KERNEL);
654 	if (!priv->tx)
655 		return -ENOMEM;
656 
657 	if (gve_is_gqi(priv))
658 		err = gve_tx_alloc_rings(priv);
659 	else
660 		err = gve_tx_alloc_rings_dqo(priv);
661 	if (err)
662 		goto free_tx;
663 
664 	/* Setup rx rings */
665 	priv->rx = kvcalloc(priv->rx_cfg.num_queues, sizeof(*priv->rx),
666 			    GFP_KERNEL);
667 	if (!priv->rx) {
668 		err = -ENOMEM;
669 		goto free_tx_queue;
670 	}
671 
672 	if (gve_is_gqi(priv))
673 		err = gve_rx_alloc_rings(priv);
674 	else
675 		err = gve_rx_alloc_rings_dqo(priv);
676 	if (err)
677 		goto free_rx;
678 
679 	if (gve_is_gqi(priv))
680 		add_napi_init_sync_stats(priv, gve_napi_poll);
681 	else
682 		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
683 
684 	return 0;
685 
686 free_rx:
687 	kvfree(priv->rx);
688 	priv->rx = NULL;
689 free_tx_queue:
690 	gve_tx_free_rings(priv);
691 free_tx:
692 	kvfree(priv->tx);
693 	priv->tx = NULL;
694 	return err;
695 }
696 
697 static int gve_destroy_rings(struct gve_priv *priv)
698 {
699 	int err;
700 
701 	err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
702 	if (err) {
703 		netif_err(priv, drv, priv->dev,
704 			  "failed to destroy tx queues\n");
705 		/* This failure will trigger a reset - no need to clean up */
706 		return err;
707 	}
708 	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
709 	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
710 	if (err) {
711 		netif_err(priv, drv, priv->dev,
712 			  "failed to destroy rx queues\n");
713 		/* This failure will trigger a reset - no need to clean up */
714 		return err;
715 	}
716 	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
717 	return 0;
718 }
719 
720 static void gve_rx_free_rings(struct gve_priv *priv)
721 {
722 	if (gve_is_gqi(priv))
723 		gve_rx_free_rings_gqi(priv);
724 	else
725 		gve_rx_free_rings_dqo(priv);
726 }
727 
728 static void gve_free_rings(struct gve_priv *priv)
729 {
730 	int ntfy_idx;
731 	int i;
732 
733 	if (priv->tx) {
734 		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
735 			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
736 			gve_remove_napi(priv, ntfy_idx);
737 		}
738 		gve_tx_free_rings(priv);
739 		kvfree(priv->tx);
740 		priv->tx = NULL;
741 	}
742 	if (priv->rx) {
743 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
744 			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
745 			gve_remove_napi(priv, ntfy_idx);
746 		}
747 		gve_rx_free_rings(priv);
748 		kvfree(priv->rx);
749 		priv->rx = NULL;
750 	}
751 }
752 
753 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
754 		   struct page **page, dma_addr_t *dma,
755 		   enum dma_data_direction dir, gfp_t gfp_flags)
756 {
757 	*page = alloc_page(gfp_flags);
758 	if (!*page) {
759 		priv->page_alloc_fail++;
760 		return -ENOMEM;
761 	}
762 	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
763 	if (dma_mapping_error(dev, *dma)) {
764 		priv->dma_mapping_error++;
765 		put_page(*page);
766 		return -ENOMEM;
767 	}
768 	return 0;
769 }
770 
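/* Allocate and DMA-map the pages backing queue page list @id, enforcing the
 * device's limit on registered pages. On partial failure the caller is
 * responsible for freeing whatever was allocated.
 */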
771 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
772 				     int pages)
773 {
774 	struct gve_queue_page_list *qpl = &priv->qpls[id];
775 	int err;
776 	int i;
777 
778 	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
779 		netif_err(priv, drv, priv->dev,
780 			  "Reached max number of registered pages %llu > %llu\n",
781 			  pages + priv->num_registered_pages,
782 			  priv->max_registered_pages);
783 		return -EINVAL;
784 	}
785 
786 	qpl->id = id;
787 	qpl->num_entries = 0;
788 	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
789 	/* caller handles clean up */
790 	if (!qpl->pages)
791 		return -ENOMEM;
792 	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
793 	/* caller handles clean up */
794 	if (!qpl->page_buses)
795 		return -ENOMEM;
796 
797 	for (i = 0; i < pages; i++) {
798 		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
799 				     &qpl->page_buses[i],
800 				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
801 		/* caller handles clean up */
802 		if (err)
803 			return -ENOMEM;
804 		qpl->num_entries++;
805 	}
806 	priv->num_registered_pages += pages;
807 
808 	return 0;
809 }
810 
811 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
812 		   enum dma_data_direction dir)
813 {
814 	if (!dma_mapping_error(dev, dma))
815 		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
816 	if (page)
817 		put_page(page);
818 }
819 
820 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
821 {
822 	struct gve_queue_page_list *qpl = &priv->qpls[id];
823 	int i;
824 
825 	if (!qpl->pages)
826 		return;
827 	if (!qpl->page_buses)
828 		goto free_pages;
829 
830 	for (i = 0; i < qpl->num_entries; i++)
831 		gve_free_page(&priv->pdev->dev, qpl->pages[i],
832 			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
833 
834 	kvfree(qpl->page_buses);
835 free_pages:
836 	kvfree(qpl->pages);
837 	priv->num_registered_pages -= qpl->num_entries;
838 }
839 
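/* Allocate one queue page list per TX and RX queue that uses one; num_qpls
 * may be zero for queue formats that do not use QPLs. Also allocate the
 * qpl_id_map bitmap used to track QPL assignment.
 */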
840 static int gve_alloc_qpls(struct gve_priv *priv)
841 {
842 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
843 	int i, j;
844 	int err;
845 
846 	if (num_qpls == 0)
847 		return 0;
848 
849 	priv->qpls = kvcalloc(num_qpls, sizeof(*priv->qpls), GFP_KERNEL);
850 	if (!priv->qpls)
851 		return -ENOMEM;
852 
853 	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
854 		err = gve_alloc_queue_page_list(priv, i,
855 						priv->tx_pages_per_qpl);
856 		if (err)
857 			goto free_qpls;
858 	}
859 	for (; i < num_qpls; i++) {
860 		err = gve_alloc_queue_page_list(priv, i,
861 						priv->rx_data_slot_cnt);
862 		if (err)
863 			goto free_qpls;
864 	}
865 
866 	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
867 				     sizeof(unsigned long) * BITS_PER_BYTE;
868 	priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(num_qpls),
869 					    sizeof(unsigned long), GFP_KERNEL);
870 	if (!priv->qpl_cfg.qpl_id_map) {
871 		err = -ENOMEM;
872 		goto free_qpls;
873 	}
874 
875 	return 0;
876 
877 free_qpls:
878 	for (j = 0; j <= i; j++)
879 		gve_free_queue_page_list(priv, j);
880 	kvfree(priv->qpls);
881 	return err;
882 }
883 
884 static void gve_free_qpls(struct gve_priv *priv)
885 {
886 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
887 	int i;
888 
889 	if (num_qpls == 0)
890 		return;
891 
892 	kvfree(priv->qpl_cfg.qpl_id_map);
893 
894 	for (i = 0; i < num_qpls; i++)
895 		gve_free_queue_page_list(priv, i);
896 
897 	kvfree(priv->qpls);
898 }
899 
900 /* Use this to schedule a reset when the device is capable of continuing
901  * to handle other requests in its current state. If it is not, do a reset
902  * in thread instead.
903  */
904 void gve_schedule_reset(struct gve_priv *priv)
905 {
906 	gve_set_do_reset(priv);
907 	queue_work(priv->gve_wq, &priv->service_task);
908 }
909 
910 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
911 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
912 static void gve_turndown(struct gve_priv *priv);
913 static void gve_turnup(struct gve_priv *priv);
914 
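/* ndo_open: allocate QPLs and rings, register the page lists and create the
 * queues on the device over the admin queue, then enable NAPI and interrupts
 * via gve_turnup(). Failures after device state has been touched fall through
 * to a reset unless one is already in progress.
 */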
915 static int gve_open(struct net_device *dev)
916 {
917 	struct gve_priv *priv = netdev_priv(dev);
918 	int err;
919 
920 	err = gve_alloc_qpls(priv);
921 	if (err)
922 		return err;
923 
924 	err = gve_alloc_rings(priv);
925 	if (err)
926 		goto free_qpls;
927 
928 	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
929 	if (err)
930 		goto free_rings;
931 	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
932 	if (err)
933 		goto free_rings;
934 
935 	err = gve_register_qpls(priv);
936 	if (err)
937 		goto reset;
938 
939 	if (!gve_is_gqi(priv)) {
940 		/* Hard code this for now. This may be tuned in the future for
941 		 * performance.
942 		 */
943 		priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
944 	}
945 	err = gve_create_rings(priv);
946 	if (err)
947 		goto reset;
948 
949 	gve_set_device_rings_ok(priv);
950 
951 	if (gve_get_report_stats(priv))
952 		mod_timer(&priv->stats_report_timer,
953 			  round_jiffies(jiffies +
954 				msecs_to_jiffies(priv->stats_report_timer_period)));
955 
956 	gve_turnup(priv);
957 	queue_work(priv->gve_wq, &priv->service_task);
958 	priv->interface_up_cnt++;
959 	return 0;
960 
961 free_rings:
962 	gve_free_rings(priv);
963 free_qpls:
964 	gve_free_qpls(priv);
965 	return err;
966 
967 reset:
968 	/* This must have been called from a reset due to the rtnl lock
969 	 * so just return at this point.
970 	 */
971 	if (gve_get_reset_in_progress(priv))
972 		return err;
973 	/* Otherwise reset before returning */
974 	gve_reset_and_teardown(priv, true);
975 	/* if this fails there is nothing we can do so just ignore the return */
976 	gve_reset_recovery(priv, false);
977 	/* return the original error */
978 	return err;
979 }
980 
981 static int gve_close(struct net_device *dev)
982 {
983 	struct gve_priv *priv = netdev_priv(dev);
984 	int err;
985 
986 	netif_carrier_off(dev);
987 	if (gve_get_device_rings_ok(priv)) {
988 		gve_turndown(priv);
989 		err = gve_destroy_rings(priv);
990 		if (err)
991 			goto err;
992 		err = gve_unregister_qpls(priv);
993 		if (err)
994 			goto err;
995 		gve_clear_device_rings_ok(priv);
996 	}
997 	del_timer_sync(&priv->stats_report_timer);
998 
999 	gve_free_rings(priv);
1000 	gve_free_qpls(priv);
1001 	priv->interface_down_cnt++;
1002 	return 0;
1003 
1004 err:
1005 	/* This must have been called from a reset due to the rtnl lock
1006 	 * so just return at this point.
1007 	 */
1008 	if (gve_get_reset_in_progress(priv))
1009 		return err;
1010 	/* Otherwise reset before returning */
1011 	gve_reset_and_teardown(priv, true);
1012 	return gve_reset_recovery(priv, false);
1013 }
1014 
1015 int gve_adjust_queues(struct gve_priv *priv,
1016 		      struct gve_queue_config new_rx_config,
1017 		      struct gve_queue_config new_tx_config)
1018 {
1019 	int err;
1020 
1021 	if (netif_carrier_ok(priv->dev)) {
1022 		/* To make this process as simple as possible we teardown the
1023 		 * device, set the new configuration, and then bring the device
1024 		 * up again.
1025 		 */
1026 		err = gve_close(priv->dev);
1027 		/* we have already tried to reset in close,
1028 		 * just fail at this point
1029 		 */
1030 		if (err)
1031 			return err;
1032 		priv->tx_cfg = new_tx_config;
1033 		priv->rx_cfg = new_rx_config;
1034 
1035 		err = gve_open(priv->dev);
1036 		if (err)
1037 			goto err;
1038 
1039 		return 0;
1040 	}
1041 	/* Set the config for the next up. */
1042 	priv->tx_cfg = new_tx_config;
1043 	priv->rx_cfg = new_rx_config;
1044 
1045 	return 0;
1046 err:
1047 	netif_err(priv, drv, priv->dev,
1048 		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1049 	gve_turndown(priv);
1050 	return err;
1051 }
1052 
1053 static void gve_turndown(struct gve_priv *priv)
1054 {
1055 	int idx;
1056 
1057 	if (netif_carrier_ok(priv->dev))
1058 		netif_carrier_off(priv->dev);
1059 
1060 	if (!gve_get_napi_enabled(priv))
1061 		return;
1062 
1063 	/* Disable napi to prevent more work from coming in */
1064 	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1065 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1066 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1067 
1068 		napi_disable(&block->napi);
1069 	}
1070 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1071 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1072 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1073 
1074 		napi_disable(&block->napi);
1075 	}
1076 
1077 	/* Stop tx queues */
1078 	netif_tx_disable(priv->dev);
1079 
1080 	gve_clear_napi_enabled(priv);
1081 	gve_clear_report_stats(priv);
1082 }
1083 
1084 static void gve_turnup(struct gve_priv *priv)
1085 {
1086 	int idx;
1087 
1088 	/* Start the tx queues */
1089 	netif_tx_start_all_queues(priv->dev);
1090 
1091 	/* Enable napi and unmask interrupts for all queues */
1092 	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1093 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1094 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1095 
1096 		napi_enable(&block->napi);
1097 		if (gve_is_gqi(priv)) {
1098 			iowrite32be(0, gve_irq_doorbell(priv, block));
1099 		} else {
1100 			gve_set_itr_coalesce_usecs_dqo(priv, block,
1101 						       priv->tx_coalesce_usecs);
1102 		}
1103 	}
1104 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1105 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1106 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1107 
1108 		napi_enable(&block->napi);
1109 		if (gve_is_gqi(priv)) {
1110 			iowrite32be(0, gve_irq_doorbell(priv, block));
1111 		} else {
1112 			gve_set_itr_coalesce_usecs_dqo(priv, block,
1113 						       priv->rx_coalesce_usecs);
1114 		}
1115 	}
1116 
1117 	gve_set_napi_enabled(priv);
1118 }
1119 
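/* ndo_tx_timeout: if the NIC has completed descriptors the driver has not yet
 * processed and the queue has not been kicked within MIN_TX_TIMEOUT_GAP, mask
 * the IRQ and reschedule NAPI to "kick" the queue; otherwise schedule a full
 * device reset.
 */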
1120 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1121 {
1122 	struct gve_notify_block *block;
1123 	struct gve_tx_ring *tx = NULL;
1124 	struct gve_priv *priv;
1125 	u32 last_nic_done;
1126 	u32 current_time;
1127 	u32 ntfy_idx;
1128 
1129 	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1130 	priv = netdev_priv(dev);
1131 	if (txqueue > priv->tx_cfg.num_queues)
1132 		goto reset;
1133 
1134 	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1135 	if (ntfy_idx >= priv->num_ntfy_blks)
1136 		goto reset;
1137 
1138 	block = &priv->ntfy_blocks[ntfy_idx];
1139 	tx = block->tx;
1140 
1141 	current_time = jiffies_to_msecs(jiffies);
1142 	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1143 		goto reset;
1144 
1145 	/* Check to see if there are missed completions, which will allow us to
1146 	 * kick the queue.
1147 	 */
1148 	last_nic_done = gve_tx_load_event_counter(priv, tx);
1149 	if (last_nic_done - tx->done) {
1150 		netdev_info(dev, "Kicking queue %d", txqueue);
1151 		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1152 		napi_schedule(&block->napi);
1153 		tx->last_kick_msec = current_time;
1154 		goto out;
1155 	} // Else reset.
1156 
1157 reset:
1158 	gve_schedule_reset(priv);
1159 
1160 out:
1161 	if (tx)
1162 		tx->queue_timeout++;
1163 	priv->tx_timeo_cnt++;
1164 }
1165 
1166 static int gve_set_features(struct net_device *netdev,
1167 			    netdev_features_t features)
1168 {
1169 	const netdev_features_t orig_features = netdev->features;
1170 	struct gve_priv *priv = netdev_priv(netdev);
1171 	int err;
1172 
1173 	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1174 		netdev->features ^= NETIF_F_LRO;
1175 		if (netif_carrier_ok(netdev)) {
1176 			/* To make this process as simple as possible we
1177 			 * teardown the device, set the new configuration,
1178 			 * and then bring the device up again.
1179 			 */
1180 			err = gve_close(netdev);
1181 			/* We have already tried to reset in close, just fail
1182 			 * at this point.
1183 			 */
1184 			if (err)
1185 				goto err;
1186 
1187 			err = gve_open(netdev);
1188 			if (err)
1189 				goto err;
1190 		}
1191 	}
1192 
1193 	return 0;
1194 err:
1195 	/* Reverts the change on error. */
1196 	netdev->features = orig_features;
1197 	netif_err(priv, drv, netdev,
1198 		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1199 	return err;
1200 }
1201 
1202 static const struct net_device_ops gve_netdev_ops = {
1203 	.ndo_start_xmit		=	gve_start_xmit,
1204 	.ndo_open		=	gve_open,
1205 	.ndo_stop		=	gve_close,
1206 	.ndo_get_stats64	=	gve_get_stats,
1207 	.ndo_tx_timeout         =       gve_tx_timeout,
1208 	.ndo_set_features	=	gve_set_features,
1209 };
1210 
1211 static void gve_handle_status(struct gve_priv *priv, u32 status)
1212 {
1213 	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1214 		dev_info(&priv->pdev->dev, "Device requested reset.\n");
1215 		gve_set_do_reset(priv);
1216 	}
1217 	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1218 		priv->stats_report_trigger_cnt++;
1219 		gve_set_do_report_stats(priv);
1220 	}
1221 }
1222 
1223 static void gve_handle_reset(struct gve_priv *priv)
1224 {
1225 	/* A service task will be scheduled at the end of probe to catch any
1226 	 * resets that need to happen, and we don't want to reset until
1227 	 * probe is done.
1228 	 */
1229 	if (gve_get_probe_in_progress(priv))
1230 		return;
1231 
1232 	if (gve_get_do_reset(priv)) {
1233 		rtnl_lock();
1234 		gve_reset(priv, false);
1235 		rtnl_unlock();
1236 	}
1237 }
1238 
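/* Populate the driver-owned portion of the shared stats report: bump
 * written_count, then record per-TX-queue wake/stop/frames/bytes/
 * last-completion/timeout counters and per-RX-queue sequence and fill
 * counters. The frame and completion counters are GQI-only.
 */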
1239 void gve_handle_report_stats(struct gve_priv *priv)
1240 {
1241 	struct stats *stats = priv->stats_report->stats;
1242 	int idx, stats_idx = 0;
1243 	unsigned int start = 0;
1244 	u64 tx_bytes;
1245 
1246 	if (!gve_get_report_stats(priv))
1247 		return;
1248 
1249 	be64_add_cpu(&priv->stats_report->written_count, 1);
1250 	/* tx stats */
1251 	if (priv->tx) {
1252 		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1253 			u32 last_completion = 0;
1254 			u32 tx_frames = 0;
1255 
1256 			/* DQO doesn't currently support these metrics. */
1257 			if (gve_is_gqi(priv)) {
1258 				last_completion = priv->tx[idx].done;
1259 				tx_frames = priv->tx[idx].req;
1260 			}
1261 
1262 			do {
1263 				start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss);
1264 				tx_bytes = priv->tx[idx].bytes_done;
1265 			} while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start));
1266 			stats[stats_idx++] = (struct stats) {
1267 				.stat_name = cpu_to_be32(TX_WAKE_CNT),
1268 				.value = cpu_to_be64(priv->tx[idx].wake_queue),
1269 				.queue_id = cpu_to_be32(idx),
1270 			};
1271 			stats[stats_idx++] = (struct stats) {
1272 				.stat_name = cpu_to_be32(TX_STOP_CNT),
1273 				.value = cpu_to_be64(priv->tx[idx].stop_queue),
1274 				.queue_id = cpu_to_be32(idx),
1275 			};
1276 			stats[stats_idx++] = (struct stats) {
1277 				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
1278 				.value = cpu_to_be64(tx_frames),
1279 				.queue_id = cpu_to_be32(idx),
1280 			};
1281 			stats[stats_idx++] = (struct stats) {
1282 				.stat_name = cpu_to_be32(TX_BYTES_SENT),
1283 				.value = cpu_to_be64(tx_bytes),
1284 				.queue_id = cpu_to_be32(idx),
1285 			};
1286 			stats[stats_idx++] = (struct stats) {
1287 				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1288 				.value = cpu_to_be64(last_completion),
1289 				.queue_id = cpu_to_be32(idx),
1290 			};
1291 			stats[stats_idx++] = (struct stats) {
1292 				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1293 				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
1294 				.queue_id = cpu_to_be32(idx),
1295 			};
1296 		}
1297 	}
1298 	/* rx stats */
1299 	if (priv->rx) {
1300 		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1301 			stats[stats_idx++] = (struct stats) {
1302 				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1303 				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
1304 				.queue_id = cpu_to_be32(idx),
1305 			};
1306 			stats[stats_idx++] = (struct stats) {
1307 				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1308 				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
1309 				.queue_id = cpu_to_be32(idx),
1310 			};
1311 		}
1312 	}
1313 }
1314 
1315 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1316 {
1317 	if (!gve_get_napi_enabled(priv))
1318 		return;
1319 
1320 	if (link_status == netif_carrier_ok(priv->dev))
1321 		return;
1322 
1323 	if (link_status) {
1324 		netdev_info(priv->dev, "Device link is up.\n");
1325 		netif_carrier_on(priv->dev);
1326 	} else {
1327 		netdev_info(priv->dev, "Device link is down.\n");
1328 		netif_carrier_off(priv->dev);
1329 	}
1330 }
1331 
1332 /* Handle NIC status register changes, reset requests and report stats */
1333 static void gve_service_task(struct work_struct *work)
1334 {
1335 	struct gve_priv *priv = container_of(work, struct gve_priv,
1336 					     service_task);
1337 	u32 status = ioread32be(&priv->reg_bar0->device_status);
1338 
1339 	gve_handle_status(priv, status);
1340 
1341 	gve_handle_reset(priv);
1342 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1343 }
1344 
1345 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
1346 {
1347 	int num_ntfy;
1348 	int err;
1349 
1350 	/* Set up the adminq */
1351 	err = gve_adminq_alloc(&priv->pdev->dev, priv);
1352 	if (err) {
1353 		dev_err(&priv->pdev->dev,
1354 			"Failed to alloc admin queue: err=%d\n", err);
1355 		return err;
1356 	}
1357 
1358 	if (skip_describe_device)
1359 		goto setup_device;
1360 
1361 	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
1362 	/* Get the initial information we need from the device */
1363 	err = gve_adminq_describe_device(priv);
1364 	if (err) {
1365 		dev_err(&priv->pdev->dev,
1366 			"Could not get device information: err=%d\n", err);
1367 		goto err;
1368 	}
1369 	priv->dev->mtu = priv->dev->max_mtu;
1370 	num_ntfy = pci_msix_vec_count(priv->pdev);
1371 	if (num_ntfy <= 0) {
1372 		dev_err(&priv->pdev->dev,
1373 			"could not count MSI-x vectors: err=%d\n", num_ntfy);
1374 		err = num_ntfy;
1375 		goto err;
1376 	} else if (num_ntfy < GVE_MIN_MSIX) {
1377 		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
1378 			GVE_MIN_MSIX, num_ntfy);
1379 		err = -EINVAL;
1380 		goto err;
1381 	}
1382 
1383 	priv->num_registered_pages = 0;
1384 	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
1385 	/* gvnic has one Notification Block per MSI-x vector, except for the
1386 	 * management vector
1387 	 */
1388 	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
1389 	priv->mgmt_msix_idx = priv->num_ntfy_blks;
1390 
1391 	priv->tx_cfg.max_queues =
1392 		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
1393 	priv->rx_cfg.max_queues =
1394 		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
1395 
1396 	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
1397 	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
1398 	if (priv->default_num_queues > 0) {
1399 		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
1400 						priv->tx_cfg.num_queues);
1401 		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
1402 						priv->rx_cfg.num_queues);
1403 	}
1404 
1405 	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
1406 		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
1407 	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
1408 		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
1409 
1410 	if (!gve_is_gqi(priv)) {
1411 		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
1412 		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
1413 	}
1414 
1415 setup_device:
1416 	err = gve_setup_device_resources(priv);
1417 	if (!err)
1418 		return 0;
1419 err:
1420 	gve_adminq_free(&priv->pdev->dev, priv);
1421 	return err;
1422 }
1423 
1424 static void gve_teardown_priv_resources(struct gve_priv *priv)
1425 {
1426 	gve_teardown_device_resources(priv);
1427 	gve_adminq_free(&priv->pdev->dev, priv);
1428 }
1429 
1430 static void gve_trigger_reset(struct gve_priv *priv)
1431 {
1432 	/* Reset the device by releasing the AQ */
1433 	gve_adminq_release(priv);
1434 }
1435 
1436 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
1437 {
1438 	gve_trigger_reset(priv);
1439 	/* With the reset having already happened, close cannot fail */
1440 	if (was_up)
1441 		gve_close(priv->dev);
1442 	gve_teardown_priv_resources(priv);
1443 }
1444 
1445 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
1446 {
1447 	int err;
1448 
1449 	err = gve_init_priv(priv, true);
1450 	if (err)
1451 		goto err;
1452 	if (was_up) {
1453 		err = gve_open(priv->dev);
1454 		if (err)
1455 			goto err;
1456 	}
1457 	return 0;
1458 err:
1459 	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1460 	gve_turndown(priv);
1461 	return err;
1462 }
1463 
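/* Perform a full device reset: optionally attempt a clean close first, tear
 * down resources (releasing the admin queue resets the device), then
 * re-initialize and, if the interface was up, re-open it.
 */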
1464 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
1465 {
1466 	bool was_up = netif_carrier_ok(priv->dev);
1467 	int err;
1468 
1469 	dev_info(&priv->pdev->dev, "Performing reset\n");
1470 	gve_clear_do_reset(priv);
1471 	gve_set_reset_in_progress(priv);
1472 	/* If we aren't attempting to teardown normally, just go turndown and
1473 	 * reset right away.
1474 	 */
1475 	if (!attempt_teardown) {
1476 		gve_turndown(priv);
1477 		gve_reset_and_teardown(priv, was_up);
1478 	} else {
1479 		/* Otherwise attempt to close normally */
1480 		if (was_up) {
1481 			err = gve_close(priv->dev);
1482 			/* If that fails reset as we did above */
1483 			if (err)
1484 				gve_reset_and_teardown(priv, was_up);
1485 		}
1486 		/* Clean up any remaining resources */
1487 		gve_teardown_priv_resources(priv);
1488 	}
1489 
1490 	/* Set it all back up */
1491 	err = gve_reset_recovery(priv, was_up);
1492 	gve_clear_reset_in_progress(priv);
1493 	priv->reset_cnt++;
1494 	priv->interface_up_cnt = 0;
1495 	priv->interface_down_cnt = 0;
1496 	priv->stats_report_trigger_cnt = 0;
1497 	return err;
1498 }
1499 
1500 static void gve_write_version(u8 __iomem *driver_version_register)
1501 {
1502 	const char *c = gve_version_prefix;
1503 
1504 	while (*c) {
1505 		writeb(*c, driver_version_register);
1506 		c++;
1507 	}
1508 
1509 	c = gve_version_str;
1510 	while (*c) {
1511 		writeb(*c, driver_version_register);
1512 		c++;
1513 	}
1514 	writeb('\n', driver_version_register);
1515 }
1516 
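/* PCI probe: map the register and doorbell BARs, read the device's maximum
 * queue counts, allocate the netdev and workqueue, then set up device
 * resources over the admin queue via gve_init_priv() before registering the
 * netdev.
 */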
1517 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1518 {
1519 	int max_tx_queues, max_rx_queues;
1520 	struct net_device *dev;
1521 	__be32 __iomem *db_bar;
1522 	struct gve_registers __iomem *reg_bar;
1523 	struct gve_priv *priv;
1524 	int err;
1525 
1526 	err = pci_enable_device(pdev);
1527 	if (err)
1528 		return err;
1529 
1530 	err = pci_request_regions(pdev, "gvnic-cfg");
1531 	if (err)
1532 		goto abort_with_enabled;
1533 
1534 	pci_set_master(pdev);
1535 
1536 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
1537 	if (err) {
1538 		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1539 		goto abort_with_pci_region;
1540 	}
1541 
1542 	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1543 	if (!reg_bar) {
1544 		dev_err(&pdev->dev, "Failed to map pci bar!\n");
1545 		err = -ENOMEM;
1546 		goto abort_with_pci_region;
1547 	}
1548 
1549 	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1550 	if (!db_bar) {
1551 		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1552 		err = -ENOMEM;
1553 		goto abort_with_reg_bar;
1554 	}
1555 
1556 	gve_write_version(&reg_bar->driver_version);
1557 	/* Get max queues to alloc etherdev */
1558 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
1559 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
1560 	/* Alloc and setup the netdev and priv */
1561 	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1562 	if (!dev) {
1563 		dev_err(&pdev->dev, "could not allocate netdev\n");
1564 		err = -ENOMEM;
1565 		goto abort_with_db_bar;
1566 	}
1567 	SET_NETDEV_DEV(dev, &pdev->dev);
1568 	pci_set_drvdata(pdev, dev);
1569 	dev->ethtool_ops = &gve_ethtool_ops;
1570 	dev->netdev_ops = &gve_netdev_ops;
1571 
1572 	/* Set default and supported features.
1573 	 *
1574 	 * Features might be set in other locations as well (such as
1575 	 * `gve_adminq_describe_device`).
1576 	 */
1577 	dev->hw_features = NETIF_F_HIGHDMA;
1578 	dev->hw_features |= NETIF_F_SG;
1579 	dev->hw_features |= NETIF_F_HW_CSUM;
1580 	dev->hw_features |= NETIF_F_TSO;
1581 	dev->hw_features |= NETIF_F_TSO6;
1582 	dev->hw_features |= NETIF_F_TSO_ECN;
1583 	dev->hw_features |= NETIF_F_RXCSUM;
1584 	dev->hw_features |= NETIF_F_RXHASH;
1585 	dev->features = dev->hw_features;
1586 	dev->watchdog_timeo = 5 * HZ;
1587 	dev->min_mtu = ETH_MIN_MTU;
1588 	netif_carrier_off(dev);
1589 
1590 	priv = netdev_priv(dev);
1591 	priv->dev = dev;
1592 	priv->pdev = pdev;
1593 	priv->msg_enable = DEFAULT_MSG_LEVEL;
1594 	priv->reg_bar0 = reg_bar;
1595 	priv->db_bar2 = db_bar;
1596 	priv->service_task_flags = 0x0;
1597 	priv->state_flags = 0x0;
1598 	priv->ethtool_flags = 0x0;
1599 
1600 	gve_set_probe_in_progress(priv);
1601 	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1602 	if (!priv->gve_wq) {
1603 		dev_err(&pdev->dev, "Could not allocate workqueue");
1604 		err = -ENOMEM;
1605 		goto abort_with_netdev;
1606 	}
1607 	INIT_WORK(&priv->service_task, gve_service_task);
1608 	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
1609 	priv->tx_cfg.max_queues = max_tx_queues;
1610 	priv->rx_cfg.max_queues = max_rx_queues;
1611 
1612 	err = gve_init_priv(priv, false);
1613 	if (err)
1614 		goto abort_with_wq;
1615 
1616 	err = register_netdev(dev);
1617 	if (err)
1618 		goto abort_with_gve_init;
1619 
1620 	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1621 	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
1622 	gve_clear_probe_in_progress(priv);
1623 	queue_work(priv->gve_wq, &priv->service_task);
1624 	return 0;
1625 
1626 abort_with_gve_init:
1627 	gve_teardown_priv_resources(priv);
1628 
1629 abort_with_wq:
1630 	destroy_workqueue(priv->gve_wq);
1631 
1632 abort_with_netdev:
1633 	free_netdev(dev);
1634 
1635 abort_with_db_bar:
1636 	pci_iounmap(pdev, db_bar);
1637 
1638 abort_with_reg_bar:
1639 	pci_iounmap(pdev, reg_bar);
1640 
1641 abort_with_pci_region:
1642 	pci_release_regions(pdev);
1643 
1644 abort_with_enabled:
1645 	pci_disable_device(pdev);
1646 	return err;
1647 }
1648 
1649 static void gve_remove(struct pci_dev *pdev)
1650 {
1651 	struct net_device *netdev = pci_get_drvdata(pdev);
1652 	struct gve_priv *priv = netdev_priv(netdev);
1653 	__be32 __iomem *db_bar = priv->db_bar2;
1654 	void __iomem *reg_bar = priv->reg_bar0;
1655 
1656 	unregister_netdev(netdev);
1657 	gve_teardown_priv_resources(priv);
1658 	destroy_workqueue(priv->gve_wq);
1659 	free_netdev(netdev);
1660 	pci_iounmap(pdev, db_bar);
1661 	pci_iounmap(pdev, reg_bar);
1662 	pci_release_regions(pdev);
1663 	pci_disable_device(pdev);
1664 }
1665 
1666 static void gve_shutdown(struct pci_dev *pdev)
1667 {
1668 	struct net_device *netdev = pci_get_drvdata(pdev);
1669 	struct gve_priv *priv = netdev_priv(netdev);
1670 	bool was_up = netif_carrier_ok(priv->dev);
1671 
1672 	rtnl_lock();
1673 	if (was_up && gve_close(priv->dev)) {
1674 		/* If the dev was up, attempt to close, if close fails, reset */
1675 		gve_reset_and_teardown(priv, was_up);
1676 	} else {
1677 		/* If the dev wasn't up or close worked, finish tearing down */
1678 		gve_teardown_priv_resources(priv);
1679 	}
1680 	rtnl_unlock();
1681 }
1682 
1683 #ifdef CONFIG_PM
1684 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
1685 {
1686 	struct net_device *netdev = pci_get_drvdata(pdev);
1687 	struct gve_priv *priv = netdev_priv(netdev);
1688 	bool was_up = netif_carrier_ok(priv->dev);
1689 
1690 	priv->suspend_cnt++;
1691 	rtnl_lock();
1692 	if (was_up && gve_close(priv->dev)) {
1693 		/* If the dev was up, attempt to close, if close fails, reset */
1694 		gve_reset_and_teardown(priv, was_up);
1695 	} else {
1696 		/* If the dev wasn't up or close worked, finish tearing down */
1697 		gve_teardown_priv_resources(priv);
1698 	}
1699 	priv->up_before_suspend = was_up;
1700 	rtnl_unlock();
1701 	return 0;
1702 }
1703 
1704 static int gve_resume(struct pci_dev *pdev)
1705 {
1706 	struct net_device *netdev = pci_get_drvdata(pdev);
1707 	struct gve_priv *priv = netdev_priv(netdev);
1708 	int err;
1709 
1710 	priv->resume_cnt++;
1711 	rtnl_lock();
1712 	err = gve_reset_recovery(priv, priv->up_before_suspend);
1713 	rtnl_unlock();
1714 	return err;
1715 }
1716 #endif /* CONFIG_PM */
1717 
1718 static const struct pci_device_id gve_id_table[] = {
1719 	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
1720 	{ }
1721 };
1722 
1723 static struct pci_driver gvnic_driver = {
1724 	.name		= "gvnic",
1725 	.id_table	= gve_id_table,
1726 	.probe		= gve_probe,
1727 	.remove		= gve_remove,
1728 	.shutdown	= gve_shutdown,
1729 #ifdef CONFIG_PM
1730 	.suspend        = gve_suspend,
1731 	.resume         = gve_resume,
1732 #endif
1733 };
1734 
1735 module_pci_driver(gvnic_driver);
1736 
1737 MODULE_DEVICE_TABLE(pci, gve_id_table);
1738 MODULE_AUTHOR("Google, Inc.");
1739 MODULE_DESCRIPTION("gVNIC Driver");
1740 MODULE_LICENSE("Dual MIT/GPL");
1741 MODULE_VERSION(GVE_VERSION);
1742