1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2021 Google, Inc.
5  */
6 
7 #include <linux/cpumask.h>
8 #include <linux/etherdevice.h>
9 #include <linux/interrupt.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
12 #include <linux/sched.h>
13 #include <linux/timer.h>
14 #include <linux/workqueue.h>
15 #include <net/sch_generic.h>
16 #include "gve.h"
17 #include "gve_dqo.h"
18 #include "gve_adminq.h"
19 #include "gve_register.h"
20 
21 #define GVE_DEFAULT_RX_COPYBREAK	(256)
22 
23 #define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
24 #define GVE_VERSION		"1.0.0"
25 #define GVE_VERSION_PREFIX	"GVE-"
26 
27 // Minimum amount of time between queue kicks in msec (10 seconds)
28 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
29 
30 const char gve_version_str[] = GVE_VERSION;
31 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
32 
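/* ndo_start_xmit handler: hand the skb to the GQI or DQO transmit path
 * depending on the queue format negotiated with the device.
 */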
33 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
34 {
35 	struct gve_priv *priv = netdev_priv(dev);
36 
37 	if (gve_is_gqi(priv))
38 		return gve_tx(skb, dev);
39 	else
40 		return gve_tx_dqo(skb, dev);
41 }
42 
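/* ndo_get_stats64 handler: sum per-ring packet and byte counters, using the
 * u64_stats sequence counters so readers see consistent 64-bit values.
 */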
43 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
44 {
45 	struct gve_priv *priv = netdev_priv(dev);
46 	unsigned int start;
47 	u64 packets, bytes;
48 	int ring;
49 
50 	if (priv->rx) {
51 		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
52 			do {
53 				start =
54 				  u64_stats_fetch_begin_irq(&priv->rx[ring].statss);
55 				packets = priv->rx[ring].rpackets;
56 				bytes = priv->rx[ring].rbytes;
57 			} while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss,
58 						       start));
59 			s->rx_packets += packets;
60 			s->rx_bytes += bytes;
61 		}
62 	}
63 	if (priv->tx) {
64 		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
65 			do {
66 				start =
67 				  u64_stats_fetch_begin_irq(&priv->tx[ring].statss);
68 				packets = priv->tx[ring].pkt_done;
69 				bytes = priv->tx[ring].bytes_done;
70 			} while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss,
71 						       start));
72 			s->tx_packets += packets;
73 			s->tx_bytes += bytes;
74 		}
75 	}
76 }
77 
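/* Allocate the DMA-coherent event counter array shared with the device */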
78 static int gve_alloc_counter_array(struct gve_priv *priv)
79 {
80 	priv->counter_array =
81 		dma_alloc_coherent(&priv->pdev->dev,
82 				   priv->num_event_counters *
83 				   sizeof(*priv->counter_array),
84 				   &priv->counter_array_bus, GFP_KERNEL);
85 	if (!priv->counter_array)
86 		return -ENOMEM;
87 
88 	return 0;
89 }
90 
91 static void gve_free_counter_array(struct gve_priv *priv)
92 {
93 	if (!priv->counter_array)
94 		return;
95 
96 	dma_free_coherent(&priv->pdev->dev,
97 			  priv->num_event_counters *
98 			  sizeof(*priv->counter_array),
99 			  priv->counter_array, priv->counter_array_bus);
100 	priv->counter_array = NULL;
101 }
102 
103 /* NIC requests to report stats */
104 static void gve_stats_report_task(struct work_struct *work)
105 {
106 	struct gve_priv *priv = container_of(work, struct gve_priv,
107 					     stats_report_task);
108 	if (gve_get_do_report_stats(priv)) {
109 		gve_handle_report_stats(priv);
110 		gve_clear_do_report_stats(priv);
111 	}
112 }
113 
114 static void gve_stats_report_schedule(struct gve_priv *priv)
115 {
116 	if (!gve_get_probe_in_progress(priv) &&
117 	    !gve_get_reset_in_progress(priv)) {
118 		gve_set_do_report_stats(priv);
119 		queue_work(priv->gve_wq, &priv->stats_report_task);
120 	}
121 }
122 
123 static void gve_stats_report_timer(struct timer_list *t)
124 {
125 	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
126 
127 	mod_timer(&priv->stats_report_timer,
128 		  round_jiffies(jiffies +
129 		  msecs_to_jiffies(priv->stats_report_timer_period)));
130 	gve_stats_report_schedule(priv);
131 }
132 
133 static int gve_alloc_stats_report(struct gve_priv *priv)
134 {
135 	int tx_stats_num, rx_stats_num;
136 
137 	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
138 		       priv->tx_cfg.num_queues;
139 	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
140 		       priv->rx_cfg.num_queues;
141 	priv->stats_report_len = struct_size(priv->stats_report, stats,
142 					     size_add(tx_stats_num, rx_stats_num));
143 	priv->stats_report =
144 		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
145 				   &priv->stats_report_bus, GFP_KERNEL);
146 	if (!priv->stats_report)
147 		return -ENOMEM;
148 	/* Set up timer for the report-stats task */
149 	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
150 	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
151 	return 0;
152 }
153 
154 static void gve_free_stats_report(struct gve_priv *priv)
155 {
156 	if (!priv->stats_report)
157 		return;
158 
159 	del_timer_sync(&priv->stats_report_timer);
160 	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
161 			  priv->stats_report, priv->stats_report_bus);
162 	priv->stats_report = NULL;
163 }
164 
165 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
166 {
167 	struct gve_priv *priv = arg;
168 
169 	queue_work(priv->gve_wq, &priv->service_task);
170 	return IRQ_HANDLED;
171 }
172 
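/* Per-queue interrupt handler for GQI: mask the interrupt via its doorbell
 * and defer the work to NAPI.
 */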
173 static irqreturn_t gve_intr(int irq, void *arg)
174 {
175 	struct gve_notify_block *block = arg;
176 	struct gve_priv *priv = block->priv;
177 
178 	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
179 	napi_schedule_irqoff(&block->napi);
180 	return IRQ_HANDLED;
181 }
182 
183 static irqreturn_t gve_intr_dqo(int irq, void *arg)
184 {
185 	struct gve_notify_block *block = arg;
186 
187 	/* Interrupts are automatically masked */
188 	napi_schedule_irqoff(&block->napi);
189 	return IRQ_HANDLED;
190 }
191 
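/* NAPI poll for GQI: service TX and RX work, then unmask the interrupt and
 * re-check for work that may have raced with the unmask.
 */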
192 static int gve_napi_poll(struct napi_struct *napi, int budget)
193 {
194 	struct gve_notify_block *block;
195 	__be32 __iomem *irq_doorbell;
196 	bool reschedule = false;
197 	struct gve_priv *priv;
198 
199 	block = container_of(napi, struct gve_notify_block, napi);
200 	priv = block->priv;
201 
202 	if (block->tx)
203 		reschedule |= gve_tx_poll(block, budget);
204 	if (block->rx)
205 		reschedule |= gve_rx_poll(block, budget);
206 
207 	if (reschedule)
208 		return budget;
209 
210 	napi_complete(napi);
211 	irq_doorbell = gve_irq_doorbell(priv, block);
212 	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
213 
214 	/* Double check we have no extra work.
215 	 * Ensure unmask synchronizes with checking for work.
216 	 */
217 	mb();
218 	if (block->tx)
219 		reschedule |= gve_tx_poll(block, -1);
220 	if (block->rx)
221 		reschedule |= gve_rx_poll(block, -1);
222 	if (reschedule && napi_reschedule(napi))
223 		iowrite32be(GVE_IRQ_MASK, irq_doorbell);
224 
225 	return 0;
226 }
227 
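/* NAPI poll for DQO: clean TX completions, process up to @budget RX
 * descriptors, and re-enable the interrupt once all work is done.
 */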
228 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
229 {
230 	struct gve_notify_block *block =
231 		container_of(napi, struct gve_notify_block, napi);
232 	struct gve_priv *priv = block->priv;
233 	bool reschedule = false;
234 	int work_done = 0;
235 
236 	if (block->tx)
237 		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
238 
239 	if (block->rx) {
240 		work_done = gve_rx_poll_dqo(block, budget);
241 		reschedule |= work_done == budget;
242 	}
243 
244 	if (reschedule)
245 		return budget;
246 
247 	if (likely(napi_complete_done(napi, work_done))) {
248 		/* Enable interrupts again.
249 		 *
250 		 * We don't need to repoll afterwards because HW supports the
251 		 * PCI MSI-X PBA feature.
252 		 *
253 		 * Another interrupt would be triggered if a new event came in
254 		 * since the last one.
255 		 */
256 		gve_write_irq_doorbell_dqo(priv, block,
257 					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
258 	}
259 
260 	return work_done;
261 }
262 
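/* Enable MSI-X and set up one notification block per data vector plus a
 * management vector; if fewer vectors are granted, shrink the TX/RX queue
 * limits to match.
 */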
263 static int gve_alloc_notify_blocks(struct gve_priv *priv)
264 {
265 	int num_vecs_requested = priv->num_ntfy_blks + 1;
266 	char *name = priv->dev->name;
267 	unsigned int active_cpus;
268 	int vecs_enabled;
269 	int i, j;
270 	int err;
271 
272 	priv->msix_vectors = kvzalloc(num_vecs_requested *
273 				      sizeof(*priv->msix_vectors), GFP_KERNEL);
274 	if (!priv->msix_vectors)
275 		return -ENOMEM;
276 	for (i = 0; i < num_vecs_requested; i++)
277 		priv->msix_vectors[i].entry = i;
278 	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
279 					     GVE_MIN_MSIX, num_vecs_requested);
280 	if (vecs_enabled < 0) {
281 		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
282 			GVE_MIN_MSIX, vecs_enabled);
283 		err = vecs_enabled;
284 		goto abort_with_msix_vectors;
285 	}
286 	if (vecs_enabled != num_vecs_requested) {
287 		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
288 		int vecs_per_type = new_num_ntfy_blks / 2;
289 		int vecs_left = new_num_ntfy_blks % 2;
290 
291 		priv->num_ntfy_blks = new_num_ntfy_blks;
292 		priv->mgmt_msix_idx = priv->num_ntfy_blks;
293 		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
294 						vecs_per_type);
295 		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
296 						vecs_per_type + vecs_left);
297 		dev_err(&priv->pdev->dev,
298 			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
299 			vecs_enabled, priv->tx_cfg.max_queues,
300 			priv->rx_cfg.max_queues);
301 		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
302 			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
303 		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
304 			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
305 	}
306 	/* Half the notification blocks go to TX and half to RX */
307 	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
308 
309 	/* Setup Management Vector  - the last vector */
310 	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
311 		 name);
312 	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
313 			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
314 	if (err) {
315 		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
316 		goto abort_with_msix_enabled;
317 	}
318 	priv->ntfy_blocks =
319 		dma_alloc_coherent(&priv->pdev->dev,
320 				   priv->num_ntfy_blks *
321 				   sizeof(*priv->ntfy_blocks),
322 				   &priv->ntfy_block_bus, GFP_KERNEL);
323 	if (!priv->ntfy_blocks) {
324 		err = -ENOMEM;
325 		goto abort_with_mgmt_vector;
326 	}
327 	/* Setup the other blocks - the first n-1 vectors */
328 	for (i = 0; i < priv->num_ntfy_blks; i++) {
329 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
330 		int msix_idx = i;
331 
332 		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
333 			 name, i);
334 		block->priv = priv;
335 		err = request_irq(priv->msix_vectors[msix_idx].vector,
336 				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
337 				  0, block->name, block);
338 		if (err) {
339 			dev_err(&priv->pdev->dev,
340 				"Failed to receive msix vector %d\n", i);
341 			goto abort_with_some_ntfy_blocks;
342 		}
343 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
344 				      get_cpu_mask(i % active_cpus));
345 	}
346 	return 0;
347 abort_with_some_ntfy_blocks:
348 	for (j = 0; j < i; j++) {
349 		struct gve_notify_block *block = &priv->ntfy_blocks[j];
350 		int msix_idx = j;
351 
352 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
353 				      NULL);
354 		free_irq(priv->msix_vectors[msix_idx].vector, block);
355 	}
356 	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
357 			  sizeof(*priv->ntfy_blocks),
358 			  priv->ntfy_blocks, priv->ntfy_block_bus);
359 	priv->ntfy_blocks = NULL;
360 abort_with_mgmt_vector:
361 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
362 abort_with_msix_enabled:
363 	pci_disable_msix(priv->pdev);
364 abort_with_msix_vectors:
365 	kvfree(priv->msix_vectors);
366 	priv->msix_vectors = NULL;
367 	return err;
368 }
369 
370 static void gve_free_notify_blocks(struct gve_priv *priv)
371 {
372 	int i;
373 
374 	if (!priv->msix_vectors)
375 		return;
376 
377 	/* Free the irqs */
378 	for (i = 0; i < priv->num_ntfy_blks; i++) {
379 		struct gve_notify_block *block = &priv->ntfy_blocks[i];
380 		int msix_idx = i;
381 
382 		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
383 				      NULL);
384 		free_irq(priv->msix_vectors[msix_idx].vector, block);
385 	}
386 	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
387 	dma_free_coherent(&priv->pdev->dev,
388 			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
389 			  priv->ntfy_blocks, priv->ntfy_block_bus);
390 	priv->ntfy_blocks = NULL;
391 	pci_disable_msix(priv->pdev);
392 	kvfree(priv->msix_vectors);
393 	priv->msix_vectors = NULL;
394 }
395 
396 static int gve_setup_device_resources(struct gve_priv *priv)
397 {
398 	int err;
399 
400 	err = gve_alloc_counter_array(priv);
401 	if (err)
402 		return err;
403 	err = gve_alloc_notify_blocks(priv);
404 	if (err)
405 		goto abort_with_counter;
406 	err = gve_alloc_stats_report(priv);
407 	if (err)
408 		goto abort_with_ntfy_blocks;
409 	err = gve_adminq_configure_device_resources(priv,
410 						    priv->counter_array_bus,
411 						    priv->num_event_counters,
412 						    priv->ntfy_block_bus,
413 						    priv->num_ntfy_blks);
414 	if (unlikely(err)) {
415 		dev_err(&priv->pdev->dev,
416 			"could not setup device_resources: err=%d\n", err);
417 		err = -ENXIO;
418 		goto abort_with_stats_report;
419 	}
420 
421 	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
422 		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
423 					       GFP_KERNEL);
424 		if (!priv->ptype_lut_dqo) {
425 			err = -ENOMEM;
426 			goto abort_with_stats_report;
427 		}
428 		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
429 		if (err) {
430 			dev_err(&priv->pdev->dev,
431 				"Failed to get ptype map: err=%d\n", err);
432 			goto abort_with_ptype_lut;
433 		}
434 	}
435 
436 	err = gve_adminq_report_stats(priv, priv->stats_report_len,
437 				      priv->stats_report_bus,
438 				      GVE_STATS_REPORT_TIMER_PERIOD);
439 	if (err)
440 		dev_err(&priv->pdev->dev,
441 			"Failed to report stats: err=%d\n", err);
442 	gve_set_device_resources_ok(priv);
443 	return 0;
444 
445 abort_with_ptype_lut:
446 	kvfree(priv->ptype_lut_dqo);
447 	priv->ptype_lut_dqo = NULL;
448 abort_with_stats_report:
449 	gve_free_stats_report(priv);
450 abort_with_ntfy_blocks:
451 	gve_free_notify_blocks(priv);
452 abort_with_counter:
453 	gve_free_counter_array(priv);
454 
455 	return err;
456 }
457 
458 static void gve_trigger_reset(struct gve_priv *priv);
459 
460 static void gve_teardown_device_resources(struct gve_priv *priv)
461 {
462 	int err;
463 
464 	/* Tell device its resources are being freed */
465 	if (gve_get_device_resources_ok(priv)) {
466 		/* detach the stats report */
467 		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
468 		if (err) {
469 			dev_err(&priv->pdev->dev,
470 				"Failed to detach stats report: err=%d\n", err);
471 			gve_trigger_reset(priv);
472 		}
473 		err = gve_adminq_deconfigure_device_resources(priv);
474 		if (err) {
475 			dev_err(&priv->pdev->dev,
476 				"Could not deconfigure device resources: err=%d\n",
477 				err);
478 			gve_trigger_reset(priv);
479 		}
480 	}
481 
482 	kvfree(priv->ptype_lut_dqo);
483 	priv->ptype_lut_dqo = NULL;
484 
485 	gve_free_counter_array(priv);
486 	gve_free_notify_blocks(priv);
487 	gve_free_stats_report(priv);
488 	gve_clear_device_resources_ok(priv);
489 }
490 
491 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
492 			 int (*gve_poll)(struct napi_struct *, int))
493 {
494 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
495 
496 	netif_napi_add(priv->dev, &block->napi, gve_poll,
497 		       NAPI_POLL_WEIGHT);
498 }
499 
500 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
501 {
502 	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
503 
504 	netif_napi_del(&block->napi);
505 }
506 
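/* Register each TX and RX queue page list with the device over the admin queue */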
507 static int gve_register_qpls(struct gve_priv *priv)
508 {
509 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
510 	int err;
511 	int i;
512 
513 	for (i = 0; i < num_qpls; i++) {
514 		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
515 		if (err) {
516 			netif_err(priv, drv, priv->dev,
517 				  "failed to register queue page list %d\n",
518 				  priv->qpls[i].id);
519 			/* This failure will trigger a reset - no need to clean
520 			 * up
521 			 */
522 			return err;
523 		}
524 	}
525 	return 0;
526 }
527 
528 static int gve_unregister_qpls(struct gve_priv *priv)
529 {
530 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
531 	int err;
532 	int i;
533 
534 	for (i = 0; i < num_qpls; i++) {
535 		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
536 		/* This failure will trigger a reset - no need to clean up */
537 		if (err) {
538 			netif_err(priv, drv, priv->dev,
539 				  "Failed to unregister queue page list %d\n",
540 				  priv->qpls[i].id);
541 			return err;
542 		}
543 	}
544 	return 0;
545 }
546 
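/* Ask the device to create the TX and RX queues, then post the initial RX
 * buffers/doorbells for the chosen queue format.
 */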
547 static int gve_create_rings(struct gve_priv *priv)
548 {
549 	int err;
550 	int i;
551 
552 	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
553 	if (err) {
554 		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
555 			  priv->tx_cfg.num_queues);
556 		/* This failure will trigger a reset - no need to clean
557 		 * up
558 		 */
559 		return err;
560 	}
561 	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
562 		  priv->tx_cfg.num_queues);
563 
564 	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
565 	if (err) {
566 		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
567 			  priv->rx_cfg.num_queues);
568 		/* This failure will trigger a reset - no need to clean
569 		 * up
570 		 */
571 		return err;
572 	}
573 	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
574 		  priv->rx_cfg.num_queues);
575 
576 	if (gve_is_gqi(priv)) {
577 		/* Rx data ring has been prefilled with packet buffers at queue
578 		 * allocation time.
579 		 *
580 		 * Write the doorbell to provide descriptor slots and packet
581 		 * buffers to the NIC.
582 		 */
583 		for (i = 0; i < priv->rx_cfg.num_queues; i++)
584 			gve_rx_write_doorbell(priv, &priv->rx[i]);
585 	} else {
586 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
587 			/* Post buffers and ring doorbell. */
588 			gve_rx_post_buffers_dqo(&priv->rx[i]);
589 		}
590 	}
591 
592 	return 0;
593 }
594 
595 static void add_napi_init_sync_stats(struct gve_priv *priv,
596 				     int (*napi_poll)(struct napi_struct *napi,
597 						      int budget))
598 {
599 	int i;
600 
601 	/* Add tx napi & init sync stats*/
602 	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
603 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
604 
605 		u64_stats_init(&priv->tx[i].statss);
606 		priv->tx[i].ntfy_id = ntfy_idx;
607 		gve_add_napi(priv, ntfy_idx, napi_poll);
608 	}
609 	/* Add rx napi  & init sync stats*/
610 	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
611 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
612 
613 		u64_stats_init(&priv->rx[i].statss);
614 		priv->rx[i].ntfy_id = ntfy_idx;
615 		gve_add_napi(priv, ntfy_idx, napi_poll);
616 	}
617 }
618 
619 static void gve_tx_free_rings(struct gve_priv *priv)
620 {
621 	if (gve_is_gqi(priv)) {
622 		gve_tx_free_rings_gqi(priv);
623 	} else {
624 		gve_tx_free_rings_dqo(priv);
625 	}
626 }
627 
628 static int gve_alloc_rings(struct gve_priv *priv)
629 {
630 	int err;
631 
632 	/* Setup tx rings */
633 	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
634 			    GFP_KERNEL);
635 	if (!priv->tx)
636 		return -ENOMEM;
637 
638 	if (gve_is_gqi(priv))
639 		err = gve_tx_alloc_rings(priv);
640 	else
641 		err = gve_tx_alloc_rings_dqo(priv);
642 	if (err)
643 		goto free_tx;
644 
645 	/* Setup rx rings */
646 	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
647 			    GFP_KERNEL);
648 	if (!priv->rx) {
649 		err = -ENOMEM;
650 		goto free_tx_queue;
651 	}
652 
653 	if (gve_is_gqi(priv))
654 		err = gve_rx_alloc_rings(priv);
655 	else
656 		err = gve_rx_alloc_rings_dqo(priv);
657 	if (err)
658 		goto free_rx;
659 
660 	if (gve_is_gqi(priv))
661 		add_napi_init_sync_stats(priv, gve_napi_poll);
662 	else
663 		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
664 
665 	return 0;
666 
667 free_rx:
668 	kvfree(priv->rx);
669 	priv->rx = NULL;
670 free_tx_queue:
671 	gve_tx_free_rings(priv);
672 free_tx:
673 	kvfree(priv->tx);
674 	priv->tx = NULL;
675 	return err;
676 }
677 
678 static int gve_destroy_rings(struct gve_priv *priv)
679 {
680 	int err;
681 
682 	err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
683 	if (err) {
684 		netif_err(priv, drv, priv->dev,
685 			  "failed to destroy tx queues\n");
686 		/* This failure will trigger a reset - no need to clean up */
687 		return err;
688 	}
689 	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
690 	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
691 	if (err) {
692 		netif_err(priv, drv, priv->dev,
693 			  "failed to destroy rx queues\n");
694 		/* This failure will trigger a reset - no need to clean up */
695 		return err;
696 	}
697 	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
698 	return 0;
699 }
700 
701 static void gve_rx_free_rings(struct gve_priv *priv)
702 {
703 	if (gve_is_gqi(priv))
704 		gve_rx_free_rings_gqi(priv);
705 	else
706 		gve_rx_free_rings_dqo(priv);
707 }
708 
709 static void gve_free_rings(struct gve_priv *priv)
710 {
711 	int ntfy_idx;
712 	int i;
713 
714 	if (priv->tx) {
715 		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
716 			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
717 			gve_remove_napi(priv, ntfy_idx);
718 		}
719 		gve_tx_free_rings(priv);
720 		kvfree(priv->tx);
721 		priv->tx = NULL;
722 	}
723 	if (priv->rx) {
724 		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
725 			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
726 			gve_remove_napi(priv, ntfy_idx);
727 		}
728 		gve_rx_free_rings(priv);
729 		kvfree(priv->rx);
730 		priv->rx = NULL;
731 	}
732 }
733 
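/* Allocate a page and DMA-map it in the requested direction */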
734 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
735 		   struct page **page, dma_addr_t *dma,
736 		   enum dma_data_direction dir, gfp_t gfp_flags)
737 {
738 	*page = alloc_page(gfp_flags);
739 	if (!*page) {
740 		priv->page_alloc_fail++;
741 		return -ENOMEM;
742 	}
743 	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
744 	if (dma_mapping_error(dev, *dma)) {
745 		priv->dma_mapping_error++;
746 		put_page(*page);
747 		return -ENOMEM;
748 	}
749 	return 0;
750 }
751 
752 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
753 				     int pages)
754 {
755 	struct gve_queue_page_list *qpl = &priv->qpls[id];
756 	int err;
757 	int i;
758 
759 	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
760 		netif_err(priv, drv, priv->dev,
761 			  "Reached max number of registered pages %llu > %llu\n",
762 			  pages + priv->num_registered_pages,
763 			  priv->max_registered_pages);
764 		return -EINVAL;
765 	}
766 
767 	qpl->id = id;
768 	qpl->num_entries = 0;
769 	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
770 	/* caller handles clean up */
771 	if (!qpl->pages)
772 		return -ENOMEM;
773 	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
774 				   GFP_KERNEL);
775 	/* caller handles clean up */
776 	if (!qpl->page_buses)
777 		return -ENOMEM;
778 
779 	for (i = 0; i < pages; i++) {
780 		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
781 				     &qpl->page_buses[i],
782 				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
783 		/* caller handles clean up */
784 		if (err)
785 			return -ENOMEM;
786 		qpl->num_entries++;
787 	}
788 	priv->num_registered_pages += pages;
789 
790 	return 0;
791 }
792 
793 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
794 		   enum dma_data_direction dir)
795 {
796 	if (!dma_mapping_error(dev, dma))
797 		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
798 	if (page)
799 		put_page(page);
800 }
801 
802 static void gve_free_queue_page_list(struct gve_priv *priv,
803 				     int id)
804 {
805 	struct gve_queue_page_list *qpl = &priv->qpls[id];
806 	int i;
807 
808 	if (!qpl->pages)
809 		return;
810 	if (!qpl->page_buses)
811 		goto free_pages;
812 
813 	for (i = 0; i < qpl->num_entries; i++)
814 		gve_free_page(&priv->pdev->dev, qpl->pages[i],
815 			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
816 
817 	kvfree(qpl->page_buses);
818 free_pages:
819 	kvfree(qpl->pages);
820 	priv->num_registered_pages -= qpl->num_entries;
821 }
822 
823 static int gve_alloc_qpls(struct gve_priv *priv)
824 {
825 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
826 	int i, j;
827 	int err;
828 
829 	/* Raw addressing means no QPLs */
830 	if (priv->queue_format == GVE_GQI_RDA_FORMAT)
831 		return 0;
832 
833 	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
834 	if (!priv->qpls)
835 		return -ENOMEM;
836 
837 	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
838 		err = gve_alloc_queue_page_list(priv, i,
839 						priv->tx_pages_per_qpl);
840 		if (err)
841 			goto free_qpls;
842 	}
843 	for (; i < num_qpls; i++) {
844 		err = gve_alloc_queue_page_list(priv, i,
845 						priv->rx_data_slot_cnt);
846 		if (err)
847 			goto free_qpls;
848 	}
849 
850 	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
851 				     sizeof(unsigned long) * BITS_PER_BYTE;
852 	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
853 					    sizeof(unsigned long), GFP_KERNEL);
854 	if (!priv->qpl_cfg.qpl_id_map) {
855 		err = -ENOMEM;
856 		goto free_qpls;
857 	}
858 
859 	return 0;
860 
861 free_qpls:
862 	for (j = 0; j <= i; j++)
863 		gve_free_queue_page_list(priv, j);
864 	kvfree(priv->qpls);
865 	return err;
866 }
867 
868 static void gve_free_qpls(struct gve_priv *priv)
869 {
870 	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
871 	int i;
872 
873 	/* Raw addressing means no QPLs */
874 	if (priv->queue_format == GVE_GQI_RDA_FORMAT)
875 		return;
876 
877 	kvfree(priv->qpl_cfg.qpl_id_map);
878 
879 	for (i = 0; i < num_qpls; i++)
880 		gve_free_queue_page_list(priv, i);
881 
882 	kvfree(priv->qpls);
883 }
884 
885 /* Use this to schedule a reset when the device is capable of continuing
886  * to handle other requests in its current state. If it is not, do a reset
887  * in thread instead.
888  */
889 void gve_schedule_reset(struct gve_priv *priv)
890 {
891 	gve_set_do_reset(priv);
892 	queue_work(priv->gve_wq, &priv->service_task);
893 }
894 
895 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
896 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
897 static void gve_turndown(struct gve_priv *priv);
898 static void gve_turnup(struct gve_priv *priv);
899 
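/* ndo_open handler: allocate QPLs and rings, register them with the device,
 * create the queues and bring the data path up.
 */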
900 static int gve_open(struct net_device *dev)
901 {
902 	struct gve_priv *priv = netdev_priv(dev);
903 	int err;
904 
905 	err = gve_alloc_qpls(priv);
906 	if (err)
907 		return err;
908 
909 	err = gve_alloc_rings(priv);
910 	if (err)
911 		goto free_qpls;
912 
913 	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
914 	if (err)
915 		goto free_rings;
916 	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
917 	if (err)
918 		goto free_rings;
919 
920 	err = gve_register_qpls(priv);
921 	if (err)
922 		goto reset;
923 
924 	if (!gve_is_gqi(priv)) {
925 		/* Hard code this for now. This may be tuned in the future for
926 		 * performance.
927 		 */
928 		priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
929 	}
930 	err = gve_create_rings(priv);
931 	if (err)
932 		goto reset;
933 
934 	gve_set_device_rings_ok(priv);
935 
936 	if (gve_get_report_stats(priv))
937 		mod_timer(&priv->stats_report_timer,
938 			  round_jiffies(jiffies +
939 				msecs_to_jiffies(priv->stats_report_timer_period)));
940 
941 	gve_turnup(priv);
942 	queue_work(priv->gve_wq, &priv->service_task);
943 	priv->interface_up_cnt++;
944 	return 0;
945 
946 free_rings:
947 	gve_free_rings(priv);
948 free_qpls:
949 	gve_free_qpls(priv);
950 	return err;
951 
952 reset:
953 	/* This must have been called from a reset due to the rtnl lock
954 	 * so just return at this point.
955 	 */
956 	if (gve_get_reset_in_progress(priv))
957 		return err;
958 	/* Otherwise reset before returning */
959 	gve_reset_and_teardown(priv, true);
960 	/* if this fails there is nothing we can do so just ignore the return */
961 	gve_reset_recovery(priv, false);
962 	/* return the original error */
963 	return err;
964 }
965 
966 static int gve_close(struct net_device *dev)
967 {
968 	struct gve_priv *priv = netdev_priv(dev);
969 	int err;
970 
971 	netif_carrier_off(dev);
972 	if (gve_get_device_rings_ok(priv)) {
973 		gve_turndown(priv);
974 		err = gve_destroy_rings(priv);
975 		if (err)
976 			goto err;
977 		err = gve_unregister_qpls(priv);
978 		if (err)
979 			goto err;
980 		gve_clear_device_rings_ok(priv);
981 	}
982 	del_timer_sync(&priv->stats_report_timer);
983 
984 	gve_free_rings(priv);
985 	gve_free_qpls(priv);
986 	priv->interface_down_cnt++;
987 	return 0;
988 
989 err:
990 	/* This must have been called from a reset due to the rtnl lock
991 	 * so just return at this point.
992 	 */
993 	if (gve_get_reset_in_progress(priv))
994 		return err;
995 	/* Otherwise reset before returning */
996 	gve_reset_and_teardown(priv, true);
997 	return gve_reset_recovery(priv, false);
998 }
999 
1000 int gve_adjust_queues(struct gve_priv *priv,
1001 		      struct gve_queue_config new_rx_config,
1002 		      struct gve_queue_config new_tx_config)
1003 {
1004 	int err;
1005 
1006 	if (netif_carrier_ok(priv->dev)) {
1007 		/* To make this process as simple as possible we teardown the
1008 		 * device, set the new configuration, and then bring the device
1009 		 * up again.
1010 		 */
1011 		err = gve_close(priv->dev);
1012 		/* we have already tried to reset in close,
1013 		 * just fail at this point
1014 		 */
1015 		if (err)
1016 			return err;
1017 		priv->tx_cfg = new_tx_config;
1018 		priv->rx_cfg = new_rx_config;
1019 
1020 		err = gve_open(priv->dev);
1021 		if (err)
1022 			goto err;
1023 
1024 		return 0;
1025 	}
1026 	/* Set the config for the next up. */
1027 	priv->tx_cfg = new_tx_config;
1028 	priv->rx_cfg = new_rx_config;
1029 
1030 	return 0;
1031 err:
1032 	netif_err(priv, drv, priv->dev,
1033 		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1034 	gve_turndown(priv);
1035 	return err;
1036 }
1037 
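/* Quiesce the data path: drop the carrier, disable NAPI and stop TX queues */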
1038 static void gve_turndown(struct gve_priv *priv)
1039 {
1040 	int idx;
1041 
1042 	if (netif_carrier_ok(priv->dev))
1043 		netif_carrier_off(priv->dev);
1044 
1045 	if (!gve_get_napi_enabled(priv))
1046 		return;
1047 
1048 	/* Disable napi to prevent more work from coming in */
1049 	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1050 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1051 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1052 
1053 		napi_disable(&block->napi);
1054 	}
1055 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1056 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1057 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1058 
1059 		napi_disable(&block->napi);
1060 	}
1061 
1062 	/* Stop tx queues */
1063 	netif_tx_disable(priv->dev);
1064 
1065 	gve_clear_napi_enabled(priv);
1066 	gve_clear_report_stats(priv);
1067 }
1068 
1069 static void gve_turnup(struct gve_priv *priv)
1070 {
1071 	int idx;
1072 
1073 	/* Start the tx queues */
1074 	netif_tx_start_all_queues(priv->dev);
1075 
1076 	/* Enable napi and unmask interrupts for all queues */
1077 	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1078 		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1079 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1080 
1081 		napi_enable(&block->napi);
1082 		if (gve_is_gqi(priv)) {
1083 			iowrite32be(0, gve_irq_doorbell(priv, block));
1084 		} else {
1085 			u32 val = gve_set_itr_ratelimit_dqo(GVE_TX_IRQ_RATELIMIT_US_DQO);
1086 
1087 			gve_write_irq_doorbell_dqo(priv, block, val);
1088 		}
1089 	}
1090 	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1091 		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1092 		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1093 
1094 		napi_enable(&block->napi);
1095 		if (gve_is_gqi(priv)) {
1096 			iowrite32be(0, gve_irq_doorbell(priv, block));
1097 		} else {
1098 			u32 val = gve_set_itr_ratelimit_dqo(GVE_RX_IRQ_RATELIMIT_US_DQO);
1099 
1100 			gve_write_irq_doorbell_dqo(priv, block, val);
1101 		}
1102 	}
1103 
1104 	gve_set_napi_enabled(priv);
1105 }
1106 
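/* ndo_tx_timeout handler: if completions were missed, kick the queue;
 * otherwise schedule a device reset.
 */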
1107 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1108 {
1109 	struct gve_notify_block *block;
1110 	struct gve_tx_ring *tx = NULL;
1111 	struct gve_priv *priv;
1112 	u32 last_nic_done;
1113 	u32 current_time;
1114 	u32 ntfy_idx;
1115 
1116 	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
1117 	priv = netdev_priv(dev);
1118 	if (txqueue > priv->tx_cfg.num_queues)
1119 		goto reset;
1120 
1121 	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1122 	if (ntfy_idx >= priv->num_ntfy_blks)
1123 		goto reset;
1124 
1125 	block = &priv->ntfy_blocks[ntfy_idx];
1126 	tx = block->tx;
1127 
1128 	current_time = jiffies_to_msecs(jiffies);
1129 	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1130 		goto reset;
1131 
1132 	/* Check to see if there are missed completions, which will allow us to
1133 	 * kick the queue.
1134 	 */
1135 	last_nic_done = gve_tx_load_event_counter(priv, tx);
1136 	if (last_nic_done - tx->done) {
1137 		netdev_info(dev, "Kicking queue %d", txqueue);
1138 		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1139 		napi_schedule(&block->napi);
1140 		tx->last_kick_msec = current_time;
1141 		goto out;
1142 	} // Else reset.
1143 
1144 reset:
1145 	gve_schedule_reset(priv);
1146 
1147 out:
1148 	if (tx)
1149 		tx->queue_timeout++;
1150 	priv->tx_timeo_cnt++;
1151 }
1152 
1153 static int gve_set_features(struct net_device *netdev,
1154 			    netdev_features_t features)
1155 {
1156 	const netdev_features_t orig_features = netdev->features;
1157 	struct gve_priv *priv = netdev_priv(netdev);
1158 	int err;
1159 
1160 	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1161 		netdev->features ^= NETIF_F_LRO;
1162 		if (netif_carrier_ok(netdev)) {
1163 			/* To make this process as simple as possible we
1164 			 * teardown the device, set the new configuration,
1165 			 * and then bring the device up again.
1166 			 */
1167 			err = gve_close(netdev);
1168 			/* We have already tried to reset in close, just fail
1169 			 * at this point.
1170 			 */
1171 			if (err)
1172 				goto err;
1173 
1174 			err = gve_open(netdev);
1175 			if (err)
1176 				goto err;
1177 		}
1178 	}
1179 
1180 	return 0;
1181 err:
1182 	/* Reverts the change on error. */
1183 	netdev->features = orig_features;
1184 	netif_err(priv, drv, netdev,
1185 		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1186 	return err;
1187 }
1188 
1189 static const struct net_device_ops gve_netdev_ops = {
1190 	.ndo_start_xmit		=	gve_start_xmit,
1191 	.ndo_open		=	gve_open,
1192 	.ndo_stop		=	gve_close,
1193 	.ndo_get_stats64	=	gve_get_stats,
1194 	.ndo_tx_timeout         =       gve_tx_timeout,
1195 	.ndo_set_features	=	gve_set_features,
1196 };
1197 
1198 static void gve_handle_status(struct gve_priv *priv, u32 status)
1199 {
1200 	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1201 		dev_info(&priv->pdev->dev, "Device requested reset.\n");
1202 		gve_set_do_reset(priv);
1203 	}
1204 	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1205 		priv->stats_report_trigger_cnt++;
1206 		gve_set_do_report_stats(priv);
1207 	}
1208 }
1209 
1210 static void gve_handle_reset(struct gve_priv *priv)
1211 {
1212 	/* A service task will be scheduled at the end of probe to catch any
1213 	 * resets that need to happen, and we don't want to reset until
1214 	 * probe is done.
1215 	 */
1216 	if (gve_get_probe_in_progress(priv))
1217 		return;
1218 
1219 	if (gve_get_do_reset(priv)) {
1220 		rtnl_lock();
1221 		gve_reset(priv, false);
1222 		rtnl_unlock();
1223 	}
1224 }
1225 
1226 void gve_handle_report_stats(struct gve_priv *priv)
1227 {
1228 	struct stats *stats = priv->stats_report->stats;
1229 	int idx, stats_idx = 0;
1230 	unsigned int start = 0;
1231 	u64 tx_bytes;
1232 
1233 	if (!gve_get_report_stats(priv))
1234 		return;
1235 
1236 	be64_add_cpu(&priv->stats_report->written_count, 1);
1237 	/* tx stats */
1238 	if (priv->tx) {
1239 		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
1240 			u32 last_completion = 0;
1241 			u32 tx_frames = 0;
1242 
1243 			/* DQO doesn't currently support these metrics. */
1244 			if (gve_is_gqi(priv)) {
1245 				last_completion = priv->tx[idx].done;
1246 				tx_frames = priv->tx[idx].req;
1247 			}
1248 
1249 			do {
1250 				start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss);
1251 				tx_bytes = priv->tx[idx].bytes_done;
1252 			} while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start));
1253 			stats[stats_idx++] = (struct stats) {
1254 				.stat_name = cpu_to_be32(TX_WAKE_CNT),
1255 				.value = cpu_to_be64(priv->tx[idx].wake_queue),
1256 				.queue_id = cpu_to_be32(idx),
1257 			};
1258 			stats[stats_idx++] = (struct stats) {
1259 				.stat_name = cpu_to_be32(TX_STOP_CNT),
1260 				.value = cpu_to_be64(priv->tx[idx].stop_queue),
1261 				.queue_id = cpu_to_be32(idx),
1262 			};
1263 			stats[stats_idx++] = (struct stats) {
1264 				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
1265 				.value = cpu_to_be64(tx_frames),
1266 				.queue_id = cpu_to_be32(idx),
1267 			};
1268 			stats[stats_idx++] = (struct stats) {
1269 				.stat_name = cpu_to_be32(TX_BYTES_SENT),
1270 				.value = cpu_to_be64(tx_bytes),
1271 				.queue_id = cpu_to_be32(idx),
1272 			};
1273 			stats[stats_idx++] = (struct stats) {
1274 				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1275 				.value = cpu_to_be64(last_completion),
1276 				.queue_id = cpu_to_be32(idx),
1277 			};
1278 			stats[stats_idx++] = (struct stats) {
1279 				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1280 				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
1281 				.queue_id = cpu_to_be32(idx),
1282 			};
1283 		}
1284 	}
1285 	/* rx stats */
1286 	if (priv->rx) {
1287 		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1288 			stats[stats_idx++] = (struct stats) {
1289 				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1290 				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
1291 				.queue_id = cpu_to_be32(idx),
1292 			};
1293 			stats[stats_idx++] = (struct stats) {
1294 				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1295 				.value = cpu_to_be64(priv->rx[0].fill_cnt),
1296 				.queue_id = cpu_to_be32(idx),
1297 			};
1298 		}
1299 	}
1300 }
1301 
1302 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1303 {
1304 	if (!gve_get_napi_enabled(priv))
1305 		return;
1306 
1307 	if (link_status == netif_carrier_ok(priv->dev))
1308 		return;
1309 
1310 	if (link_status) {
1311 		netdev_info(priv->dev, "Device link is up.\n");
1312 		netif_carrier_on(priv->dev);
1313 	} else {
1314 		netdev_info(priv->dev, "Device link is down.\n");
1315 		netif_carrier_off(priv->dev);
1316 	}
1317 }
1318 
1319 /* Handle NIC status register changes, reset requests and report stats */
1320 static void gve_service_task(struct work_struct *work)
1321 {
1322 	struct gve_priv *priv = container_of(work, struct gve_priv,
1323 					     service_task);
1324 	u32 status = ioread32be(&priv->reg_bar0->device_status);
1325 
1326 	gve_handle_status(priv, status);
1327 
1328 	gve_handle_reset(priv);
1329 	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1330 }
1331 
1332 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
1333 {
1334 	int num_ntfy;
1335 	int err;
1336 
1337 	/* Set up the adminq */
1338 	err = gve_adminq_alloc(&priv->pdev->dev, priv);
1339 	if (err) {
1340 		dev_err(&priv->pdev->dev,
1341 			"Failed to alloc admin queue: err=%d\n", err);
1342 		return err;
1343 	}
1344 
1345 	if (skip_describe_device)
1346 		goto setup_device;
1347 
1348 	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
1349 	/* Get the initial information we need from the device */
1350 	err = gve_adminq_describe_device(priv);
1351 	if (err) {
1352 		dev_err(&priv->pdev->dev,
1353 			"Could not get device information: err=%d\n", err);
1354 		goto err;
1355 	}
1356 	if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) {
1357 		priv->dev->max_mtu = PAGE_SIZE;
1358 		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
1359 		if (err) {
1360 			dev_err(&priv->pdev->dev, "Could not set mtu");
1361 			goto err;
1362 		}
1363 	}
1364 	priv->dev->mtu = priv->dev->max_mtu;
1365 	num_ntfy = pci_msix_vec_count(priv->pdev);
1366 	if (num_ntfy <= 0) {
1367 		dev_err(&priv->pdev->dev,
1368 			"could not count MSI-x vectors: err=%d\n", num_ntfy);
1369 		err = num_ntfy;
1370 		goto err;
1371 	} else if (num_ntfy < GVE_MIN_MSIX) {
1372 		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
1373 			GVE_MIN_MSIX, num_ntfy);
1374 		err = -EINVAL;
1375 		goto err;
1376 	}
1377 
1378 	priv->num_registered_pages = 0;
1379 	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
1380 	/* gvnic has one Notification Block per MSI-x vector, except for the
1381 	 * management vector
1382 	 */
1383 	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
1384 	priv->mgmt_msix_idx = priv->num_ntfy_blks;
1385 
1386 	priv->tx_cfg.max_queues =
1387 		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
1388 	priv->rx_cfg.max_queues =
1389 		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
1390 
1391 	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
1392 	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
1393 	if (priv->default_num_queues > 0) {
1394 		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
1395 						priv->tx_cfg.num_queues);
1396 		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
1397 						priv->rx_cfg.num_queues);
1398 	}
1399 
1400 	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
1401 		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
1402 	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
1403 		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
1404 
1405 setup_device:
1406 	err = gve_setup_device_resources(priv);
1407 	if (!err)
1408 		return 0;
1409 err:
1410 	gve_adminq_free(&priv->pdev->dev, priv);
1411 	return err;
1412 }
1413 
1414 static void gve_teardown_priv_resources(struct gve_priv *priv)
1415 {
1416 	gve_teardown_device_resources(priv);
1417 	gve_adminq_free(&priv->pdev->dev, priv);
1418 }
1419 
1420 static void gve_trigger_reset(struct gve_priv *priv)
1421 {
1422 	/* Reset the device by releasing the AQ */
1423 	gve_adminq_release(priv);
1424 }
1425 
1426 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
1427 {
1428 	gve_trigger_reset(priv);
1429 	/* With the reset having already happened, close cannot fail */
1430 	if (was_up)
1431 		gve_close(priv->dev);
1432 	gve_teardown_priv_resources(priv);
1433 }
1434 
1435 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
1436 {
1437 	int err;
1438 
1439 	err = gve_init_priv(priv, true);
1440 	if (err)
1441 		goto err;
1442 	if (was_up) {
1443 		err = gve_open(priv->dev);
1444 		if (err)
1445 			goto err;
1446 	}
1447 	return 0;
1448 err:
1449 	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
1450 	gve_turndown(priv);
1451 	return err;
1452 }
1453 
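/* Tear the device down (cleanly when possible) and bring it back up again */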
1454 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
1455 {
1456 	bool was_up = netif_carrier_ok(priv->dev);
1457 	int err;
1458 
1459 	dev_info(&priv->pdev->dev, "Performing reset\n");
1460 	gve_clear_do_reset(priv);
1461 	gve_set_reset_in_progress(priv);
1462 	/* If we aren't attempting to teardown normally, just go turndown and
1463 	 * reset right away.
1464 	 */
1465 	if (!attempt_teardown) {
1466 		gve_turndown(priv);
1467 		gve_reset_and_teardown(priv, was_up);
1468 	} else {
1469 		/* Otherwise attempt to close normally */
1470 		if (was_up) {
1471 			err = gve_close(priv->dev);
1472 			/* If that fails reset as we did above */
1473 			if (err)
1474 				gve_reset_and_teardown(priv, was_up);
1475 		}
1476 		/* Clean up any remaining resources */
1477 		gve_teardown_priv_resources(priv);
1478 	}
1479 
1480 	/* Set it all back up */
1481 	err = gve_reset_recovery(priv, was_up);
1482 	gve_clear_reset_in_progress(priv);
1483 	priv->reset_cnt++;
1484 	priv->interface_up_cnt = 0;
1485 	priv->interface_down_cnt = 0;
1486 	priv->stats_report_trigger_cnt = 0;
1487 	return err;
1488 }
1489 
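/* Report the driver version to the device, one byte at a time */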
1490 static void gve_write_version(u8 __iomem *driver_version_register)
1491 {
1492 	const char *c = gve_version_prefix;
1493 
1494 	while (*c) {
1495 		writeb(*c, driver_version_register);
1496 		c++;
1497 	}
1498 
1499 	c = gve_version_str;
1500 	while (*c) {
1501 		writeb(*c, driver_version_register);
1502 		c++;
1503 	}
1504 	writeb('\n', driver_version_register);
1505 }
1506 
1507 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1508 {
1509 	int max_tx_queues, max_rx_queues;
1510 	struct net_device *dev;
1511 	__be32 __iomem *db_bar;
1512 	struct gve_registers __iomem *reg_bar;
1513 	struct gve_priv *priv;
1514 	int err;
1515 
1516 	err = pci_enable_device(pdev);
1517 	if (err)
1518 		return err;
1519 
1520 	err = pci_request_regions(pdev, "gvnic-cfg");
1521 	if (err)
1522 		goto abort_with_enabled;
1523 
1524 	pci_set_master(pdev);
1525 
1526 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
1527 	if (err) {
1528 		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
1529 		goto abort_with_pci_region;
1530 	}
1531 
1532 	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
1533 	if (!reg_bar) {
1534 		dev_err(&pdev->dev, "Failed to map pci bar!\n");
1535 		err = -ENOMEM;
1536 		goto abort_with_pci_region;
1537 	}
1538 
1539 	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
1540 	if (!db_bar) {
1541 		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
1542 		err = -ENOMEM;
1543 		goto abort_with_reg_bar;
1544 	}
1545 
1546 	gve_write_version(&reg_bar->driver_version);
1547 	/* Get max queues to alloc etherdev */
1548 	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
1549 	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
1550 	/* Alloc and setup the netdev and priv */
1551 	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
1552 	if (!dev) {
1553 		dev_err(&pdev->dev, "could not allocate netdev\n");
1554 		err = -ENOMEM;
1555 		goto abort_with_db_bar;
1556 	}
1557 	SET_NETDEV_DEV(dev, &pdev->dev);
1558 	pci_set_drvdata(pdev, dev);
1559 	dev->ethtool_ops = &gve_ethtool_ops;
1560 	dev->netdev_ops = &gve_netdev_ops;
1561 
1562 	/* Set default and supported features.
1563 	 *
1564 	 * Features might be set in other locations as well (such as
1565 	 * `gve_adminq_describe_device`).
1566 	 */
1567 	dev->hw_features = NETIF_F_HIGHDMA;
1568 	dev->hw_features |= NETIF_F_SG;
1569 	dev->hw_features |= NETIF_F_HW_CSUM;
1570 	dev->hw_features |= NETIF_F_TSO;
1571 	dev->hw_features |= NETIF_F_TSO6;
1572 	dev->hw_features |= NETIF_F_TSO_ECN;
1573 	dev->hw_features |= NETIF_F_RXCSUM;
1574 	dev->hw_features |= NETIF_F_RXHASH;
1575 	dev->features = dev->hw_features;
1576 	dev->watchdog_timeo = 5 * HZ;
1577 	dev->min_mtu = ETH_MIN_MTU;
1578 	netif_carrier_off(dev);
1579 
1580 	priv = netdev_priv(dev);
1581 	priv->dev = dev;
1582 	priv->pdev = pdev;
1583 	priv->msg_enable = DEFAULT_MSG_LEVEL;
1584 	priv->reg_bar0 = reg_bar;
1585 	priv->db_bar2 = db_bar;
1586 	priv->service_task_flags = 0x0;
1587 	priv->state_flags = 0x0;
1588 	priv->ethtool_flags = 0x0;
1589 
1590 	gve_set_probe_in_progress(priv);
1591 	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
1592 	if (!priv->gve_wq) {
1593 		dev_err(&pdev->dev, "Could not allocate workqueue");
1594 		err = -ENOMEM;
1595 		goto abort_with_netdev;
1596 	}
1597 	INIT_WORK(&priv->service_task, gve_service_task);
1598 	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
1599 	priv->tx_cfg.max_queues = max_tx_queues;
1600 	priv->rx_cfg.max_queues = max_rx_queues;
1601 
1602 	err = gve_init_priv(priv, false);
1603 	if (err)
1604 		goto abort_with_wq;
1605 
1606 	err = register_netdev(dev);
1607 	if (err)
1608 		goto abort_with_gve_init;
1609 
1610 	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
1611 	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
1612 	gve_clear_probe_in_progress(priv);
1613 	queue_work(priv->gve_wq, &priv->service_task);
1614 	return 0;
1615 
1616 abort_with_gve_init:
1617 	gve_teardown_priv_resources(priv);
1618 
1619 abort_with_wq:
1620 	destroy_workqueue(priv->gve_wq);
1621 
1622 abort_with_netdev:
1623 	free_netdev(dev);
1624 
1625 abort_with_db_bar:
1626 	pci_iounmap(pdev, db_bar);
1627 
1628 abort_with_reg_bar:
1629 	pci_iounmap(pdev, reg_bar);
1630 
1631 abort_with_pci_region:
1632 	pci_release_regions(pdev);
1633 
1634 abort_with_enabled:
1635 	pci_disable_device(pdev);
1636 	return err;
1637 }
1638 
1639 static void gve_remove(struct pci_dev *pdev)
1640 {
1641 	struct net_device *netdev = pci_get_drvdata(pdev);
1642 	struct gve_priv *priv = netdev_priv(netdev);
1643 	__be32 __iomem *db_bar = priv->db_bar2;
1644 	void __iomem *reg_bar = priv->reg_bar0;
1645 
1646 	unregister_netdev(netdev);
1647 	gve_teardown_priv_resources(priv);
1648 	destroy_workqueue(priv->gve_wq);
1649 	free_netdev(netdev);
1650 	pci_iounmap(pdev, db_bar);
1651 	pci_iounmap(pdev, reg_bar);
1652 	pci_release_regions(pdev);
1653 	pci_disable_device(pdev);
1654 }
1655 
1656 static const struct pci_device_id gve_id_table[] = {
1657 	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
1658 	{ }
1659 };
1660 
1661 static struct pci_driver gvnic_driver = {
1662 	.name		= "gvnic",
1663 	.id_table	= gve_id_table,
1664 	.probe		= gve_probe,
1665 	.remove		= gve_remove,
1666 };
1667 
1668 module_pci_driver(gvnic_driver);
1669 
1670 MODULE_DEVICE_TABLE(pci, gve_id_table);
1671 MODULE_AUTHOR("Google, Inc.");
1672 MODULE_DESCRIPTION("gVNIC Driver");
1673 MODULE_LICENSE("Dual MIT/GPL");
1674 MODULE_VERSION(GVE_VERSION);
1675