// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

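/* Unmap and release the page backing a single Rx data slot. */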
static void gve_rx_free_buffer(struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
                                      GVE_DATA_SLOT_ADDR_PAGE_MASK);

        gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

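/* Release all Rx buffers: free each slot's page in raw addressing mode, or
 * return the queue page list (QPL) otherwise, then free the page_info array.
 */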
static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        if (rx->data.raw_addressing) {
                u32 slots = rx->mask + 1;
                int i;

                for (i = 0; i < slots; i++)
                        gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
                                           &rx->data.data_ring[i]);
        } else {
                gve_unassign_qpl(priv, rx->data.qpl->id);
                rx->data.qpl = NULL;
        }
        kvfree(rx->data.page_info);
        rx->data.page_info = NULL;
}

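/* Tear down one Rx ring: detach it from its notify block and free the
 * descriptor ring, queue resources, posted buffers and data ring.
 */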
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *dev = &priv->pdev->dev;
        u32 slots = rx->mask + 1;
        size_t bytes;

        gve_rx_remove_from_block(priv, idx);

        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
        rx->desc.desc_ring = NULL;

        dma_free_coherent(dev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;

        gve_rx_unfill_pages(priv, rx);

        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(dev, bytes, rx->data.data_ring,
                          rx->data.data_bus);
        rx->data.data_ring = NULL;
        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
                                dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
        page_info->page = page;
        page_info->page_offset = 0;
        page_info->page_address = page_address(page);
        *slot_addr = cpu_to_be64(addr);
}

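/* Allocate and DMA-map a fresh page for one Rx data slot and publish its
 * address to the device.
 */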
static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        struct page *page;
        dma_addr_t dma;
        int err;

        err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE,
                             GFP_ATOMIC);
        if (err)
                return err;

        gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
        return 0;
}

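/* Give every slot in the ring an initial buffer: QPL pages in queue page list
 * mode, freshly allocated pages in raw addressing mode. Returns the number of
 * slots filled or a negative errno.
 */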
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
        struct gve_priv *priv = rx->gve;
        u32 slots;
        int err;
        int i;

        /* Allocate one page per Rx queue slot. Each page is split into two
         * packet buffers; when possible we "page flip" between the two.
         */
        slots = rx->mask + 1;

        rx->data.page_info = kvzalloc(slots *
                                      sizeof(*rx->data.page_info), GFP_KERNEL);
        if (!rx->data.page_info)
                return -ENOMEM;

        if (!rx->data.raw_addressing) {
                rx->data.qpl = gve_assign_rx_qpl(priv);
                if (!rx->data.qpl) {
                        kvfree(rx->data.page_info);
                        rx->data.page_info = NULL;
                        return -ENOMEM;
                }
        }
        for (i = 0; i < slots; i++) {
                if (!rx->data.raw_addressing) {
                        struct page *page = rx->data.qpl->pages[i];
                        dma_addr_t addr = i * PAGE_SIZE;

                        gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
                                            &rx->data.data_ring[i].qpl_offset);
                        continue;
                }
                err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
                                          &rx->data.data_ring[i]);
                if (err)
                        goto alloc_err;
        }

        return slots;
alloc_err:
        while (i--)
                gve_rx_free_buffer(&priv->pdev->dev,
                                   &rx->data.page_info[i],
                                   &rx->data.data_ring[i]);
        return err;
}

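/* Allocate and initialize one Rx ring: data ring, initial buffers, queue
 * resources and descriptor ring, then attach it to its notify block.
 */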
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *hdev = &priv->pdev->dev;
        u32 slots, npages;
        int filled_pages;
        size_t bytes;
        int err;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
        /* Make sure everything is zeroed to start with */
        memset(rx, 0, sizeof(*rx));

        rx->gve = priv;
        rx->q_num = idx;

        slots = priv->rx_data_slot_cnt;
        rx->mask = slots - 1;
        rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

        /* alloc rx data ring */
        bytes = sizeof(*rx->data.data_ring) * slots;
        rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
                                                &rx->data.data_bus,
                                                GFP_KERNEL);
        if (!rx->data.data_ring)
                return -ENOMEM;
        filled_pages = gve_prefill_rx_pages(rx);
        if (filled_pages < 0) {
                err = -ENOMEM;
                goto abort_with_slots;
        }
        rx->fill_cnt = filled_pages;
        /* Ensure data ring slots (packet buffers) are visible. */
        dma_wmb();

        /* Alloc gve_queue_resources */
        rx->q_resources =
                dma_alloc_coherent(hdev,
                                   sizeof(*rx->q_resources),
                                   &rx->q_resources_bus,
                                   GFP_KERNEL);
        if (!rx->q_resources) {
                err = -ENOMEM;
                goto abort_filled;
        }
        netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
                  (unsigned long)rx->data.data_bus);

        /* alloc rx desc ring */
        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        npages = bytes / PAGE_SIZE;
        if (npages * PAGE_SIZE != bytes) {
                err = -EIO;
                goto abort_with_q_resources;
        }

        rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
                                                GFP_KERNEL);
        if (!rx->desc.desc_ring) {
                err = -ENOMEM;
                goto abort_with_q_resources;
        }
        rx->cnt = 0;
        rx->db_threshold = priv->rx_desc_cnt / 2;
        rx->desc.seqno = 1;
        gve_rx_add_to_block(priv, idx);

        return 0;

abort_with_q_resources:
        dma_free_coherent(hdev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;
abort_filled:
        gve_rx_unfill_pages(priv, rx);
abort_with_slots:
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
        rx->data.data_ring = NULL;

        return err;
}

int gve_rx_alloc_rings(struct gve_priv *priv)
{
        int err = 0;
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                err = gve_rx_alloc_ring(priv, i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        break;
                }
        }
        /* Free any rings allocated so far if there was an error */
        if (err) {
                int j;

                for (j = 0; j < i; j++)
                        gve_rx_free_ring(priv, j);
        }
        return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_free_ring(priv, i);
}

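/* Tell the device how many buffers have been posted by writing fill_cnt to
 * the ring's doorbell.
 */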
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

        iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
        if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
                return PKT_HASH_TYPE_L4;
        if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
                return PKT_HASH_TYPE_L3;
        return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
                                        struct gve_rx_slot_page_info *page_info,
                                        u16 len)
{
        struct sk_buff *skb = napi_get_frags(napi);

        if (unlikely(!skb))
                return NULL;

        skb_add_rx_frag(skb, 0, page_info->page,
                        page_info->page_offset +
                        GVE_RX_PAD, len, PAGE_SIZE / 2);

        return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
        const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

        /* "flip" to other packet buffer on this page */
        page_info->page_offset ^= PAGE_SIZE / 2;
        *(slot_addr) ^= offset;
}

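/* Page flipping is only possible when two MTU-sized buffers (plus the Rx pad
 * and Ethernet header) fit in a single 4K page.
 */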
static bool gve_rx_can_flip_buffers(struct net_device *netdev)
{
        return PAGE_SIZE == 4096
                ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
}

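/* Returns 1 if the page can be reused, 0 if the stack still holds a reference
 * to it, and -1 on an impossible refcount.
 */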
static int gve_rx_can_recycle_buffer(struct page *page)
{
        int pagecount = page_count(page);

        /* This page is not being used by any SKBs - reuse */
        if (pagecount == 1)
                return 1;
        /* This page is still being used by an SKB - we can't reuse */
        else if (pagecount >= 2)
                return 0;
        WARN(pagecount < 1, "Pagecount should never be < 1");
        return -1;
}

static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
                      struct gve_rx_slot_page_info *page_info, u16 len,
                      struct napi_struct *napi,
                      union gve_rx_data_slot *data_slot)
{
        struct sk_buff *skb;

        skb = gve_rx_add_frags(napi, page_info, len);
        if (!skb)
                return NULL;

        /* Optimistically stop the kernel from freeing the page by increasing
         * the page bias. We will check the refcount in refill to determine if
         * we need to alloc a new page.
         */
        get_page(page_info->page);

        return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
           struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
           u16 len, struct napi_struct *napi,
           union gve_rx_data_slot *data_slot)
{
        struct sk_buff *skb;

        /* If raw_addressing mode is not enabled, gvnic can only receive into
         * registered segments. If the buffer can't be recycled, our only
         * choice is to copy the data out of it so that we can return it to the
         * device.
         */
        if (page_info->can_flip) {
                skb = gve_rx_add_frags(napi, page_info, len);
                /* No point in recycling if we didn't get the skb */
                if (skb) {
                        /* Make sure that the page isn't freed. */
                        get_page(page_info->page);
                        gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
                }
        } else {
                skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
                if (skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_copied_pkt++;
                        u64_stats_update_end(&rx->statss);
                }
        }
        return skb;
}

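/* Process one Rx descriptor: build an skb (copied or frag-attached), fill in
 * checksum and RSS hash info, and hand it to GRO. Returns false if the packet
 * was dropped.
 */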
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
                   netdev_features_t feat, u32 idx)
{
        struct gve_rx_slot_page_info *page_info;
        struct gve_priv *priv = rx->gve;
        struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
        struct net_device *dev = priv->dev;
        union gve_rx_data_slot *data_slot;
        struct sk_buff *skb = NULL;
        dma_addr_t page_bus;
        u16 len;

        /* drop this packet */
        if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_desc_err_dropped_pkt++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
        page_info = &rx->data.page_info[idx];

        data_slot = &rx->data.data_ring[idx];
        page_bus = (rx->data.raw_addressing) ?
                   be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
                   rx->data.qpl->page_buses[idx];
        dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
                                PAGE_SIZE, DMA_FROM_DEVICE);

        if (len <= priv->rx_copybreak) {
                /* Just copy small packets */
                skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
                u64_stats_update_begin(&rx->statss);
                rx->rx_copied_pkt++;
                rx->rx_copybreak_pkt++;
                u64_stats_update_end(&rx->statss);
        } else {
                u8 can_flip = gve_rx_can_flip_buffers(dev);
                int recycle = 0;

                if (can_flip) {
                        recycle = gve_rx_can_recycle_buffer(page_info->page);
                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                }

                page_info->can_flip = can_flip && recycle;
                if (rx->data.raw_addressing) {
                        skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
                                                    page_info, len, napi,
                                                    data_slot);
                } else {
                        skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
                                         page_info, len, napi, data_slot);
                }
        }

        if (!skb) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_skb_alloc_fail++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        if (likely(feat & NETIF_F_RXCSUM)) {
                /* NIC passes up the partial sum */
                if (rx_desc->csum)
                        skb->ip_summed = CHECKSUM_COMPLETE;
                else
                        skb->ip_summed = CHECKSUM_NONE;
                skb->csum = csum_unfold(rx_desc->csum);
        }

        /* parse flags & pass relevant info up */
        if (likely(feat & NETIF_F_RXHASH) &&
            gve_needs_rss(rx_desc->flags_seq))
                skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
                             gve_rss_type(rx_desc->flags_seq));

        skb_record_rx_queue(skb, rx->q_num);
        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, skb);
        return true;
}

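/* A descriptor is ready once the device has written the sequence number the
 * ring expects next.
 */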
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
        struct gve_rx_desc *desc;
        __be16 flags_seq;
        u32 next_idx;

        next_idx = rx->cnt & rx->mask;
        desc = rx->desc.desc_ring + next_idx;

        flags_seq = desc->flags_seq;
        /* Make sure we have synchronized the seq no with the device */
        smp_rmb();

        return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

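/* Repost buffers for the consumed slots, flipping to the free half of a page
 * when possible and allocating a replacement page when it is not. Returns
 * false if a page refcount is found in an unexpected state.
 */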
static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        int refill_target = rx->mask + 1;
        u32 fill_cnt = rx->fill_cnt;

        while (fill_cnt - rx->cnt < refill_target) {
                struct gve_rx_slot_page_info *page_info;
                u32 idx = fill_cnt & rx->mask;

                page_info = &rx->data.page_info[idx];
                if (page_info->can_flip) {
                        /* The other half of the page is free because it was
                         * free when we processed the descriptor. Flip to it.
                         */
                        union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];

                        gve_rx_flip_buff(page_info, &data_slot->addr);
                        page_info->can_flip = 0;
                } else {
                        /* It is possible that the networking stack has already
                         * finished processing all outstanding packets in the buffer
                         * and it can be reused.
                         * Flipping is unnecessary here - if the networking stack still
                         * owns half the page it is impossible to tell which half. Either
                         * the whole page is free or it needs to be replaced.
                         */
                        int recycle = gve_rx_can_recycle_buffer(page_info->page);

                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                        if (!recycle) {
                                /* We can't reuse the buffer - alloc a new one */
                                union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];
                                struct device *dev = &priv->pdev->dev;

                                gve_rx_free_buffer(dev, page_info, data_slot);
                                page_info->page = NULL;
                                if (gve_rx_alloc_buffer(priv, dev, page_info,
                                                        data_slot)) {
                                        u64_stats_update_begin(&rx->statss);
                                        rx->rx_buf_alloc_fail++;
                                        u64_stats_update_end(&rx->statss);
                                        break;
                                }
                        }
                }
                fill_cnt++;
        }
        rx->fill_cnt = fill_cnt;
        return true;
}

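/* Process up to budget completed descriptors, restock the ring and ring the
 * doorbell. Returns true if the queue should be scheduled for more work.
 */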
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
                       netdev_features_t feat)
{
        struct gve_priv *priv = rx->gve;
        u32 work_done = 0, packets = 0;
        struct gve_rx_desc *desc;
        u32 cnt = rx->cnt;
        u32 idx = cnt & rx->mask;
        u64 bytes = 0;

        desc = rx->desc.desc_ring + idx;
        while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
               work_done < budget) {
                bool dropped;

                netif_info(priv, rx_status, priv->dev,
                           "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
                           rx->q_num, idx, desc, desc->flags_seq);
                netif_info(priv, rx_status, priv->dev,
                           "[%d] seqno=%d rx->desc.seqno=%d\n",
                           rx->q_num, GVE_SEQNO(desc->flags_seq),
                           rx->desc.seqno);
                dropped = !gve_rx(rx, desc, feat, idx);
                if (!dropped) {
                        bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
                        packets++;
                }
                cnt++;
                idx = cnt & rx->mask;
                desc = rx->desc.desc_ring + idx;
                rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
                work_done++;
        }

        if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
                return false;

        u64_stats_update_begin(&rx->statss);
        rx->rpackets += packets;
        rx->rbytes += bytes;
        u64_stats_update_end(&rx->statss);
        rx->cnt = cnt;

        /* restock ring slots */
        if (!rx->data.raw_addressing) {
                /* In QPL mode buffers are refilled as descriptors are processed */
                rx->fill_cnt += work_done;
        } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
                /* In raw addressing mode buffers are only refilled if the
                 * available buffer count falls below a threshold.
                 */
                if (!gve_rx_refill_buffers(priv, rx))
                        return false;

                /* If we were not able to completely refill buffers, we'll want
                 * to schedule this queue for work again to refill buffers.
                 */
                if (rx->fill_cnt - cnt <= rx->db_threshold) {
                        gve_rx_write_doorbell(priv, rx);
                        return true;
                }
        }

        gve_rx_write_doorbell(priv, rx);
        return gve_rx_work_pending(rx);
}

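/* NAPI poll entry point for an Rx ring; a budget of 0 means do all the work. */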
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
        struct gve_rx_ring *rx = block->rx;
        netdev_features_t feat;
        bool repoll = false;

        feat = block->napi.dev->features;

        /* If budget is 0, do all the work */
        if (budget == 0)
                budget = INT_MAX;

        if (budget > 0)
                repoll |= gve_clean_rx_done(rx, budget, feat);
        else
                repoll |= gve_rx_work_pending(rx);
        return repoll;
}