1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3 * Broadcom Dongle Host Driver (DHD), Linux-specific network interface
4 * Basically selected code segments from usb-cdc.c and usb-rndis.c
5 *
6 * Copyright (C) 1999-2019, Broadcom.
7 *
8 * Unless you and Broadcom execute a separate written software license
9 * agreement governing use of this software, this software is licensed to you
10 * under the terms of the GNU General Public License version 2 (the "GPL"),
11 * available at http://www.broadcom.com/licenses/GPLv2.php, with the
12 * following added to such license:
13 *
14 * As a special exception, the copyright holders of this software give you
15 * permission to link this software with independent modules, and to copy and
16 * distribute the resulting executable under terms of your choice, provided that
17 * you also meet, for each linked independent module, the terms and conditions of
18 * the license of that module. An independent module is a module which is not
19 * derived from this software. The special exception does not apply to any
20 * modifications of the software.
21 *
22 * Notwithstanding the above, under no circumstances may you combine this
23 * software in any way with any other Broadcom software provided under a license
24 * other than the GPL, without Broadcom's express prior written consent.
25 *
26 *
27 * <<Broadcom-WL-IPTag/Open:>>
28 *
29 * $Id: dhd_linux_lb.c 805819 2019-02-20 10:49:35Z $
30 */
31
32 #include <dhd_linux_priv.h>
33
34 extern dhd_pub_t* g_dhd_pub;
35
36 #if defined(DHD_LB)
37
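/*
 * Seed the per-job CPU assignments with static defaults; dhd_select_cpu_candidacy()
 * rebalances them later as CPUs go on-line/off-line.
 */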
38 void
39 dhd_lb_set_default_cpus(dhd_info_t *dhd)
40 {
41 /* Default CPU allocation for the jobs */
42 atomic_set(&dhd->rx_napi_cpu, 1);
43 atomic_set(&dhd->rx_compl_cpu, 2);
44 atomic_set(&dhd->tx_compl_cpu, 2);
45 atomic_set(&dhd->tx_cpu, 2);
46 atomic_set(&dhd->net_tx_cpu, 0);
47 }
48
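/* Free the cpumask variables allocated by dhd_cpumasks_init() */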
49 void
50 dhd_cpumasks_deinit(dhd_info_t *dhd)
51 {
52 free_cpumask_var(dhd->cpumask_curr_avail);
53 free_cpumask_var(dhd->cpumask_primary);
54 free_cpumask_var(dhd->cpumask_primary_new);
55 free_cpumask_var(dhd->cpumask_secondary);
56 free_cpumask_var(dhd->cpumask_secondary_new);
57 }
58
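/*
 * Allocate the cpumask variables and populate the primary/secondary masks from
 * the DHD_LB_PRIMARY_CPUS / DHD_LB_SECONDARY_CPUS bit maps. cpumask_curr_avail
 * starts out as a copy of cpu_online_mask and is kept up to date by the CPU
 * hotplug callbacks below.
 */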
59 int
60 dhd_cpumasks_init(dhd_info_t *dhd)
61 {
62 int id;
63 uint32 cpus, num_cpus = num_possible_cpus();
64 int ret = 0;
65
66 DHD_ERROR(("%s CPU masks primary(big)=0x%x secondary(little)=0x%x\n", __FUNCTION__,
67 DHD_LB_PRIMARY_CPUS, DHD_LB_SECONDARY_CPUS));
68
69 if (!alloc_cpumask_var(&dhd->cpumask_curr_avail, GFP_KERNEL) ||
70 !alloc_cpumask_var(&dhd->cpumask_primary, GFP_KERNEL) ||
71 !alloc_cpumask_var(&dhd->cpumask_primary_new, GFP_KERNEL) ||
72 !alloc_cpumask_var(&dhd->cpumask_secondary, GFP_KERNEL) ||
73 !alloc_cpumask_var(&dhd->cpumask_secondary_new, GFP_KERNEL)) {
74 DHD_ERROR(("%s Failed to init cpumasks\n", __FUNCTION__));
75 ret = -ENOMEM;
76 goto fail;
77 }
78
79 cpumask_copy(dhd->cpumask_curr_avail, cpu_online_mask);
80 cpumask_clear(dhd->cpumask_primary);
81 cpumask_clear(dhd->cpumask_secondary);
82
83 if (num_cpus > 32) {
84 DHD_ERROR(("%s max cpus must be 32, %d too big\n", __FUNCTION__, num_cpus));
85 ASSERT(0);
86 }
87
88 cpus = DHD_LB_PRIMARY_CPUS;
89 for (id = 0; id < num_cpus; id++) {
90 if (isset(&cpus, id))
91 cpumask_set_cpu(id, dhd->cpumask_primary);
92 }
93
94 cpus = DHD_LB_SECONDARY_CPUS;
95 for (id = 0; id < num_cpus; id++) {
96 if (isset(&cpus, id))
97 cpumask_set_cpu(id, dhd->cpumask_secondary);
98 }
99
100 return ret;
101 fail:
102 dhd_cpumasks_deinit(dhd);
103 return ret;
104 }
105
106 /*
107 * The CPU Candidacy Algorithm
108 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
109 * The available CPUs for selection are divided into two groups
110 * Primary Set - A CPU mask that carries the First Choice CPUs
111 * Secondary Set - A CPU mask that carries the Second Choice CPUs.
112 *
113 * There are two types of jobs that need to be assigned to
114 * the CPUs, from one of the above mentioned CPU groups. The jobs are
115 * 1) Rx Packet Processing - napi_cpu
116 * 2) Completion Processing (Tx, Rx) - compl_cpu
117 *
118 * To begin with both napi_cpu and compl_cpu are on CPU0. Whenever a CPU goes
119 * on-line/off-line the CPU candidacy algorithm is triggered. The candidacy
120 * algo tries to pick up the first available non-boot CPU (i.e. not CPU0) for napi_cpu.
121 * If there are more processors free, it assigns one to compl_cpu.
122 * It also tries to ensure that both napi_cpu and compl_cpu are not on the same
123 * CPU, as far as possible.
124 *
125 * By design, both Tx and Rx completion jobs are run on the same CPU core, as it
126 * would allow Tx completion skbs to be released into a local free pool from
127 * which the rx buffer posts could have been serviced. It is important to note
128 * that a Tx packet may not have a large enough buffer for rx posting.
129 */
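/*
 * Illustration (hypothetical topology, not taken from any particular board):
 * assume DHD_LB_PRIMARY_CPUS maps to big cores 4-7 and all of them are online.
 * Then cpumask_primary_new = {4,5,6,7} and the code below picks napi_cpu = 4,
 * tx_cpu = 5 and compl_cpu = 6. If only the little cores 1-3 were online,
 * the same walk would be done over cpumask_secondary_new instead.
 */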
130 void dhd_select_cpu_candidacy(dhd_info_t *dhd)
131 {
132 uint32 primary_available_cpus; /* count of primary available cpus */
133 uint32 secondary_available_cpus; /* count of secondary available cpus */
134 uint32 napi_cpu = 0; /* cpu selected for napi rx processing */
135 uint32 compl_cpu = 0; /* cpu selected for completion jobs */
136 uint32 tx_cpu = 0; /* cpu selected for tx processing job */
137
138 cpumask_clear(dhd->cpumask_primary_new);
139 cpumask_clear(dhd->cpumask_secondary_new);
140
141 /*
142 * Now select from the primary mask. Even if a Job is
143 * already running on a CPU in secondary group, we still move
144 * to primary CPU. So no conditional checks.
145 */
146 cpumask_and(dhd->cpumask_primary_new, dhd->cpumask_primary,
147 dhd->cpumask_curr_avail);
148
149 cpumask_and(dhd->cpumask_secondary_new, dhd->cpumask_secondary,
150 dhd->cpumask_curr_avail);
151
152 primary_available_cpus = cpumask_weight(dhd->cpumask_primary_new);
153
154 if (primary_available_cpus > 0) {
155 napi_cpu = cpumask_first(dhd->cpumask_primary_new);
156
157 /* If no further CPU is available,
158 * cpumask_next returns >= nr_cpu_ids
159 */
160 tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_primary_new);
161 if (tx_cpu >= nr_cpu_ids)
162 tx_cpu = 0;
163
164 /* In case there are no more CPUs, do completions & Tx in same CPU */
165 compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_primary_new);
166 if (compl_cpu >= nr_cpu_ids)
167 compl_cpu = tx_cpu;
168 }
169
170 DHD_INFO(("%s After primary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
171 __FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
172
173 /* -- Now check for the CPUs from the secondary mask -- */
174 secondary_available_cpus = cpumask_weight(dhd->cpumask_secondary_new);
175
176 DHD_INFO(("%s Available secondary cpus %d nr_cpu_ids %d\n",
177 __FUNCTION__, secondary_available_cpus, nr_cpu_ids));
178
179 if (secondary_available_cpus > 0) {
180 /* At this point if napi_cpu is unassigned it means no CPU
181 * is online from Primary Group
182 */
183 if (napi_cpu == 0) {
184 napi_cpu = cpumask_first(dhd->cpumask_secondary_new);
185 tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_secondary_new);
186 compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
187 } else if (tx_cpu == 0) {
188 tx_cpu = cpumask_first(dhd->cpumask_secondary_new);
189 compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
190 } else if (compl_cpu == 0) {
191 compl_cpu = cpumask_first(dhd->cpumask_secondary_new);
192 }
193
194 /* If no CPU was available for tx processing, choose CPU 0 */
195 if (tx_cpu >= nr_cpu_ids)
196 tx_cpu = 0;
197
198 /* If no CPU was available for completion, choose CPU 0 */
199 if (compl_cpu >= nr_cpu_ids)
200 compl_cpu = 0;
201 }
202 if ((primary_available_cpus == 0) &&
203 (secondary_available_cpus == 0)) {
204 /* No CPUs available from primary or secondary mask */
205 napi_cpu = 1;
206 compl_cpu = 0;
207 tx_cpu = 2;
208 }
209
210 DHD_INFO(("%s After secondary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
211 __FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
212
213 ASSERT(napi_cpu < nr_cpu_ids);
214 ASSERT(compl_cpu < nr_cpu_ids);
215 ASSERT(tx_cpu < nr_cpu_ids);
216
217 atomic_set(&dhd->rx_napi_cpu, napi_cpu);
218 atomic_set(&dhd->tx_compl_cpu, compl_cpu);
219 atomic_set(&dhd->rx_compl_cpu, compl_cpu);
220 atomic_set(&dhd->tx_cpu, tx_cpu);
221
222 return;
223 }
224
225 /*
226 * Function to handle CPU Hotplug notifications.
227 * One of the tasks it performs is to trigger the CPU Candidacy algorithm
228 * for load balancing.
229 */
230
231 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
232
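/*
 * CPU hotplug callbacks used with the >= 4.10 cpuhp state machine: mark the CPU
 * as available/unavailable in cpumask_curr_avail and re-run the candidacy
 * algorithm so the napi/completion/tx jobs migrate accordingly.
 */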
233 int dhd_cpu_startup_callback(unsigned int cpu)
234 {
235 dhd_info_t *dhd = g_dhd_pub->info;
236
237 DHD_INFO(("%s(): \r\n cpu:%d", __FUNCTION__, cpu));
238 DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
239 cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
240 dhd_select_cpu_candidacy(dhd);
241
242 return 0;
243 }
244
245 int dhd_cpu_teardown_callback(unsigned int cpu)
246 {
247 dhd_info_t *dhd = g_dhd_pub->info;
248
249 DHD_INFO(("%s(): \r\n cpu:%d", __FUNCTION__, cpu));
250 DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
251 cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
252 dhd_select_cpu_candidacy(dhd);
253
254 return 0;
255 }
256 #else
257 int
258 dhd_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
259 {
260 unsigned long int cpu = (unsigned long int)hcpu;
261
262 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
263 #pragma GCC diagnostic push
264 #pragma GCC diagnostic ignored "-Wcast-qual"
265 #endif // endif
266 dhd_info_t *dhd = container_of(nfb, dhd_info_t, cpu_notifier);
267 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
268 #pragma GCC diagnostic pop
269 #endif // endif
270
271 if (!dhd || !(dhd->dhd_state & DHD_ATTACH_STATE_LB_ATTACH_DONE)) {
272 DHD_INFO(("%s(): LB data is not initialized yet.\n",
273 __FUNCTION__));
274 return NOTIFY_BAD;
275 }
276
277 switch (action)
278 {
279 case CPU_ONLINE:
280 case CPU_ONLINE_FROZEN:
281 DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
282 cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
283 dhd_select_cpu_candidacy(dhd);
284 break;
285
286 case CPU_DOWN_PREPARE:
287 case CPU_DOWN_PREPARE_FROZEN:
288 DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
289 cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
290 dhd_select_cpu_candidacy(dhd);
291 break;
292 default:
293 break;
294 }
295
296 return NOTIFY_OK;
297 }
298 #endif /* LINUX_VERSION_CODE < 4.10.0 */
299
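/*
 * Register for CPU hotplug events: a dynamic cpuhp state on >= 4.10 kernels,
 * the legacy hotcpu notifier otherwise.
 */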
300 int dhd_register_cpuhp_callback(dhd_info_t *dhd)
301 {
302 int cpuhp_ret = 0;
303 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
304 cpuhp_ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dhd",
305 dhd_cpu_startup_callback, dhd_cpu_teardown_callback);
306
307 if (cpuhp_ret < 0) {
308 DHD_ERROR(("%s(): cpuhp_setup_state failed %d RX LB won't happen \r\n",
309 __FUNCTION__, cpuhp_ret));
310 }
311 #else
312 /*
313 * If we are able to initialize CPU masks, let's register with the
314 * CPU Hotplug framework so the CPU for each job can be changed dynamically
315 * using the candidacy algorithm.
316 */
317 dhd->cpu_notifier.notifier_call = dhd_cpu_callback;
318 register_hotcpu_notifier(&dhd->cpu_notifier); /* Register a callback */
319 #endif /* LINUX_VERSION_CODE < 4.10.0 */
320 return cpuhp_ret;
321 }
322
323 int dhd_unregister_cpuhp_callback(dhd_info_t *dhd)
324 {
325 int ret = 0;
326 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
327 /* Don't want to call tear down while unregistering */
328 cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
329 #else
330 if (dhd->cpu_notifier.notifier_call != NULL) {
331 unregister_cpu_notifier(&dhd->cpu_notifier);
332 }
333 #endif // endif
334 return ret;
335 }
336
337 #if defined(DHD_LB_STATS)
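/*
 * Allocate and zero the per-CPU scheduling/run counters and the per-CPU packet
 * count histograms used by the load-balance statistics. On an allocation
 * failure the function simply returns, leaving the remaining counters
 * unallocated.
 */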
338 void dhd_lb_stats_init(dhd_pub_t *dhdp)
339 {
340 dhd_info_t *dhd;
341 int i, j, num_cpus = num_possible_cpus();
342 int alloc_size = sizeof(uint32) * num_cpus;
343
344 if (dhdp == NULL) {
345 DHD_ERROR(("%s(): Invalid argument dhd pub pointer is NULL \n",
346 __FUNCTION__));
347 return;
348 }
349
350 dhd = dhdp->info;
351 if (dhd == NULL) {
352 DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
353 return;
354 }
355
356 DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
357 DHD_LB_STATS_CLR(dhd->napi_sched_cnt);
358
359 dhd->napi_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
360 if (!dhd->napi_percpu_run_cnt) {
361 DHD_ERROR(("%s(): napi_percpu_run_cnt malloc failed \n",
362 __FUNCTION__));
363 return;
364 }
365 for (i = 0; i < num_cpus; i++)
366 DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);
367
368 DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);
369
370 dhd->rxc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
371 if (!dhd->rxc_percpu_run_cnt) {
372 DHD_ERROR(("%s(): rxc_percpu_run_cnt malloc failed \n",
373 __FUNCTION__));
374 return;
375 }
376 for (i = 0; i < num_cpus; i++)
377 DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);
378
379 DHD_LB_STATS_CLR(dhd->txc_sched_cnt);
380
381 dhd->txc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
382 if (!dhd->txc_percpu_run_cnt) {
383 DHD_ERROR(("%s(): txc_percpu_run_cnt malloc failed \n",
384 __FUNCTION__));
385 return;
386 }
387 for (i = 0; i < num_cpus; i++)
388 DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);
389
390 dhd->cpu_online_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
391 if (!dhd->cpu_online_cnt) {
392 DHD_ERROR(("%s(): cpu_online_cnt malloc failed \n",
393 __FUNCTION__));
394 return;
395 }
396 for (i = 0; i < num_cpus; i++)
397 DHD_LB_STATS_CLR(dhd->cpu_online_cnt[i]);
398
399 dhd->cpu_offline_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
400 if (!dhd->cpu_offline_cnt) {
401 DHD_ERROR(("%s(): cpu_offline_cnt malloc failed \n",
402 __FUNCTION__));
403 return;
404 }
405 for (i = 0; i < num_cpus; i++)
406 DHD_LB_STATS_CLR(dhd->cpu_offline_cnt[i]);
407
408 dhd->txp_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
409 if (!dhd->txp_percpu_run_cnt) {
410 DHD_ERROR(("%s(): txp_percpu_run_cnt malloc failed \n",
411 __FUNCTION__));
412 return;
413 }
414 for (i = 0; i < num_cpus; i++)
415 DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);
416
417 dhd->tx_start_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
418 if (!dhd->tx_start_percpu_run_cnt) {
419 DHD_ERROR(("%s(): tx_start_percpu_run_cnt malloc failed \n",
420 __FUNCTION__));
421 return;
422 }
423 for (i = 0; i < num_cpus; i++)
424 DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);
425
426 for (j = 0; j < HIST_BIN_SIZE; j++) {
427 dhd->napi_rx_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
428 if (!dhd->napi_rx_hist[j]) {
429 DHD_ERROR(("%s(): dhd->napi_rx_hist[%d] malloc failed \n",
430 __FUNCTION__, j));
431 return;
432 }
433 for (i = 0; i < num_cpus; i++) {
434 DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
435 }
436 }
437 #ifdef DHD_LB_TXC
438 for (j = 0; j < HIST_BIN_SIZE; j++) {
439 dhd->txc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
440 if (!dhd->txc_hist[j]) {
441 DHD_ERROR(("%s(): dhd->txc_hist[%d] malloc failed \n",
442 __FUNCTION__, j));
443 return;
444 }
445 for (i = 0; i < num_cpus; i++) {
446 DHD_LB_STATS_CLR(dhd->txc_hist[j][i]);
447 }
448 }
449 #endif /* DHD_LB_TXC */
450 #ifdef DHD_LB_RXC
451 for (j = 0; j < HIST_BIN_SIZE; j++) {
452 dhd->rxc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
453 if (!dhd->rxc_hist[j]) {
454 DHD_ERROR(("%s(): dhd->rxc_hist[%d] malloc failed \n",
455 __FUNCTION__, j));
456 return;
457 }
458 for (i = 0; i < num_cpus; i++) {
459 DHD_LB_STATS_CLR(dhd->rxc_hist[j][i]);
460 }
461 }
462 #endif /* DHD_LB_RXC */
463 return;
464 }
465
466 void dhd_lb_stats_deinit(dhd_pub_t *dhdp)
467 {
468 dhd_info_t *dhd;
469 int j, num_cpus = num_possible_cpus();
470 int alloc_size = sizeof(uint32) * num_cpus;
471
472 if (dhdp == NULL) {
473 DHD_ERROR(("%s(): Invalid argument dhd pub pointer is NULL \n",
474 __FUNCTION__));
475 return;
476 }
477
478 dhd = dhdp->info;
479 if (dhd == NULL) {
480 DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
481 return;
482 }
483
484 if (dhd->napi_percpu_run_cnt) {
485 MFREE(dhdp->osh, dhd->napi_percpu_run_cnt, alloc_size);
486 dhd->napi_percpu_run_cnt = NULL;
487 }
488 if (dhd->rxc_percpu_run_cnt) {
489 MFREE(dhdp->osh, dhd->rxc_percpu_run_cnt, alloc_size);
490 dhd->rxc_percpu_run_cnt = NULL;
491 }
492 if (dhd->txc_percpu_run_cnt) {
493 MFREE(dhdp->osh, dhd->txc_percpu_run_cnt, alloc_size);
494 dhd->txc_percpu_run_cnt = NULL;
495 }
496 if (dhd->cpu_online_cnt) {
497 MFREE(dhdp->osh, dhd->cpu_online_cnt, alloc_size);
498 dhd->cpu_online_cnt = NULL;
499 }
500 if (dhd->cpu_offline_cnt) {
501 MFREE(dhdp->osh, dhd->cpu_offline_cnt, alloc_size);
502 dhd->cpu_offline_cnt = NULL;
503 }
504
505 if (dhd->txp_percpu_run_cnt) {
506 MFREE(dhdp->osh, dhd->txp_percpu_run_cnt, alloc_size);
507 dhd->txp_percpu_run_cnt = NULL;
508 }
509 if (dhd->tx_start_percpu_run_cnt) {
510 MFREE(dhdp->osh, dhd->tx_start_percpu_run_cnt, alloc_size);
511 dhd->tx_start_percpu_run_cnt = NULL;
512 }
513
514 for (j = 0; j < HIST_BIN_SIZE; j++) {
515 if (dhd->napi_rx_hist[j]) {
516 MFREE(dhdp->osh, dhd->napi_rx_hist[j], alloc_size);
517 dhd->napi_rx_hist[j] = NULL;
518 }
519 #ifdef DHD_LB_TXC
520 if (dhd->txc_hist[j]) {
521 MFREE(dhdp->osh, dhd->txc_hist[j], alloc_size);
522 dhd->txc_hist[j] = NULL;
523 }
524 #endif /* DHD_LB_TXC */
525 #ifdef DHD_LB_RXC
526 if (dhd->rxc_hist[j]) {
527 MFREE(dhdp->osh, dhd->rxc_hist[j], alloc_size);
528 dhd->rxc_hist[j] = NULL;
529 }
530 #endif /* DHD_LB_RXC */
531 }
532
533 return;
534 }
535
536 void dhd_lb_stats_dump_histo(dhd_pub_t *dhdp,
537 struct bcmstrbuf *strbuf, uint32 **hist)
538 {
539 int i, j;
540 uint32 *per_cpu_total;
541 uint32 total = 0;
542 uint32 num_cpus = num_possible_cpus();
543
544 per_cpu_total = (uint32 *)MALLOC(dhdp->osh, sizeof(uint32) * num_cpus);
545 if (!per_cpu_total) {
546 DHD_ERROR(("%s(): dhd->per_cpu_total malloc failed \n", __FUNCTION__));
547 return;
548 }
549 bzero(per_cpu_total, sizeof(uint32) * num_cpus);
550
551 bcm_bprintf(strbuf, "CPU: \t\t");
552 for (i = 0; i < num_cpus; i++)
553 bcm_bprintf(strbuf, "%d\t", i);
554 bcm_bprintf(strbuf, "\nBin\n");
555
556 for (i = 0; i < HIST_BIN_SIZE; i++) {
557 bcm_bprintf(strbuf, "%d:\t\t", 1<<i);
558 for (j = 0; j < num_cpus; j++) {
559 bcm_bprintf(strbuf, "%d\t", hist[i][j]);
560 }
561 bcm_bprintf(strbuf, "\n");
562 }
563 bcm_bprintf(strbuf, "Per CPU Total \t");
564 total = 0;
565 for (i = 0; i < num_cpus; i++) {
566 for (j = 0; j < HIST_BIN_SIZE; j++) {
567 per_cpu_total[i] += (hist[j][i] * (1<<j));
568 }
569 bcm_bprintf(strbuf, "%d\t", per_cpu_total[i]);
570 total += per_cpu_total[i];
571 }
572 bcm_bprintf(strbuf, "\nTotal\t\t%d \n", total);
573
574 if (per_cpu_total) {
575 MFREE(dhdp->osh, per_cpu_total, sizeof(uint32) * num_cpus);
576 per_cpu_total = NULL;
577 }
578 return;
579 }
580
581 void dhd_lb_stats_dump_cpu_array(struct bcmstrbuf *strbuf, uint32 *p)
582 {
583 int i, num_cpus = num_possible_cpus();
584
585 bcm_bprintf(strbuf, "CPU: \t");
586 for (i = 0; i < num_cpus; i++)
587 bcm_bprintf(strbuf, "%d\t", i);
588 bcm_bprintf(strbuf, "\n");
589
590 bcm_bprintf(strbuf, "Val: \t");
591 for (i = 0; i < num_cpus; i++)
592 bcm_bprintf(strbuf, "%u\t", *(p+i));
593 bcm_bprintf(strbuf, "\n");
594 return;
595 }
596
597 void dhd_lb_stats_dump(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
598 {
599 dhd_info_t *dhd;
600
601 if (dhdp == NULL || strbuf == NULL) {
602 DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n",
603 __FUNCTION__, dhdp, strbuf));
604 return;
605 }
606
607 dhd = dhdp->info;
608 if (dhd == NULL) {
609 DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
610 return;
611 }
612
613 bcm_bprintf(strbuf, "\ncpu_online_cnt:\n");
614 dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_online_cnt);
615
616 bcm_bprintf(strbuf, "\ncpu_offline_cnt:\n");
617 dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_offline_cnt);
618
619 bcm_bprintf(strbuf, "\nsched_cnt: dhd_dpc %u napi %u rxc %u txc %u\n",
620 dhd->dhd_dpc_cnt, dhd->napi_sched_cnt, dhd->rxc_sched_cnt,
621 dhd->txc_sched_cnt);
622
623 #ifdef DHD_LB_RXP
624 bcm_bprintf(strbuf, "\nnapi_percpu_run_cnt:\n");
625 dhd_lb_stats_dump_cpu_array(strbuf, dhd->napi_percpu_run_cnt);
626 bcm_bprintf(strbuf, "\nNAPI Packets Received Histogram:\n");
627 dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->napi_rx_hist);
628 #endif /* DHD_LB_RXP */
629
630 #ifdef DHD_LB_RXC
631 bcm_bprintf(strbuf, "\nrxc_percpu_run_cnt:\n");
632 dhd_lb_stats_dump_cpu_array(strbuf, dhd->rxc_percpu_run_cnt);
633 bcm_bprintf(strbuf, "\nRX Completions (Buffer Post) Histogram:\n");
634 dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->rxc_hist);
635 #endif /* DHD_LB_RXC */
636
637 #ifdef DHD_LB_TXC
638 bcm_bprintf(strbuf, "\ntxc_percpu_run_cnt:\n");
639 dhd_lb_stats_dump_cpu_array(strbuf, dhd->txc_percpu_run_cnt);
640 bcm_bprintf(strbuf, "\nTX Completions (Buffer Free) Histogram:\n");
641 dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->txc_hist);
642 #endif /* DHD_LB_TXC */
643
644 #ifdef DHD_LB_TXP
645 bcm_bprintf(strbuf, "\ntxp_percpu_run_cnt:\n");
646 dhd_lb_stats_dump_cpu_array(strbuf, dhd->txp_percpu_run_cnt);
647
648 bcm_bprintf(strbuf, "\ntx_start_percpu_run_cnt:\n");
649 dhd_lb_stats_dump_cpu_array(strbuf, dhd->tx_start_percpu_run_cnt);
650 #endif /* DHD_LB_TXP */
651 }
652
653 /* Given a number 'n', returns 'm', the smallest power of 2 that is >= n (e.g. 5 -> 8, 8 -> 8) */
654 static inline uint32 next_larger_power2(uint32 num)
655 {
656 num--;
657 num |= (num >> 1);
658 num |= (num >> 2);
659 num |= (num >> 4);
660 num |= (num >> 8);
661 num |= (num >> 16);
662
663 return (num + 1);
664 }
665
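/*
 * Bucket a packet count into the per-CPU histogram: the count is rounded up to
 * a power of 2 and the bin holding that power (1, 2, 4, ... 128, with anything
 * larger falling into the last bin) is incremented for the given CPU.
 */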
666 void dhd_lb_stats_update_histo(uint32 **bin, uint32 count, uint32 cpu)
667 {
668 uint32 bin_power;
669 uint32 *p;
670 bin_power = next_larger_power2(count);
671
672 switch (bin_power) {
673 case 1: p = bin[0] + cpu; break;
674 case 2: p = bin[1] + cpu; break;
675 case 4: p = bin[2] + cpu; break;
676 case 8: p = bin[3] + cpu; break;
677 case 16: p = bin[4] + cpu; break;
678 case 32: p = bin[5] + cpu; break;
679 case 64: p = bin[6] + cpu; break;
680 case 128: p = bin[7] + cpu; break;
681 default : p = bin[8] + cpu; break;
682 }
683
684 *p = *p + 1;
685 return;
686 }
687
688 void dhd_lb_stats_update_napi_histo(dhd_pub_t *dhdp, uint32 count)
689 {
690 int cpu;
691 dhd_info_t *dhd = dhdp->info;
692
693 cpu = get_cpu();
694 put_cpu();
695 dhd_lb_stats_update_histo(dhd->napi_rx_hist, count, cpu);
696
697 return;
698 }
699
700 void dhd_lb_stats_update_txc_histo(dhd_pub_t *dhdp, uint32 count)
701 {
702 int cpu;
703 dhd_info_t *dhd = dhdp->info;
704
705 cpu = get_cpu();
706 put_cpu();
707 dhd_lb_stats_update_histo(dhd->txc_hist, count, cpu);
708
709 return;
710 }
711
712 void dhd_lb_stats_update_rxc_histo(dhd_pub_t *dhdp, uint32 count)
713 {
714 int cpu;
715 dhd_info_t *dhd = dhdp->info;
716
717 cpu = get_cpu();
718 put_cpu();
719 dhd_lb_stats_update_histo(dhd->rxc_hist, count, cpu);
720
721 return;
722 }
723
724 void dhd_lb_stats_txc_percpu_cnt_incr(dhd_pub_t *dhdp)
725 {
726 dhd_info_t *dhd = dhdp->info;
727 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txc_percpu_run_cnt);
728 }
729
730 void dhd_lb_stats_rxc_percpu_cnt_incr(dhd_pub_t *dhdp)
731 {
732 dhd_info_t *dhd = dhdp->info;
733 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->rxc_percpu_run_cnt);
734 }
735 #endif /* DHD_LB_STATS */
736
737 #endif /* DHD_LB */
738 #if defined(DHD_LB)
739 /**
740 * dhd_tasklet_schedule - Function that runs in IPI context of the destination
741 * CPU and schedules a tasklet.
742 * @tasklet: opaque pointer to the tasklet
743 */
744 INLINE void
745 dhd_tasklet_schedule(void *tasklet)
746 {
747 tasklet_schedule((struct tasklet_struct *)tasklet);
748 }
749 /**
750 * dhd_tasklet_schedule_on - Schedules the passed tasklet on a given CPU
751 * @tasklet: tasklet to be scheduled
752 * @on_cpu: cpu core id
753 *
754 * If the requested cpu is online, then an IPI is sent to this cpu via the
755 * smp_call_function_single with no wait and the tasklet_schedule function
756 * will be invoked to schedule the specified tasklet on the requested CPU.
757 */
758 INLINE void
759 dhd_tasklet_schedule_on(struct tasklet_struct *tasklet, int on_cpu)
760 {
761 const int wait = 0;
762 smp_call_function_single(on_cpu,
763 dhd_tasklet_schedule, (void *)tasklet, wait);
764 }
765
766 /**
767 * dhd_work_schedule_on - Schedules the passed work item on a given CPU
768 * @work: work to be scheduled
769 * @on_cpu: cpu core id
770 *
771 * The work is queued on the requested CPU via schedule_work_on(), so the
772 * work function will subsequently be executed on that CPU by its worker
773 * thread.
774 */
775
776 INLINE void
777 dhd_work_schedule_on(struct work_struct *work, int on_cpu)
778 {
779 schedule_work_on(on_cpu, work);
780 }
781
782 #if defined(DHD_LB_TXC)
783 /**
784 * dhd_lb_tx_compl_dispatch - load balance by dispatching the tx_compl_tasklet
785 * on another cpu. The tx_compl_tasklet will take care of DMA unmapping and
786 * freeing the packets placed in the tx_compl workq
787 */
788 void
789 dhd_lb_tx_compl_dispatch(dhd_pub_t *dhdp)
790 {
791 dhd_info_t *dhd = dhdp->info;
792 int curr_cpu, on_cpu;
793
794 if (dhd->rx_napi_netdev == NULL) {
795 DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
796 return;
797 }
798
799 DHD_LB_STATS_INCR(dhd->txc_sched_cnt);
800 /*
801 * If the destination CPU is NOT online or is same as current CPU
802 * no need to schedule the work
803 */
804 curr_cpu = get_cpu();
805 put_cpu();
806
807 on_cpu = atomic_read(&dhd->tx_compl_cpu);
808
809 if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
810 dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
811 } else {
812 schedule_work(&dhd->tx_compl_dispatcher_work);
813 }
814 }
815
816 static void dhd_tx_compl_dispatcher_fn(struct work_struct * work)
817 {
818 struct dhd_info *dhd =
819 container_of(work, struct dhd_info, tx_compl_dispatcher_work);
820 int cpu;
821
822 get_online_cpus();
823 cpu = atomic_read(&dhd->tx_compl_cpu);
824 if (!cpu_online(cpu))
825 dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
826 else
827 dhd_tasklet_schedule_on(&dhd->tx_compl_tasklet, cpu);
828 put_online_cpus();
829 }
830 #endif /* DHD_LB_TXC */
831
832 #if defined(DHD_LB_RXC)
833 /**
834 * dhd_lb_rx_compl_dispatch - load balance by dispatching the rx_compl_tasklet
835 * on another cpu. The rx_compl_tasklet will take care of reposting rx buffers
836 * in the H2D RxBuffer Post common ring, by using the recycled pktids that were
837 * placed in the rx_compl workq.
838 *
839 * @dhdp: pointer to dhd_pub object
840 */
841 void
842 dhd_lb_rx_compl_dispatch(dhd_pub_t *dhdp)
843 {
844 dhd_info_t *dhd = dhdp->info;
845 int curr_cpu, on_cpu;
846
847 if (dhd->rx_napi_netdev == NULL) {
848 DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
849 return;
850 }
851
852 DHD_LB_STATS_INCR(dhd->rxc_sched_cnt);
853 /*
854 * If the destination CPU is NOT online or is same as current CPU
855 * no need to schedule the work
856 */
857 curr_cpu = get_cpu();
858 put_cpu();
859 on_cpu = atomic_read(&dhd->rx_compl_cpu);
860
861 if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
862 dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
863 } else {
864 schedule_work(&dhd->rx_compl_dispatcher_work);
865 }
866 }
867
868 void dhd_rx_compl_dispatcher_fn(struct work_struct * work)
869 {
870 struct dhd_info *dhd =
871 container_of(work, struct dhd_info, rx_compl_dispatcher_work);
872 int cpu;
873
874 get_online_cpus();
875 cpu = atomic_read(&dhd->rx_compl_cpu);
876 if (!cpu_online(cpu))
877 dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
878 else {
879 dhd_tasklet_schedule_on(&dhd->rx_compl_tasklet, cpu);
880 }
881 put_online_cpus();
882 }
883 #endif /* DHD_LB_RXC */
884
885 #if defined(DHD_LB_TXP)
886 void dhd_tx_dispatcher_work(struct work_struct * work)
887 {
888 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
889 #pragma GCC diagnostic push
890 #pragma GCC diagnostic ignored "-Wcast-qual"
891 #endif // endif
892 struct dhd_info *dhd =
893 container_of(work, struct dhd_info, tx_dispatcher_work);
894 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
895 #pragma GCC diagnostic pop
896 #endif // endif
897 dhd_tasklet_schedule(&dhd->tx_tasklet);
898 }
899
900 void dhd_tx_dispatcher_fn(dhd_pub_t *dhdp)
901 {
902 int cpu;
903 int net_tx_cpu;
904 dhd_info_t *dhd = dhdp->info;
905
906 preempt_disable();
907 cpu = atomic_read(&dhd->tx_cpu);
908 net_tx_cpu = atomic_read(&dhd->net_tx_cpu);
909
910 /*
911 * Now if the NET_TX has pushed the packet in the same
912 * CPU that is chosen for Tx processing, separate it out,
913 * i.e. run the TX processing tasklet on compl_cpu
914 */
915 if (net_tx_cpu == cpu)
916 cpu = atomic_read(&dhd->tx_compl_cpu);
917
918 if (!cpu_online(cpu)) {
919 /*
920 * The chosen CPU is not online,
921 * so do the job on the current CPU itself.
922 */
923 dhd_tasklet_schedule(&dhd->tx_tasklet);
924 } else {
925 /*
926 * Schedule tx_dispatcher_work on the chosen cpu, which
927 * in turn will schedule tx_tasklet.
928 */
929 dhd_work_schedule_on(&dhd->tx_dispatcher_work, cpu);
930 }
931 preempt_enable();
932 }
933
934 /**
935 * dhd_lb_tx_dispatch - load balance by dispatching the tx_tasklet
936 * on another cpu. The tx_tasklet will take care of actually putting
937 * the skbs into the appropriate flow ring and ringing the H2D interrupt
938 *
939 * @dhdp: pointer to dhd_pub object
940 */
941 void
942 dhd_lb_tx_dispatch(dhd_pub_t *dhdp)
943 {
944 dhd_info_t *dhd = dhdp->info;
945 int curr_cpu;
946
947 curr_cpu = get_cpu();
948 put_cpu();
949
950 /* Record the CPU in which the TX request from Network stack came */
951 atomic_set(&dhd->net_tx_cpu, curr_cpu);
952
953 /* Schedule the work to dispatch ... */
954 dhd_tx_dispatcher_fn(dhdp);
955 }
956 #endif /* DHD_LB_TXP */
957
958 #if defined(DHD_LB_RXP)
959 /**
960 * dhd_napi_poll - Load balance napi poll function to process received
961 * packets and send up the network stack using netif_receive_skb()
962 *
963 * @napi: napi object in which context this poll function is invoked
964 * @budget: number of packets to be processed.
965 *
966 * Fetch the dhd_info given the rx_napi_struct. Move all packets from the
967 * rx_napi_queue into a local rx_process_queue (lock and queue move and unlock).
968 * Dequeue each packet from head of rx_process_queue, fetch the ifid from the
969 * packet tag and sendup.
970 */
971 int
972 dhd_napi_poll(struct napi_struct *napi, int budget)
973 {
974 int ifid;
975 const int pkt_count = 1;
976 const int chan = 0;
977 struct sk_buff * skb;
978 unsigned long flags;
979 struct dhd_info *dhd;
980 int processed = 0;
981 struct sk_buff_head rx_process_queue;
982
983 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
984 #pragma GCC diagnostic push
985 #pragma GCC diagnostic ignored "-Wcast-qual"
986 #endif // endif
987 dhd = container_of(napi, struct dhd_info, rx_napi_struct);
988 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
989 #pragma GCC diagnostic pop
990 #endif // endif
991
992 DHD_INFO(("%s napi_queue<%d> budget<%d>\n",
993 __FUNCTION__, skb_queue_len(&dhd->rx_napi_queue), budget));
994 __skb_queue_head_init(&rx_process_queue);
995
996 /* extract the entire rx_napi_queue into local rx_process_queue */
997 spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
998 skb_queue_splice_tail_init(&dhd->rx_napi_queue, &rx_process_queue);
999 spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1000
1001 while ((skb = __skb_dequeue(&rx_process_queue)) != NULL) {
1002 OSL_PREFETCH(skb->data);
1003
1004 ifid = DHD_PKTTAG_IFID((dhd_pkttag_fr_t *)PKTTAG(skb));
1005
1006 DHD_INFO(("%s dhd_rx_frame pkt<%p> ifid<%d>\n",
1007 __FUNCTION__, skb, ifid));
1008
1009 dhd_rx_frame(&dhd->pub, ifid, skb, pkt_count, chan);
1010 processed++;
1011 }
1012
1013 DHD_LB_STATS_UPDATE_NAPI_HISTO(&dhd->pub, processed);
1014
1015 DHD_INFO(("%s processed %d\n", __FUNCTION__, processed));
1016 napi_complete(napi);
1017
1018 return budget - 1;
1019 }
1020
1021 /**
1022 * dhd_napi_schedule - Place the napi struct into the current cpu's softnet napi
1023 * poll list. This function may be invoked via the smp_call_function_single
1024 * from a remote CPU.
1025 *
1026 * This function will essentially invoke __raise_softirq_irqoff(NET_RX_SOFTIRQ)
1027 * after the napi_struct is added to the softnet data's poll_list
1028 *
1029 * @info: pointer to a dhd_info struct
1030 */
1031 static void
1032 dhd_napi_schedule(void *info)
1033 {
1034 dhd_info_t *dhd = (dhd_info_t *)info;
1035
1036 DHD_INFO(("%s rx_napi_struct<%p> on cpu<%d>\n",
1037 __FUNCTION__, &dhd->rx_napi_struct, atomic_read(&dhd->rx_napi_cpu)));
1038
1039 /* add napi_struct to softnet data poll list and raise NET_RX_SOFTIRQ */
1040 if (napi_schedule_prep(&dhd->rx_napi_struct)) {
1041 __napi_schedule(&dhd->rx_napi_struct);
1042 #ifdef WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE
1043 raise_softirq(NET_RX_SOFTIRQ);
1044 #endif /* WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE */
1045 }
1046
1047 /*
1048 * If the rx_napi_struct was already running, then we let it complete
1049 * processing all its packets. The rx_napi_struct may only run on one
1050 * core at a time, to avoid out-of-order handling.
1051 */
1052 }
1053
1054 /**
1055 * dhd_napi_schedule_on - API to schedule on a desired CPU core a NET_RX_SOFTIRQ
1056 * action after placing the dhd's rx_process napi object in the remote CPU's
1057 * softnet data's poll_list.
1058 *
1059 * @dhd: dhd_info which has the rx_process napi object
1060 * @on_cpu: desired remote CPU id
1061 */
1062 static INLINE int
1063 dhd_napi_schedule_on(dhd_info_t *dhd, int on_cpu)
1064 {
1065 int wait = 0; /* asynchronous IPI */
1066 DHD_INFO(("%s dhd<%p> napi<%p> on_cpu<%d>\n",
1067 __FUNCTION__, dhd, &dhd->rx_napi_struct, on_cpu));
1068
1069 if (smp_call_function_single(on_cpu, dhd_napi_schedule, dhd, wait)) {
1070 DHD_ERROR(("%s smp_call_function_single on_cpu<%d> failed\n",
1071 __FUNCTION__, on_cpu));
1072 }
1073
1074 DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1075
1076 return 0;
1077 }
1078
1079 /*
1080 * Call get_online_cpus/put_online_cpus around dhd_napi_schedule_on
1081 * Why should we do this?
1082 * The candidacy algorithm is run from the call back function
1083 * registered to CPU hotplug notifier. This call back happens from Worker
1084 * context. The dhd_napi_schedule_on is also from worker context.
1085 * Note that both of this can run on two different CPUs at the same time.
1086 * So we can possibly have a window where a given CPUn is being brought
1087 * down from CPUm while we try to run a function on CPUn.
1088 * To prevent this, it is better to have the whole code that executes an SMP
1089 * function run under get_online_cpus.
1090 * This function call ensures that the hotplug mechanism does not kick in
1091 * until we are done dealing with online CPUs
1092 * If the hotplug worker is already running, no worries because the
1093 * candidacy algo would then reflect the same in dhd->rx_napi_cpu.
1094 *
1095 * The below mentioned code structure is proposed in
1096 * https://www.kernel.org/doc/Documentation/cpu-hotplug.txt
1097 * for the question
1098 * Q: I need to ensure that a particular cpu is not removed when there is some
1099 * work specific to this cpu in progress
1100 *
1101 * According to the documentation calling get_online_cpus is NOT required, if
1102 * we are running from tasklet context. Since dhd_rx_napi_dispatcher_fn can
1103 * run from Work Queue context we have to call these functions
1104 */
1105 void dhd_rx_napi_dispatcher_fn(struct work_struct * work)
1106 {
1107 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1108 #pragma GCC diagnostic push
1109 #pragma GCC diagnostic ignored "-Wcast-qual"
1110 #endif // endif
1111 struct dhd_info *dhd =
1112 container_of(work, struct dhd_info, rx_napi_dispatcher_work);
1113 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1114 #pragma GCC diagnostic pop
1115 #endif // endif
1116
1117 dhd_napi_schedule(dhd);
1118 }
1119
1120 /**
1121 * dhd_lb_rx_napi_dispatch - load balance by dispatching the rx_napi_struct
1122 * to run on another CPU. The rx_napi_struct's poll function will retrieve all
1123 * the packets enqueued into the rx_napi_queue and sendup.
1124 * The producer's rx packet queue is appended to the rx_napi_queue before
1125 * dispatching the rx_napi_struct.
1126 */
1127 void
1128 dhd_lb_rx_napi_dispatch(dhd_pub_t *dhdp)
1129 {
1130 unsigned long flags;
1131 dhd_info_t *dhd = dhdp->info;
1132 int curr_cpu;
1133 int on_cpu;
1134 #ifdef DHD_LB_IRQSET
1135 cpumask_t cpus;
1136 #endif /* DHD_LB_IRQSET */
1137
1138 if (dhd->rx_napi_netdev == NULL) {
1139 DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
1140 return;
1141 }
1142
1143 DHD_INFO(("%s append napi_queue<%d> pend_queue<%d>\n", __FUNCTION__,
1144 skb_queue_len(&dhd->rx_napi_queue), skb_queue_len(&dhd->rx_pend_queue)));
1145
1146 /* append the producer's queue of packets to the napi's rx process queue */
1147 spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
1148 skb_queue_splice_tail_init(&dhd->rx_pend_queue, &dhd->rx_napi_queue);
1149 spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1150
1151 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->napi_percpu_run_cnt);
1152
1153 /* if LB RXP is disabled directly schedule NAPI */
1154 if (atomic_read(&dhd->lb_rxp_active) == 0) {
1155 dhd_napi_schedule(dhd);
1156 return;
1157 }
1158
1159 /*
1160 * If the destination CPU is NOT online or is same as current CPU
1161 * no need to schedule the work
1162 */
1163 curr_cpu = get_cpu();
1164 put_cpu();
1165
1166 preempt_disable();
1167 on_cpu = atomic_read(&dhd->rx_napi_cpu);
1168 #ifdef DHD_LB_IRQSET
1169 if (cpumask_and(&cpus, cpumask_of(curr_cpu), dhd->cpumask_primary) ||
1170 (!cpu_online(on_cpu)))
1171 #else
1172 if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu)))
1173 #endif /* DHD_LB_IRQSET */
1174 {
1175 DHD_INFO(("%s : curr_cpu : %d, cpumask : 0x%lx\n", __FUNCTION__,
1176 curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1177 dhd_napi_schedule(dhd);
1178 } else {
1179 DHD_INFO(("%s : schedule to curr_cpu : %d, cpumask : 0x%lx\n",
1180 __FUNCTION__, curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1181 dhd_work_schedule_on(&dhd->rx_napi_dispatcher_work, on_cpu);
1182 DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1183 }
1184 preempt_enable();
1185 }
1186
1187 /**
1188 * dhd_lb_rx_pkt_enqueue - Enqueue the packet into the producer's queue
1189 */
1190 void
1191 dhd_lb_rx_pkt_enqueue(dhd_pub_t *dhdp, void *pkt, int ifidx)
1192 {
1193 dhd_info_t *dhd = dhdp->info;
1194
1195 DHD_INFO(("%s enqueue pkt<%p> ifidx<%d> pend_queue<%d>\n", __FUNCTION__,
1196 pkt, ifidx, skb_queue_len(&dhd->rx_pend_queue)));
1197 DHD_PKTTAG_SET_IFID((dhd_pkttag_fr_t *)PKTTAG(pkt), ifidx);
1198 __skb_queue_tail(&dhd->rx_pend_queue, pkt);
1199 }
1200 #endif /* DHD_LB_RXP */
1201 #endif /* DHD_LB */
1202
1203 #if defined(DHD_LB_IRQSET) || defined(DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON)
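/* Pin the dongle's PCIe interrupt to the given set of CPUs */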
1204 void
1205 dhd_irq_set_affinity(dhd_pub_t *dhdp, const struct cpumask *cpumask)
1206 {
1207 unsigned int irq = (unsigned int)-1;
1208 int err = BCME_OK;
1209
1210 if (!dhdp) {
1211 DHD_ERROR(("%s : dhdp is NULL\n", __FUNCTION__));
1212 return;
1213 }
1214
1215 if (!dhdp->bus) {
1216 DHD_ERROR(("%s : bus is NULL\n", __FUNCTION__));
1217 return;
1218 }
1219
1220 DHD_ERROR(("%s : irq set affinity cpu:0x%lx\n",
1221 __FUNCTION__, *cpumask_bits(cpumask)));
1222
1223 dhdpcie_get_pcieirq(dhdp->bus, &irq);
1224 err = irq_set_affinity(irq, cpumask);
1225 if (err)
1226 DHD_ERROR(("%s : irq set affinity is failed cpu:0x%lx\n",
1227 __FUNCTION__, *cpumask_bits(cpumask)));
1228 }
1229 #endif /* DHD_LB_IRQSET || DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON */
1230
1231 #if defined(DHD_LB_TXP)
1232
1233 int BCMFASTPATH
1234 dhd_lb_sendpkt(dhd_info_t *dhd, struct net_device *net,
1235 int ifidx, void *skb)
1236 {
1237 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->tx_start_percpu_run_cnt);
1238
1239 /* If the feature is disabled at run-time, do TX from here */
1240 if (atomic_read(&dhd->lb_txp_active) == 0) {
1241 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1242 return __dhd_sendpkt(&dhd->pub, ifidx, skb);
1243 }
1244
1245 /* Store the address of net device and interface index in the Packet tag */
1246 DHD_LB_TX_PKTTAG_SET_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), net);
1247 DHD_LB_TX_PKTTAG_SET_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), ifidx);
1248
1249 /* Enqueue the skb into tx_pend_queue */
1250 skb_queue_tail(&dhd->tx_pend_queue, skb);
1251
1252 DHD_TRACE(("%s(): Added skb %p for netdev %p \r\n", __FUNCTION__, skb, net));
1253
1254 /* Dispatch the Tx job to be processed by the tx_tasklet */
1255 dhd_lb_tx_dispatch(&dhd->pub);
1256
1257 return NETDEV_TX_OK;
1258 }
1259 #endif /* DHD_LB_TXP */
1260
1261 #ifdef DHD_LB_TXP
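/* Upper bound on packets drained per dhd_lb_tx_process() run before the tx tasklet is rescheduled */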
1262 #define DHD_LB_TXBOUND 64
1263 /*
1264 * Function that performs the TX processing on a given CPU
1265 */
1266 bool
1267 dhd_lb_tx_process(dhd_info_t *dhd)
1268 {
1269 struct sk_buff *skb;
1270 int cnt = 0;
1271 struct net_device *net;
1272 int ifidx;
1273 bool resched = FALSE;
1274
1275 DHD_TRACE(("%s(): TX Processing \r\n", __FUNCTION__));
1276 if (dhd == NULL) {
1277 DHD_ERROR((" Null pointer DHD \r\n"));
1278 return resched;
1279 }
1280
1281 BCM_REFERENCE(net);
1282
1283 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1284
1285 /* Base Loop to perform the actual Tx */
1286 do {
1287 skb = skb_dequeue(&dhd->tx_pend_queue);
1288 if (skb == NULL) {
1289 DHD_TRACE(("Dequeued a Null Packet \r\n"));
1290 break;
1291 }
1292 cnt++;
1293
1294 net = DHD_LB_TX_PKTTAG_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1295 ifidx = DHD_LB_TX_PKTTAG_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1296
1297 DHD_TRACE(("Processing skb %p for net %p index %d \r\n", skb,
1298 net, ifidx));
1299
1300 __dhd_sendpkt(&dhd->pub, ifidx, skb);
1301
1302 if (cnt >= DHD_LB_TXBOUND) {
1303 resched = TRUE;
1304 break;
1305 }
1306
1307 } while (1);
1308
1309 DHD_INFO(("%s(): Processed %d packets \r\n", __FUNCTION__, cnt));
1310
1311 return resched;
1312 }
1313
1314 void
1315 dhd_lb_tx_handler(unsigned long data)
1316 {
1317 dhd_info_t *dhd = (dhd_info_t *)data;
1318
1319 if (dhd_lb_tx_process(dhd)) {
1320 dhd_tasklet_schedule(&dhd->tx_tasklet);
1321 }
1322 }
1323
1324 #endif /* DHD_LB_TXP */
1325