1 /*
2 * Broadcom Dongle Host Driver (DHD), Linux-specific network interface
3 * Basically selected code segments from usb-cdc.c and usb-rndis.c
4 *
5 * Copyright (C) 1999-2019, Broadcom.
6 *
7 * Unless you and Broadcom execute a separate written software license
8 * agreement governing use of this software, this software is licensed to you
9 * under the terms of the GNU General Public License version 2 (the "GPL"),
10 * available at http://www.broadcom.com/licenses/GPLv2.php, with the
11 * following added to such license:
12 *
13 * As a special exception, the copyright holders of this software give you
14 * permission to link this software with independent modules, and to copy and
15 * distribute the resulting executable under terms of your choice, provided that
16 * you also meet, for each linked independent module, the terms and conditions
17 * of the license of that module. An independent module is a module which is
18 * not derived from this software. The special exception does not apply to any
19 * modifications of the software.
20 *
21 * Notwithstanding the above, under no circumstances may you combine this
22 * software in any way with any other Broadcom software provided under a license
23 * other than the GPL, without Broadcom's express prior written consent.
24 *
25 *
26 * <<Broadcom-WL-IPTag/Open:>>
27 *
28 * $Id: dhd_linux_lb.c 805819 2019-02-20 10:49:35Z $
29 */
30
31 #include <dhd_linux_priv.h>
32
33 extern dhd_pub_t *g_dhd_pub;
34
35 #if defined(DHD_LB)
36
void dhd_lb_set_default_cpus(dhd_info_t *dhd)
38 {
39 /* Default CPU allocation for the jobs */
40 atomic_set(&dhd->rx_napi_cpu, 1);
41 atomic_set(&dhd->rx_compl_cpu, 0x2);
42 atomic_set(&dhd->tx_compl_cpu, 0x2);
43 atomic_set(&dhd->tx_cpu, 0x2);
44 atomic_set(&dhd->net_tx_cpu, 0);
45 }
46
void dhd_cpumasks_deinit(dhd_info_t *dhd)
48 {
49 free_cpumask_var(dhd->cpumask_curr_avail);
50 free_cpumask_var(dhd->cpumask_primary);
51 free_cpumask_var(dhd->cpumask_primary_new);
52 free_cpumask_var(dhd->cpumask_secondary);
53 free_cpumask_var(dhd->cpumask_secondary_new);
54 }
55
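/*
 * DHD_LB_PRIMARY_CPUS and DHD_LB_SECONDARY_CPUS are build-time bitmasks of
 * CPU ids. As a purely illustrative example (the actual values are platform
 * specific), a hypothetical 4+4 big.LITTLE build could use 0xF0 (CPUs 4-7,
 * the big cores) as the primary mask and 0x0E (CPUs 1-3) as the secondary
 * mask; the loops in dhd_cpumasks_init() below would then set exactly those
 * bits in cpumask_primary/cpumask_secondary.
 */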
int dhd_cpumasks_init(dhd_info_t *dhd)
57 {
58 int id;
59 uint32 cpus, num_cpus = num_possible_cpus();
60 int ret = 0;
61
62 DHD_ERROR(("%s CPU masks primary(big)=0x%x secondary(little)=0x%x\n",
63 __FUNCTION__, DHD_LB_PRIMARY_CPUS, DHD_LB_SECONDARY_CPUS));
64
65 if (!alloc_cpumask_var(&dhd->cpumask_curr_avail, GFP_KERNEL) ||
66 !alloc_cpumask_var(&dhd->cpumask_primary, GFP_KERNEL) ||
67 !alloc_cpumask_var(&dhd->cpumask_primary_new, GFP_KERNEL) ||
68 !alloc_cpumask_var(&dhd->cpumask_secondary, GFP_KERNEL) ||
69 !alloc_cpumask_var(&dhd->cpumask_secondary_new, GFP_KERNEL)) {
70 DHD_ERROR(("%s Failed to init cpumasks\n", __FUNCTION__));
71 ret = -ENOMEM;
72 goto fail;
73 }
74
75 cpumask_copy(dhd->cpumask_curr_avail, cpu_online_mask);
76 cpumask_clear(dhd->cpumask_primary);
77 cpumask_clear(dhd->cpumask_secondary);
78
    if (num_cpus > 0x20) {
        DHD_ERROR(("%s max cpus must be 32, %d is too big\n",
                   __FUNCTION__, num_cpus));
        ASSERT(0);
    }
84
85 cpus = DHD_LB_PRIMARY_CPUS;
86 for (id = 0; id < num_cpus; id++) {
87 if (isset(&cpus, id)) {
88 cpumask_set_cpu(id, dhd->cpumask_primary);
89 }
90 }
91
92 cpus = DHD_LB_SECONDARY_CPUS;
93 for (id = 0; id < num_cpus; id++) {
94 if (isset(&cpus, id)) {
95 cpumask_set_cpu(id, dhd->cpumask_secondary);
96 }
97 }
98
99 return ret;
100 fail:
101 dhd_cpumasks_deinit(dhd);
102 return ret;
103 }
104
/*
 * The CPU Candidacy Algorithm
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 * The available CPUs for selection are divided into two groups:
 * Primary Set - A CPU mask that carries the First Choice CPUs.
 * Secondary Set - A CPU mask that carries the Second Choice CPUs.
 *
 * There are three types of jobs that need to be assigned to the CPUs,
 * from one of the above mentioned CPU groups. The jobs are:
 * 1) Rx Packet Processing - napi_cpu
 * 2) Completion Processing (Tx, Rx) - compl_cpu
 * 3) Tx Packet Processing - tx_cpu
 *
 * To begin with, the jobs run on the default CPUs assigned by
 * dhd_lb_set_default_cpus(). Whenever a CPU goes on-line/off-line the CPU
 * candidacy algorithm is triggered. The candidacy algo tries to pick the
 * first available non-boot CPU (i.e. not CPU0) for napi_cpu. If more
 * processors are free, it assigns one to tx_cpu and one to compl_cpu, and
 * tries, as much as possible, to keep the jobs on different CPUs.
 *
 * By design, both Tx and Rx completion jobs run on the same CPU core, as
 * this allows Tx completion skbs to be released into a local free pool from
 * which the Rx buffer posts could have been serviced. It is important to
 * note that a Tx packet may not have a large enough buffer for Rx posting.
 */
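/*
 * Worked example (hypothetical masks, not necessarily the shipped defaults):
 * with a primary mask of 0xF0 and CPUs 4-7 all online, the selection below
 * yields napi_cpu = 4, tx_cpu = 5 and compl_cpu = 6. If fewer primary CPUs
 * are online, the remaining jobs are backfilled from the secondary mask, and
 * if neither mask has an online CPU the hard-coded fallback (napi_cpu 1,
 * compl_cpu 0, tx_cpu 2) is used.
 */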
void dhd_select_cpu_candidacy(dhd_info_t *dhd)
130 {
131 uint32 primary_available_cpus; /* count of primary available cpus */
132 uint32 secondary_available_cpus; /* count of secondary available cpus */
133 uint32 napi_cpu = 0; /* cpu selected for napi rx processing */
134 uint32 compl_cpu = 0; /* cpu selected for completion jobs */
135 uint32 tx_cpu = 0; /* cpu selected for tx processing job */
136
137 cpumask_clear(dhd->cpumask_primary_new);
138 cpumask_clear(dhd->cpumask_secondary_new);
139
140 /*
141 * Now select from the primary mask. Even if a Job is
142 * already running on a CPU in secondary group, we still move
143 * to primary CPU. So no conditional checks.
144 */
145 cpumask_and(dhd->cpumask_primary_new, dhd->cpumask_primary,
146 dhd->cpumask_curr_avail);
147
148 cpumask_and(dhd->cpumask_secondary_new, dhd->cpumask_secondary,
149 dhd->cpumask_curr_avail);
150
151 primary_available_cpus = cpumask_weight(dhd->cpumask_primary_new);
152 if (primary_available_cpus > 0) {
153 napi_cpu = cpumask_first(dhd->cpumask_primary_new);
154
155 /* If no further CPU is available,
156 * cpumask_next returns >= nr_cpu_ids
157 */
158 tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_primary_new);
159 if (tx_cpu >= nr_cpu_ids) {
160 tx_cpu = 0;
161 }
162
163 /* In case there are no more CPUs, do completions & Tx in same CPU */
164 compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_primary_new);
165 if (compl_cpu >= nr_cpu_ids) {
166 compl_cpu = tx_cpu;
167 }
168 }
169
170 DHD_INFO(("%s After primary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
171 __FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
172
173 /* -- Now check for the CPUs from the secondary mask -- */
174 secondary_available_cpus = cpumask_weight(dhd->cpumask_secondary_new);
175
176 DHD_INFO(("%s Available secondary cpus %d nr_cpu_ids %d\n", __FUNCTION__,
177 secondary_available_cpus, nr_cpu_ids));
178
179 if (secondary_available_cpus > 0) {
180 /* At this point if napi_cpu is unassigned it means no CPU
181 * is online from Primary Group
182 */
183 if (napi_cpu == 0) {
184 napi_cpu = cpumask_first(dhd->cpumask_secondary_new);
185 tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_secondary_new);
186 compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
187 } else if (tx_cpu == 0) {
188 tx_cpu = cpumask_first(dhd->cpumask_secondary_new);
189 compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
190 } else if (compl_cpu == 0) {
191 compl_cpu = cpumask_first(dhd->cpumask_secondary_new);
192 }
193
194 /* If no CPU was available for tx processing, choose CPU 0 */
195 if (tx_cpu >= nr_cpu_ids) {
196 tx_cpu = 0;
197 }
198
199 /* If no CPU was available for completion, choose CPU 0 */
200 if (compl_cpu >= nr_cpu_ids) {
201 compl_cpu = 0;
202 }
203 }
204 if ((primary_available_cpus == 0) && (secondary_available_cpus == 0)) {
205 /* No CPUs available from primary or secondary mask */
206 napi_cpu = 1;
207 compl_cpu = 0;
208 tx_cpu = 0x2;
209 }
210
211 DHD_INFO(
212 ("%s After secondary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
213 __FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
214
215 ASSERT(napi_cpu < nr_cpu_ids);
216 ASSERT(compl_cpu < nr_cpu_ids);
217 ASSERT(tx_cpu < nr_cpu_ids);
218
219 atomic_set(&dhd->rx_napi_cpu, napi_cpu);
220 atomic_set(&dhd->tx_compl_cpu, compl_cpu);
221 atomic_set(&dhd->rx_compl_cpu, compl_cpu);
222 atomic_set(&dhd->tx_cpu, tx_cpu);
223
224 return;
225 }
226
/*
 * Functions to handle CPU hotplug notifications. One of their tasks is to
 * trigger the CPU candidacy algorithm for load balancing.
 */
232
233 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
234
int dhd_cpu_startup_callback(unsigned int cpu)
236 {
237 dhd_info_t *dhd = g_dhd_pub->info;
238
    DHD_INFO(("%s(): cpu:%d\n", __FUNCTION__, cpu));
240 DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
241 cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
242 dhd_select_cpu_candidacy(dhd);
243
244 return 0;
245 }
246
int dhd_cpu_teardown_callback(unsigned int cpu)
248 {
249 dhd_info_t *dhd = g_dhd_pub->info;
250
    DHD_INFO(("%s(): cpu:%d\n", __FUNCTION__, cpu));
252 DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
253 cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
254 dhd_select_cpu_candidacy(dhd);
255
256 return 0;
257 }
258 #else
int dhd_cpu_callback(struct notifier_block *nfb, unsigned long action,
                     void *hcpu)
261 {
262 unsigned long int cpu = (unsigned long int)hcpu;
263
264 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
265 #pragma GCC diagnostic push
266 #pragma GCC diagnostic ignored "-Wcast-qual"
267 #endif // endif
268 dhd_info_t *dhd = container_of(nfb, dhd_info_t, cpu_notifier);
269 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
270 #pragma GCC diagnostic pop
271 #endif // endif
272
273 if (!dhd || !(dhd->dhd_state & DHD_ATTACH_STATE_LB_ATTACH_DONE)) {
274 DHD_INFO(("%s(): LB data is not initialized yet.\n", __FUNCTION__));
275 return NOTIFY_BAD;
276 }
277
278 switch (action) {
279 case CPU_ONLINE:
280 case CPU_ONLINE_FROZEN:
281 DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
282 cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
283 dhd_select_cpu_candidacy(dhd);
284 break;
285
286 case CPU_DOWN_PREPARE:
287 case CPU_DOWN_PREPARE_FROZEN:
288 DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
289 cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
290 dhd_select_cpu_candidacy(dhd);
291 break;
292 default:
293 break;
294 }
295
296 return NOTIFY_OK;
297 }
298 #endif /* LINUX_VERSION_CODE < 4.10.0 */
299
int dhd_register_cpuhp_callback(dhd_info_t *dhd)
301 {
302 int cpuhp_ret = 0;
303 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
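    /*
     * Note: with CPUHP_AP_ONLINE_DYN, cpuhp_setup_state() returns the
     * dynamically allocated state number (>= 0) on success and a negative
     * errno on failure, and it also invokes dhd_cpu_startup_callback() for
     * every CPU that is already online at registration time.
     */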
304 cpuhp_ret =
305 cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dhd", dhd_cpu_startup_callback,
306 dhd_cpu_teardown_callback);
307 if (cpuhp_ret < 0) {
308 DHD_ERROR(("%s(): cpuhp_setup_state failed %d RX LB won't happen \r\n",
309 __FUNCTION__, cpuhp_ret));
310 }
311 #else
    /*
     * If we are able to initialize the CPU masks, register with the
     * CPU hotplug framework so that the CPU for each job can be changed
     * dynamically using the candidacy algorithm.
     */
317 dhd->cpu_notifier.notifier_call = dhd_cpu_callback;
318 register_hotcpu_notifier(&dhd->cpu_notifier); /* Register a callback */
319 #endif /* LINUX_VERSION_CODE < 4.10.0 */
320 return cpuhp_ret;
321 }
322
int dhd_unregister_cpuhp_callback(dhd_info_t *dhd)
324 {
325 int ret = 0;
326 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
327 /* Don't want to call tear down while unregistering */
328 cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
329 #else
330 if (dhd->cpu_notifier.notifier_call != NULL) {
331 unregister_cpu_notifier(&dhd->cpu_notifier);
332 }
333 #endif // endif
334 return ret;
335 }
336
337 #if defined(DHD_LB_STATS)
void dhd_lb_stats_init(dhd_pub_t *dhdp)
339 {
340 dhd_info_t *dhd;
341 int i, j, num_cpus = num_possible_cpus();
342 int alloc_size = sizeof(uint32) * num_cpus;
343
344 if (dhdp == NULL) {
        DHD_ERROR(("%s(): Invalid argument, dhd pub pointer is NULL\n",
                   __FUNCTION__));
347 return;
348 }
349
350 dhd = dhdp->info;
351 if (dhd == NULL) {
352 DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
353 return;
354 }
355
356 DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
357 DHD_LB_STATS_CLR(dhd->napi_sched_cnt);
358
359 dhd->napi_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
360 if (!dhd->napi_percpu_run_cnt) {
361 DHD_ERROR(("%s(): napi_percpu_run_cnt malloc failed \n", __FUNCTION__));
362 return;
363 }
364 for (i = 0; i < num_cpus; i++) {
365 DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);
366 }
367
368 DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);
369
370 dhd->rxc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
371 if (!dhd->rxc_percpu_run_cnt) {
372 DHD_ERROR(("%s(): rxc_percpu_run_cnt malloc failed \n", __FUNCTION__));
373 return;
374 }
375 for (i = 0; i < num_cpus; i++) {
376 DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);
377 }
378
379 DHD_LB_STATS_CLR(dhd->txc_sched_cnt);
380
381 dhd->txc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
382 if (!dhd->txc_percpu_run_cnt) {
383 DHD_ERROR(("%s(): txc_percpu_run_cnt malloc failed \n", __FUNCTION__));
384 return;
385 }
386 for (i = 0; i < num_cpus; i++) {
387 DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);
388 }
389
390 dhd->cpu_online_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
391 if (!dhd->cpu_online_cnt) {
392 DHD_ERROR(("%s(): cpu_online_cnt malloc failed \n", __FUNCTION__));
393 return;
394 }
395 for (i = 0; i < num_cpus; i++) {
396 DHD_LB_STATS_CLR(dhd->cpu_online_cnt[i]);
397 }
398
399 dhd->cpu_offline_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
400 if (!dhd->cpu_offline_cnt) {
401 DHD_ERROR(("%s(): cpu_offline_cnt malloc failed \n", __FUNCTION__));
402 return;
403 }
404 for (i = 0; i < num_cpus; i++) {
405 DHD_LB_STATS_CLR(dhd->cpu_offline_cnt[i]);
406 }
407
408 dhd->txp_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
409 if (!dhd->txp_percpu_run_cnt) {
410 DHD_ERROR(("%s(): txp_percpu_run_cnt malloc failed \n", __FUNCTION__));
411 return;
412 }
413 for (i = 0; i < num_cpus; i++) {
414 DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);
415 }
416
417 dhd->tx_start_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
418 if (!dhd->tx_start_percpu_run_cnt) {
419 DHD_ERROR(
420 ("%s(): tx_start_percpu_run_cnt malloc failed \n", __FUNCTION__));
421 return;
422 }
423 for (i = 0; i < num_cpus; i++) {
424 DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);
425 }
426
427 for (j = 0; j < HIST_BIN_SIZE; j++) {
428 dhd->napi_rx_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
429 if (!dhd->napi_rx_hist[j]) {
430 DHD_ERROR(("%s(): dhd->napi_rx_hist[%d] malloc failed \n",
431 __FUNCTION__, j));
432 return;
433 }
434 for (i = 0; i < num_cpus; i++) {
435 DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
436 }
437 }
438 #ifdef DHD_LB_TXC
439 for (j = 0; j < HIST_BIN_SIZE; j++) {
440 dhd->txc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
441 if (!dhd->txc_hist[j]) {
442 DHD_ERROR(
443 ("%s(): dhd->txc_hist[%d] malloc failed \n", __FUNCTION__, j));
444 return;
445 }
446 for (i = 0; i < num_cpus; i++) {
447 DHD_LB_STATS_CLR(dhd->txc_hist[j][i]);
448 }
449 }
450 #endif /* DHD_LB_TXC */
451 #ifdef DHD_LB_RXC
452 for (j = 0; j < HIST_BIN_SIZE; j++) {
453 dhd->rxc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
454 if (!dhd->rxc_hist[j]) {
455 DHD_ERROR(
456 ("%s(): dhd->rxc_hist[%d] malloc failed \n", __FUNCTION__, j));
457 return;
458 }
459 for (i = 0; i < num_cpus; i++) {
460 DHD_LB_STATS_CLR(dhd->rxc_hist[j][i]);
461 }
462 }
463 #endif /* DHD_LB_RXC */
464 return;
465 }
466
void dhd_lb_stats_deinit(dhd_pub_t *dhdp)
468 {
469 dhd_info_t *dhd;
470 int j, num_cpus = num_possible_cpus();
471 int alloc_size = sizeof(uint32) * num_cpus;
472
473 if (dhdp == NULL) {
        DHD_ERROR(("%s(): Invalid argument, dhd pub pointer is NULL\n",
                   __FUNCTION__));
476 return;
477 }
478
479 dhd = dhdp->info;
480 if (dhd == NULL) {
481 DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
482 return;
483 }
484
485 if (dhd->napi_percpu_run_cnt) {
486 MFREE(dhdp->osh, dhd->napi_percpu_run_cnt, alloc_size);
487 dhd->napi_percpu_run_cnt = NULL;
488 }
489 if (dhd->rxc_percpu_run_cnt) {
490 MFREE(dhdp->osh, dhd->rxc_percpu_run_cnt, alloc_size);
491 dhd->rxc_percpu_run_cnt = NULL;
492 }
493 if (dhd->txc_percpu_run_cnt) {
494 MFREE(dhdp->osh, dhd->txc_percpu_run_cnt, alloc_size);
495 dhd->txc_percpu_run_cnt = NULL;
496 }
497 if (dhd->cpu_online_cnt) {
498 MFREE(dhdp->osh, dhd->cpu_online_cnt, alloc_size);
499 dhd->cpu_online_cnt = NULL;
500 }
501 if (dhd->cpu_offline_cnt) {
502 MFREE(dhdp->osh, dhd->cpu_offline_cnt, alloc_size);
503 dhd->cpu_offline_cnt = NULL;
504 }
505
506 if (dhd->txp_percpu_run_cnt) {
507 MFREE(dhdp->osh, dhd->txp_percpu_run_cnt, alloc_size);
508 dhd->txp_percpu_run_cnt = NULL;
509 }
510 if (dhd->tx_start_percpu_run_cnt) {
511 MFREE(dhdp->osh, dhd->tx_start_percpu_run_cnt, alloc_size);
512 dhd->tx_start_percpu_run_cnt = NULL;
513 }
514
515 for (j = 0; j < HIST_BIN_SIZE; j++) {
516 if (dhd->napi_rx_hist[j]) {
517 MFREE(dhdp->osh, dhd->napi_rx_hist[j], alloc_size);
518 dhd->napi_rx_hist[j] = NULL;
519 }
520 #ifdef DHD_LB_TXC
521 if (dhd->txc_hist[j]) {
522 MFREE(dhdp->osh, dhd->txc_hist[j], alloc_size);
523 dhd->txc_hist[j] = NULL;
524 }
525 #endif /* DHD_LB_TXC */
526 #ifdef DHD_LB_RXC
527 if (dhd->rxc_hist[j]) {
528 MFREE(dhdp->osh, dhd->rxc_hist[j], alloc_size);
529 dhd->rxc_hist[j] = NULL;
530 }
531 #endif /* DHD_LB_RXC */
532 }
533
534 return;
535 }
536
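/*
 * dhd_lb_stats_dump_histo() prints one column per possible CPU and one row
 * per histogram bin; a purely illustrative layout (all values made up) is:
 *
 *   CPU:           0    1    2    3
 *   Bin
 *   1:             10   0    4    0
 *   2:             3    0    1    0
 *   ...
 *   Per CPU Total  16   0    6    0
 *   Total          22
 *
 * Each bin row is labelled with its bucket size (1 << bin), and the per-CPU
 * totals weight each bin count by that bucket size.
 */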
void dhd_lb_stats_dump_histo(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf,
                             uint32 **hist)
539 {
540 int i, j;
541 uint32 *per_cpu_total;
542 uint32 total = 0;
543 uint32 num_cpus = num_possible_cpus();
544
545 per_cpu_total = (uint32 *)MALLOC(dhdp->osh, sizeof(uint32) * num_cpus);
546 if (!per_cpu_total) {
547 DHD_ERROR(("%s(): dhd->per_cpu_total malloc failed \n", __FUNCTION__));
548 return;
549 }
550 bzero(per_cpu_total, sizeof(uint32) * num_cpus);
551
552 bcm_bprintf(strbuf, "CPU: \t\t");
553 for (i = 0; i < num_cpus; i++) {
554 bcm_bprintf(strbuf, "%d\t", i);
555 }
556 bcm_bprintf(strbuf, "\nBin\n");
557
558 for (i = 0; i < HIST_BIN_SIZE; i++) {
559 bcm_bprintf(strbuf, "%d:\t\t", 1 << i);
560 for (j = 0; j < num_cpus; j++) {
561 bcm_bprintf(strbuf, "%d\t", hist[i][j]);
562 }
563 bcm_bprintf(strbuf, "\n");
564 }
565 bcm_bprintf(strbuf, "Per CPU Total \t");
566 total = 0;
567 for (i = 0; i < num_cpus; i++) {
568 for (j = 0; j < HIST_BIN_SIZE; j++) {
569 per_cpu_total[i] += (hist[j][i] * (1 << j));
570 }
571 bcm_bprintf(strbuf, "%d\t", per_cpu_total[i]);
572 total += per_cpu_total[i];
573 }
574 bcm_bprintf(strbuf, "\nTotal\t\t%d \n", total);
575
576 if (per_cpu_total) {
577 MFREE(dhdp->osh, per_cpu_total, sizeof(uint32) * num_cpus);
578 per_cpu_total = NULL;
579 }
580 return;
581 }
582
void dhd_lb_stats_dump_cpu_array(struct bcmstrbuf *strbuf, uint32 *p)
584 {
585 int i, num_cpus = num_possible_cpus();
586
587 bcm_bprintf(strbuf, "CPU: \t");
588 for (i = 0; i < num_cpus; i++) {
589 bcm_bprintf(strbuf, "%d\t", i);
590 }
591 bcm_bprintf(strbuf, "\n");
592
593 bcm_bprintf(strbuf, "Val: \t");
594 for (i = 0; i < num_cpus; i++) {
595 bcm_bprintf(strbuf, "%u\t", *(p + i));
596 }
597 bcm_bprintf(strbuf, "\n");
598 return;
599 }
600
void dhd_lb_stats_dump(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
602 {
603 dhd_info_t *dhd;
604
605 if (dhdp == NULL || strbuf == NULL) {
606 DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n", __FUNCTION__,
607 dhdp, strbuf));
608 return;
609 }
610
611 dhd = dhdp->info;
612 if (dhd == NULL) {
613 DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
614 return;
615 }
616
617 bcm_bprintf(strbuf, "\ncpu_online_cnt:\n");
618 dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_online_cnt);
619
620 bcm_bprintf(strbuf, "\ncpu_offline_cnt:\n");
621 dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_offline_cnt);
622
623 bcm_bprintf(strbuf, "\nsched_cnt: dhd_dpc %u napi %u rxc %u txc %u\n",
624 dhd->dhd_dpc_cnt, dhd->napi_sched_cnt, dhd->rxc_sched_cnt,
625 dhd->txc_sched_cnt);
626
627 #ifdef DHD_LB_RXP
628 bcm_bprintf(strbuf, "\nnapi_percpu_run_cnt:\n");
629 dhd_lb_stats_dump_cpu_array(strbuf, dhd->napi_percpu_run_cnt);
630 bcm_bprintf(strbuf, "\nNAPI Packets Received Histogram:\n");
631 dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->napi_rx_hist);
632 #endif /* DHD_LB_RXP */
633
634 #ifdef DHD_LB_RXC
635 bcm_bprintf(strbuf, "\nrxc_percpu_run_cnt:\n");
636 dhd_lb_stats_dump_cpu_array(strbuf, dhd->rxc_percpu_run_cnt);
637 bcm_bprintf(strbuf, "\nRX Completions (Buffer Post) Histogram:\n");
638 dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->rxc_hist);
639 #endif /* DHD_LB_RXC */
640
641 #ifdef DHD_LB_TXC
642 bcm_bprintf(strbuf, "\ntxc_percpu_run_cnt:\n");
643 dhd_lb_stats_dump_cpu_array(strbuf, dhd->txc_percpu_run_cnt);
644 bcm_bprintf(strbuf, "\nTX Completions (Buffer Free) Histogram:\n");
645 dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->txc_hist);
646 #endif /* DHD_LB_TXC */
647
648 #ifdef DHD_LB_TXP
649 bcm_bprintf(strbuf, "\ntxp_percpu_run_cnt:\n");
650 dhd_lb_stats_dump_cpu_array(strbuf, dhd->txp_percpu_run_cnt);
651
652 bcm_bprintf(strbuf, "\ntx_start_percpu_run_cnt:\n");
653 dhd_lb_stats_dump_cpu_array(strbuf, dhd->tx_start_percpu_run_cnt);
654 #endif /* DHD_LB_TXP */
655 }
656
/* Given a number 'num', returns the smallest power of 2 that is >= num */
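/*
 * Example mappings:
 *   next_larger_power2(1) == 1,  next_larger_power2(5) == 8,
 *   next_larger_power2(8) == 8,  next_larger_power2(9) == 16,
 *   next_larger_power2(0) == 0   (unsigned wrap-around).
 */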
static inline uint32 next_larger_power2(uint32 num)
659 {
660 num--;
661 num |= (num >> 1);
662 num |= (num >> 0x2);
663 num |= (num >> 0x4);
664 num |= (num >> 0x8);
665 num |= (num >> 0x10);
666
667 return (num + 1);
668 }
669
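/*
 * The switch below maps a packet count to a histogram bin, assuming
 * HIST_BIN_SIZE covers bins 0..8:
 *   count 1     -> bin[0]    count 17..32  -> bin[5]
 *   count 2     -> bin[1]    count 33..64  -> bin[6]
 *   count 3..4  -> bin[2]    count 65..128 -> bin[7]
 *   count 5..8  -> bin[3]    anything else -> bin[8]
 *   count 9..16 -> bin[4]
 */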
void dhd_lb_stats_update_histo(uint32 **bin, uint32 count, uint32 cpu)
671 {
672 uint32 bin_power;
673 uint32 *p;
674 bin_power = next_larger_power2(count);
675
676 switch (bin_power) {
677 case 1:
678 p = bin[0] + cpu;
679 break;
680 case 0x2:
681 p = bin[1] + cpu;
682 break;
683 case 0x4:
684 p = bin[0x2] + cpu;
685 break;
686 case 0x8:
687 p = bin[0x3] + cpu;
688 break;
689 case 0x10:
690 p = bin[0x4] + cpu;
691 break;
692 case 0x20:
693 p = bin[0x5] + cpu;
694 break;
695 case 0x40:
696 p = bin[0x6] + cpu;
697 break;
698 case 0x80:
699 p = bin[0x7] + cpu;
700 break;
701 default:
702 p = bin[0x8] + cpu;
703 break;
704 }
705
706 *p = *p + 1;
707 return;
708 }
709
void dhd_lb_stats_update_napi_histo(dhd_pub_t *dhdp, uint32 count)
711 {
712 int cpu;
713 dhd_info_t *dhd = dhdp->info;
714
715 cpu = get_cpu();
716 put_cpu();
717 dhd_lb_stats_update_histo(dhd->napi_rx_hist, count, cpu);
718
719 return;
720 }
721
void dhd_lb_stats_update_txc_histo(dhd_pub_t *dhdp, uint32 count)
723 {
724 int cpu;
725 dhd_info_t *dhd = dhdp->info;
726
727 cpu = get_cpu();
728 put_cpu();
729 dhd_lb_stats_update_histo(dhd->txc_hist, count, cpu);
730
731 return;
732 }
733
void dhd_lb_stats_update_rxc_histo(dhd_pub_t *dhdp, uint32 count)
735 {
736 int cpu;
737 dhd_info_t *dhd = dhdp->info;
738
739 cpu = get_cpu();
740 put_cpu();
741 dhd_lb_stats_update_histo(dhd->rxc_hist, count, cpu);
742
743 return;
744 }
745
void dhd_lb_stats_txc_percpu_cnt_incr(dhd_pub_t *dhdp)
747 {
748 dhd_info_t *dhd = dhdp->info;
749 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txc_percpu_run_cnt);
750 }
751
void dhd_lb_stats_rxc_percpu_cnt_incr(dhd_pub_t *dhdp)
753 {
754 dhd_info_t *dhd = dhdp->info;
755 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->rxc_percpu_run_cnt);
756 }
757 #endif /* DHD_LB_STATS */
758
759 #endif /* DHD_LB */
760 #if defined(DHD_LB)
761 /**
762 * dhd_tasklet_schedule - Function that runs in IPI context of the destination
763 * CPU and schedules a tasklet.
764 * @tasklet: opaque pointer to the tasklet
765 */
INLINE void dhd_tasklet_schedule(void *tasklet)
767 {
768 tasklet_schedule((struct tasklet_struct *)tasklet);
769 }
/**
 * dhd_tasklet_schedule_on - Schedules the passed tasklet on a given CPU
 * @tasklet: tasklet to be scheduled
 * @on_cpu: cpu core id
 *
 * If the requested cpu is online, an IPI is sent to it via
 * smp_call_function_single() with no wait, and dhd_tasklet_schedule() runs
 * there to schedule the specified tasklet on the requested CPU.
 */
INLINE void dhd_tasklet_schedule_on(struct tasklet_struct *tasklet, int on_cpu)
780 {
781 const int wait = 0;
782 smp_call_function_single(on_cpu, dhd_tasklet_schedule, (void *)tasklet,
783 wait);
784 }
785
/**
 * dhd_work_schedule_on - Schedules the passed work item on a given CPU
 * @work: work to be scheduled
 * @on_cpu: cpu core id
 *
 * The work item is queued on the requested CPU's workqueue via
 * schedule_work_on(), and the work function will then be invoked on that
 * CPU.
 */
795
INLINE void dhd_work_schedule_on(struct work_struct *work, int on_cpu)
797 {
798 schedule_work_on(on_cpu, work);
799 }
800
801 #if defined(DHD_LB_TXC)
802 /**
803 * dhd_lb_tx_compl_dispatch - load balance by dispatching the tx_compl_tasklet
804 * on another cpu. The tx_compl_tasklet will take care of DMA unmapping and
805 * freeing the packets placed in the tx_compl workq
806 */
void dhd_lb_tx_compl_dispatch(dhd_pub_t *dhdp)
808 {
809 dhd_info_t *dhd = dhdp->info;
810 int curr_cpu, on_cpu;
811
812 if (dhd->rx_napi_netdev == NULL) {
813 DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
814 return;
815 }
816
817 DHD_LB_STATS_INCR(dhd->txc_sched_cnt);
818 /*
819 * If the destination CPU is NOT online or is same as current CPU
820 * no need to schedule the work
821 */
822 curr_cpu = get_cpu();
823 put_cpu();
824
825 on_cpu = atomic_read(&dhd->tx_compl_cpu);
826 if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
827 dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
828 } else {
829 schedule_work(&dhd->tx_compl_dispatcher_work);
830 }
831 }
832
static void dhd_tx_compl_dispatcher_fn(struct work_struct *work)
834 {
835 struct dhd_info *dhd =
836 container_of(work, struct dhd_info, tx_compl_dispatcher_work);
837 int cpu;
838
839 get_online_cpus();
840 cpu = atomic_read(&dhd->tx_compl_cpu);
841 if (!cpu_online(cpu)) {
842 dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
843 } else {
844 dhd_tasklet_schedule_on(&dhd->tx_compl_tasklet, cpu);
845 }
846 put_online_cpus();
847 }
848 #endif /* DHD_LB_TXC */
849
850 #if defined(DHD_LB_RXC)
851 /**
852 * dhd_lb_rx_compl_dispatch - load balance by dispatching the rx_compl_tasklet
853 * on another cpu. The rx_compl_tasklet will take care of reposting rx buffers
854 * in the H2D RxBuffer Post common ring, by using the recycled pktids that were
855 * placed in the rx_compl workq.
856 *
857 * @dhdp: pointer to dhd_pub object
858 */
void dhd_lb_rx_compl_dispatch(dhd_pub_t *dhdp)
860 {
861 dhd_info_t *dhd = dhdp->info;
862 int curr_cpu, on_cpu;
863
864 if (dhd->rx_napi_netdev == NULL) {
865 DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
866 return;
867 }
868
869 DHD_LB_STATS_INCR(dhd->rxc_sched_cnt);
870 /*
871 * If the destination CPU is NOT online or is same as current CPU
872 * no need to schedule the work
873 */
874 curr_cpu = get_cpu();
875 put_cpu();
876 on_cpu = atomic_read(&dhd->rx_compl_cpu);
877 if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
878 dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
879 } else {
880 schedule_work(&dhd->rx_compl_dispatcher_work);
881 }
882 }
883
void dhd_rx_compl_dispatcher_fn(struct work_struct *work)
885 {
886 struct dhd_info *dhd =
887 container_of(work, struct dhd_info, rx_compl_dispatcher_work);
888 int cpu;
889
890 get_online_cpus();
891 cpu = atomic_read(&dhd->rx_compl_cpu);
892 if (!cpu_online(cpu)) {
893 dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
894 } else {
895 dhd_tasklet_schedule_on(&dhd->rx_compl_tasklet, cpu);
896 }
897 put_online_cpus();
898 }
899 #endif /* DHD_LB_RXC */
900
901 #if defined(DHD_LB_TXP)
void dhd_tx_dispatcher_work(struct work_struct *work)
903 {
904 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
905 #pragma GCC diagnostic push
906 #pragma GCC diagnostic ignored "-Wcast-qual"
907 #endif // endif
908 struct dhd_info *dhd =
909 container_of(work, struct dhd_info, tx_dispatcher_work);
910 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
911 #pragma GCC diagnostic pop
912 #endif // endif
913 dhd_tasklet_schedule(&dhd->tx_tasklet);
914 }
915
void dhd_tx_dispatcher_fn(dhd_pub_t *dhdp)
917 {
918 int cpu;
919 int net_tx_cpu;
920 dhd_info_t *dhd = dhdp->info;
921
922 preempt_disable();
923 cpu = atomic_read(&dhd->tx_cpu);
924 net_tx_cpu = atomic_read(&dhd->net_tx_cpu);
    /*
     * If the network stack's TX has pushed the packet on the same
     * CPU that was chosen for Tx processing, separate them out,
     * i.e. run the TX processing tasklet on the compl_cpu instead.
     */
930 if (net_tx_cpu == cpu) {
931 cpu = atomic_read(&dhd->tx_compl_cpu);
932 }
933
934 if (!cpu_online(cpu)) {
        /*
         * The chosen CPU is not online, so do the job
         * on the current CPU itself.
         */
939 dhd_tasklet_schedule(&dhd->tx_tasklet);
940 } else {
        /*
         * Schedule tx_dispatcher_work on the chosen cpu, which
         * in turn will schedule the tx_tasklet there.
         */
945 dhd_work_schedule_on(&dhd->tx_dispatcher_work, cpu);
946 }
947 preempt_enable();
948 }
949
950 /**
951 * dhd_lb_tx_dispatch - load balance by dispatching the tx_tasklet
952 * on another cpu. The tx_tasklet will take care of actually putting
953 * the skbs into appropriate flow ring and ringing H2D interrupt
954 *
955 * @dhdp: pointer to dhd_pub object
956 */
void dhd_lb_tx_dispatch(dhd_pub_t *dhdp)
958 {
959 dhd_info_t *dhd = dhdp->info;
960 int curr_cpu;
961
962 curr_cpu = get_cpu();
963 put_cpu();
964
965 /* Record the CPU in which the TX request from Network stack came */
966 atomic_set(&dhd->net_tx_cpu, curr_cpu);
967
968 /* Schedule the work to dispatch ... */
969 dhd_tx_dispatcher_fn(dhdp);
970 }
971 #endif /* DHD_LB_TXP */
972
973 #if defined(DHD_LB_RXP)
974 /**
975 * dhd_napi_poll - Load balance napi poll function to process received
976 * packets and send up the network stack using netif_receive_skb()
977 *
978 * @napi: napi object in which context this poll function is invoked
979 * @budget: number of packets to be processed.
980 *
981 * Fetch the dhd_info given the rx_napi_struct. Move all packets from the
982 * rx_napi_queue into a local rx_process_queue (lock and queue move and unlock).
983 * Dequeue each packet from head of rx_process_queue, fetch the ifid from the
984 * packet tag and sendup.
985 */
int dhd_napi_poll(struct napi_struct *napi, int budget)
987 {
988 int ifid;
989 const int pkt_count = 1;
990 const int chan = 0;
991 struct sk_buff *skb;
992 unsigned long flags;
993 struct dhd_info *dhd;
994 int processed = 0;
995 struct sk_buff_head rx_process_queue;
996
997 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
998 #pragma GCC diagnostic push
999 #pragma GCC diagnostic ignored "-Wcast-qual"
1000 #endif // endif
1001 dhd = container_of(napi, struct dhd_info, rx_napi_struct);
1002 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1003 #pragma GCC diagnostic pop
1004 #endif // endif
1005
1006 DHD_INFO(("%s napi_queue<%d> budget<%d>\n", __FUNCTION__,
1007 skb_queue_len(&dhd->rx_napi_queue), budget));
1008 __skb_queue_head_init(&rx_process_queue);
1009
1010 /* extract the entire rx_napi_queue into local rx_process_queue */
1011 spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
1012 skb_queue_splice_tail_init(&dhd->rx_napi_queue, &rx_process_queue);
1013 spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1014
1015 while ((skb = __skb_dequeue(&rx_process_queue)) != NULL) {
1016 OSL_PREFETCH(skb->data);
1017
1018 ifid = DHD_PKTTAG_IFID((dhd_pkttag_fr_t *)PKTTAG(skb));
1019
1020 DHD_INFO(
1021 ("%s dhd_rx_frame pkt<%p> ifid<%d>\n", __FUNCTION__, skb, ifid));
1022
1023 dhd_rx_frame(&dhd->pub, ifid, skb, pkt_count, chan);
1024 processed++;
1025 }
1026
1027 DHD_LB_STATS_UPDATE_NAPI_HISTO(&dhd->pub, processed);
1028
1029 DHD_INFO(("%s processed %d\n", __FUNCTION__, processed));
1030 napi_complete(napi);
1031
1032 return budget - 1;
1033 }
1034
/**
 * dhd_napi_schedule - Place the napi struct into the current CPU's softnet
 * napi poll list. This function may be invoked via smp_call_function_single()
 * from a remote CPU.
 *
 * This function essentially invokes __raise_softirq_irqoff(NET_RX_SOFTIRQ)
 * after the napi_struct is added to the softnet data's poll_list.
 *
 * @info: pointer to a dhd_info struct
 */
static void dhd_napi_schedule(void *info)
1046 {
1047 dhd_info_t *dhd = (dhd_info_t *)info;
1048
1049 DHD_INFO(("%s rx_napi_struct<%p> on cpu<%d>\n", __FUNCTION__,
1050 &dhd->rx_napi_struct, atomic_read(&dhd->rx_napi_cpu)));
1051
1052 /* add napi_struct to softnet data poll list and raise NET_RX_SOFTIRQ */
1053 if (napi_schedule_prep(&dhd->rx_napi_struct)) {
1054 __napi_schedule(&dhd->rx_napi_struct);
1055 #ifdef WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE
1056 raise_softirq(NET_RX_SOFTIRQ);
1057 #endif /* WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE */
1058 }
1059
1060 /*
1061 * If the rx_napi_struct was already running, then we let it complete
1062 * processing all its packets. The rx_napi_struct may only run on one
1063 * core at a time, to avoid out-of-order handling.
1064 */
1065 }
1066
1067 /**
1068 * dhd_napi_schedule_on - API to schedule on a desired CPU core a NET_RX_SOFTIRQ
 * action after placing the dhd's rx_process napi object in the remote CPU's
1070 * softnet data's poll_list.
1071 *
1072 * @dhd: dhd_info which has the rx_process napi object
1073 * @on_cpu: desired remote CPU id
1074 */
static INLINE int dhd_napi_schedule_on(dhd_info_t *dhd, int on_cpu)
1076 {
1077 int wait = 0; /* asynchronous IPI */
1078 DHD_INFO(("%s dhd<%p> napi<%p> on_cpu<%d>\n", __FUNCTION__, dhd,
1079 &dhd->rx_napi_struct, on_cpu));
1080
1081 if (smp_call_function_single(on_cpu, dhd_napi_schedule, dhd, wait)) {
1082 DHD_ERROR(("%s smp_call_function_single on_cpu<%d> failed\n",
1083 __FUNCTION__, on_cpu));
1084 }
1085
1086 DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1087
1088 return 0;
1089 }
1090
/*
 * Call get_online_cpus/put_online_cpus around dhd_napi_schedule_on.
 * Why should we do this?
 * The candidacy algorithm is run from the callback function registered
 * with the CPU hotplug notifier, and that callback happens from worker
 * context. dhd_napi_schedule_on is also called from worker context.
 * Note that both of these can run on two different CPUs at the same time,
 * so we can have a window where a given CPUn is being brought down from
 * CPUm while we try to run a function on CPUn.
 * To prevent this, it is better to execute the whole SMP function call
 * under get_online_cpus. This ensures that the hotplug mechanism does not
 * kick in until we are done dealing with online CPUs.
 * If the hotplug worker is already running, there is no problem, because
 * the candidacy algo would then reflect the change in dhd->rx_napi_cpu.
 *
 * This code structure is proposed in
 * https://www.kernel.org/doc/Documentation/cpu-hotplug.txt
 * for the question
 * Q: I need to ensure that a particular cpu is not removed when there is
 * some work specific to this cpu in progress.
 *
 * According to the documentation, calling get_online_cpus is NOT required
 * if we are running from tasklet context. Since dhd_rx_napi_dispatcher_fn
 * can run from workqueue context, we have to call these functions.
 */
void dhd_rx_napi_dispatcher_fn(struct work_struct *work)
1118 {
1119 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1120 #pragma GCC diagnostic push
1121 #pragma GCC diagnostic ignored "-Wcast-qual"
1122 #endif // endif
1123 struct dhd_info *dhd =
1124 container_of(work, struct dhd_info, rx_napi_dispatcher_work);
1125 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1126 #pragma GCC diagnostic pop
1127 #endif // endif
1128
1129 dhd_napi_schedule(dhd);
1130 }
1131
1132 /**
1133 * dhd_lb_rx_napi_dispatch - load balance by dispatching the rx_napi_struct
1134 * to run on another CPU. The rx_napi_struct's poll function will retrieve all
1135 * the packets enqueued into the rx_napi_queue and sendup.
1136 * The producer's rx packet queue is appended to the rx_napi_queue before
1137 * dispatching the rx_napi_struct.
1138 */
void dhd_lb_rx_napi_dispatch(dhd_pub_t *dhdp)
1140 {
1141 unsigned long flags;
1142 dhd_info_t *dhd = dhdp->info;
1143 int curr_cpu;
1144 int on_cpu;
1145 #ifdef DHD_LB_IRQSET
1146 cpumask_t cpus;
1147 #endif /* DHD_LB_IRQSET */
1148
1149 if (dhd->rx_napi_netdev == NULL) {
1150 DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
1151 return;
1152 }
1153
1154 DHD_INFO(("%s append napi_queue<%d> pend_queue<%d>\n", __FUNCTION__,
1155 skb_queue_len(&dhd->rx_napi_queue),
1156 skb_queue_len(&dhd->rx_pend_queue)));
1157
1158 /* append the producer's queue of packets to the napi's rx process queue */
1159 spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
1160 skb_queue_splice_tail_init(&dhd->rx_pend_queue, &dhd->rx_napi_queue);
1161 spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1162
1163 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->napi_percpu_run_cnt);
1164
1165 /* if LB RXP is disabled directly schedule NAPI */
1166 if (atomic_read(&dhd->lb_rxp_active) == 0) {
1167 dhd_napi_schedule(dhd);
1168 return;
1169 }
1170
1171 /*
1172 * If the destination CPU is NOT online or is same as current CPU
1173 * no need to schedule the work
1174 */
1175 curr_cpu = get_cpu();
1176 put_cpu();
1177
1178 preempt_disable();
1179 on_cpu = atomic_read(&dhd->rx_napi_cpu);
1180 #ifdef DHD_LB_IRQSET
1181 if (cpumask_and(&cpus, cpumask_of(curr_cpu), dhd->cpumask_primary) ||
1182 (!cpu_online(on_cpu)))
1183 #else
1184 if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu)))
1185 #endif /* DHD_LB_IRQSET */
1186 {
1187 DHD_INFO(("%s : curr_cpu : %d, cpumask : 0x%lx\n", __FUNCTION__,
1188 curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1189 dhd_napi_schedule(dhd);
1190 } else {
        DHD_INFO(("%s : schedule to on_cpu : %d, cpumask : 0x%lx\n",
                  __FUNCTION__, on_cpu, *cpumask_bits(dhd->cpumask_primary)));
1193 dhd_work_schedule_on(&dhd->rx_napi_dispatcher_work, on_cpu);
1194 DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1195 }
1196 preempt_enable();
1197 }
1198
1199 /**
1200 * dhd_lb_rx_pkt_enqueue - Enqueue the packet into the producer's queue
1201 */
void dhd_lb_rx_pkt_enqueue(dhd_pub_t *dhdp, void *pkt, int ifidx)
1203 {
1204 dhd_info_t *dhd = dhdp->info;
1205
1206 DHD_INFO(("%s enqueue pkt<%p> ifidx<%d> pend_queue<%d>\n", __FUNCTION__,
1207 pkt, ifidx, skb_queue_len(&dhd->rx_pend_queue)));
1208 DHD_PKTTAG_SET_IFID((dhd_pkttag_fr_t *)PKTTAG(pkt), ifidx);
1209 __skb_queue_tail(&dhd->rx_pend_queue, pkt);
1210 }
1211 #endif /* DHD_LB_RXP */
1212 #endif /* DHD_LB */
1213
1214 #if defined(DHD_LB_IRQSET) || defined(DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON)
void dhd_irq_set_affinity(dhd_pub_t *dhdp, const struct cpumask *cpumask)
1216 {
1217 unsigned int irq = (unsigned int)-1;
1218 int err = BCME_OK;
1219
1220 if (!dhdp) {
1221 DHD_ERROR(("%s : dhdp is NULL\n", __FUNCTION__));
1222 return;
1223 }
1224
1225 if (!dhdp->bus) {
1226 DHD_ERROR(("%s : bus is NULL\n", __FUNCTION__));
1227 return;
1228 }
1229
1230 DHD_ERROR(("%s : irq set affinity cpu:0x%lx\n", __FUNCTION__,
1231 *cpumask_bits(cpumask)));
1232
1233 dhdpcie_get_pcieirq(dhdp->bus, &irq);
1234 err = irq_set_affinity(irq, cpumask);
1235 if (err) {
1236 DHD_ERROR(("%s : irq set affinity is failed cpu:0x%lx\n", __FUNCTION__,
1237 *cpumask_bits(cpumask)));
1238 }
1239 }
1240 #endif /* DHD_LB_IRQSET || DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON */
1241
1242 #if defined(DHD_LB_TXP)
1243
int BCMFASTPATH dhd_lb_sendpkt(dhd_info_t *dhd, struct net_device *net,
                               int ifidx, void *skb)
1246 {
1247 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->tx_start_percpu_run_cnt);
1248
1249 /* If the feature is disabled run-time do TX from here */
1250 if (atomic_read(&dhd->lb_txp_active) == 0) {
1251 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1252 return __dhd_sendpkt(&dhd->pub, ifidx, skb);
1253 }
1254
1255 /* Store the address of net device and interface index in the Packet tag */
1256 DHD_LB_TX_PKTTAG_SET_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), net);
1257 DHD_LB_TX_PKTTAG_SET_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), ifidx);
1258
1259 /* Enqueue the skb into tx_pend_queue */
1260 skb_queue_tail(&dhd->tx_pend_queue, skb);
1261
1262 DHD_TRACE(
1263 ("%s(): Added skb %p for netdev %p \r\n", __FUNCTION__, skb, net));
1264
1265 /* Dispatch the Tx job to be processed by the tx_tasklet */
1266 dhd_lb_tx_dispatch(&dhd->pub);
1267
1268 return NETDEV_TX_OK;
1269 }
1270 #endif /* DHD_LB_TXP */
1271
1272 #ifdef DHD_LB_TXP
1273 #define DHD_LB_TXBOUND 64
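/*
 * The tasklet processes at most DHD_LB_TXBOUND packets per run; if more
 * packets remain in tx_pend_queue, dhd_lb_tx_process() returns TRUE and
 * dhd_lb_tx_handler() reschedules the tasklet, so a single run does not
 * monopolize softirq context.
 */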
1274 /*
1275 * Function that performs the TX processing on a given CPU
1276 */
bool dhd_lb_tx_process(dhd_info_t *dhd)
1278 {
1279 struct sk_buff *skb;
1280 int cnt = 0;
1281 struct net_device *net;
1282 int ifidx;
1283 bool resched = FALSE;
1284
1285 DHD_TRACE(("%s(): TX Processing \r\n", __FUNCTION__));
1286 if (dhd == NULL) {
1287 DHD_ERROR((" Null pointer DHD \r\n"));
1288 return resched;
1289 }
1290
1291 BCM_REFERENCE(net);
1292
1293 DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1294
1295 /* Base Loop to perform the actual Tx */
1296 do {
1297 skb = skb_dequeue(&dhd->tx_pend_queue);
1298 if (skb == NULL) {
1299 DHD_TRACE(("Dequeued a Null Packet \r\n"));
1300 break;
1301 }
1302 cnt++;
1303
1304 net = DHD_LB_TX_PKTTAG_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1305 ifidx = DHD_LB_TX_PKTTAG_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1306
1307 DHD_TRACE(
1308 ("Processing skb %p for net %p index %d \r\n", skb, net, ifidx));
1309
1310 __dhd_sendpkt(&dhd->pub, ifidx, skb);
1311
1312 if (cnt >= DHD_LB_TXBOUND) {
1313 resched = TRUE;
1314 break;
1315 }
1316 } while (1);
1317
1318 DHD_INFO(("%s(): Processed %d packets \r\n", __FUNCTION__, cnt));
1319 return resched;
1320 }
1321
void dhd_lb_tx_handler(unsigned long data)
1323 {
1324 dhd_info_t *dhd = (dhd_info_t *)data;
1325
1326 if (dhd_lb_tx_process(dhd)) {
1327 dhd_tasklet_schedule(&dhd->tx_tasklet);
1328 }
1329 }
1330
1331 #endif /* DHD_LB_TXP */
1332