1 /*
2  * Broadcom Dongle Host Driver (DHD), Linux-specific network interface
3  * Basically selected code segments from usb-cdc.c and usb-rndis.c
4  *
5  * Copyright (C) 1999-2019, Broadcom.
6  *
7  *      Unless you and Broadcom execute a separate written software license
8  * agreement governing use of this software, this software is licensed to you
9  * under the terms of the GNU General Public License version 2 (the "GPL"),
10  * available at http://www.broadcom.com/licenses/GPLv2.php, with the
11  * following added to such license:
12  *
13  *      As a special exception, the copyright holders of this software give you
14  * permission to link this software with independent modules, and to copy and
15  * distribute the resulting executable under terms of your choice, provided that
16  * you also meet, for each linked independent module, the terms and conditions
17  * of the license of that module.  An independent module is a module which is
18  * not derived from this software.  The special exception does not apply to any
19  * modifications of the software.
20  *
21  *      Notwithstanding the above, under no circumstances may you combine this
22  * software in any way with any other Broadcom software provided under a license
23  * other than the GPL, without Broadcom's express prior written consent.
24  *
25  *
26  * <<Broadcom-WL-IPTag/Open:>>
27  *
28  * $Id: dhd_linux_lb.c 805819 2019-02-20 10:49:35Z $
29  */
30 
31 #include <dhd_linux_priv.h>
32 
33 extern dhd_pub_t *g_dhd_pub;
34 
35 #if defined(DHD_LB)
36 
37 void dhd_lb_set_default_cpus(dhd_info_t *dhd)
38 {
39     /* Default CPU allocation for the jobs */
40     atomic_set(&dhd->rx_napi_cpu, 1);
41     atomic_set(&dhd->rx_compl_cpu, 0x2);
42     atomic_set(&dhd->tx_compl_cpu, 0x2);
43     atomic_set(&dhd->tx_cpu, 0x2);
44     atomic_set(&dhd->net_tx_cpu, 0);
45 }
46 
47 void dhd_cpumasks_deinit(dhd_info_t *dhd)
48 {
49     free_cpumask_var(dhd->cpumask_curr_avail);
50     free_cpumask_var(dhd->cpumask_primary);
51     free_cpumask_var(dhd->cpumask_primary_new);
52     free_cpumask_var(dhd->cpumask_secondary);
53     free_cpumask_var(dhd->cpumask_secondary_new);
54 }
55 
56 int dhd_cpumasks_init(dhd_info_t *dhd)
57 {
58     int id;
59     uint32 cpus, num_cpus = num_possible_cpus();
60     int ret = 0;
61 
62     DHD_ERROR(("%s CPU masks primary(big)=0x%x secondary(little)=0x%x\n",
63                __FUNCTION__, DHD_LB_PRIMARY_CPUS, DHD_LB_SECONDARY_CPUS));
64 
65     if (!alloc_cpumask_var(&dhd->cpumask_curr_avail, GFP_KERNEL) ||
66         !alloc_cpumask_var(&dhd->cpumask_primary, GFP_KERNEL) ||
67         !alloc_cpumask_var(&dhd->cpumask_primary_new, GFP_KERNEL) ||
68         !alloc_cpumask_var(&dhd->cpumask_secondary, GFP_KERNEL) ||
69         !alloc_cpumask_var(&dhd->cpumask_secondary_new, GFP_KERNEL)) {
70         DHD_ERROR(("%s Failed to init cpumasks\n", __FUNCTION__));
71         ret = -ENOMEM;
72         goto fail;
73     }
74 
75     cpumask_copy(dhd->cpumask_curr_avail, cpu_online_mask);
76     cpumask_clear(dhd->cpumask_primary);
77     cpumask_clear(dhd->cpumask_secondary);
78 
79     if (num_cpus > 0x20) {
80         DHD_ERROR(
81             ("%s max cpus must be 32, %d too big\n", __FUNCTION__, num_cpus));
82         ASSERT(0);
83     }
84 
85     cpus = DHD_LB_PRIMARY_CPUS;
86     for (id = 0; id < num_cpus; id++) {
87         if (isset(&cpus, id)) {
88             cpumask_set_cpu(id, dhd->cpumask_primary);
89         }
90     }
91 
92     cpus = DHD_LB_SECONDARY_CPUS;
93     for (id = 0; id < num_cpus; id++) {
94         if (isset(&cpus, id)) {
95             cpumask_set_cpu(id, dhd->cpumask_secondary);
96         }
97     }
98 
99     return ret;
100 fail:
101     dhd_cpumasks_deinit(dhd);
102     return ret;
103 }
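
/*
 * Illustrative sketch (not part of the driver): how the bit-mask to cpumask
 * translation above behaves. The value 0xF0 below is a hypothetical build
 * configuration, not the actual DHD_LB_PRIMARY_CPUS of any platform.
 *
 *     uint32 cpus = 0xF0;              // hypothetical: CPUs 4-7 are "big"
 *     for (id = 0; id < num_cpus; id++) {
 *         if (isset(&cpus, id))        // bit 'id' set in the config word
 *             cpumask_set_cpu(id, dhd->cpumask_primary);
 *     }
 *     // cpumask_primary now contains CPUs 4, 5, 6 and 7
 */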
104 
105 /*
106  * The CPU Candidacy Algorithm
107  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
108  * The available CPUs for selection are divided into two groups
109  *  Primary Set - A CPU mask that carries the First Choice CPUs
110  *  Secondary Set - A CPU mask that carries the Second Choice CPUs.
111  *
112  * There are two types of jobs that need to be assigned to the CPUs,
113  * from one of the above mentioned CPU groups. The jobs are
114  * 1) Rx Packet Processing - napi_cpu
115  * 2) Completion Processing (Tx, Rx) - compl_cpu
116  *
117  * To begin with, both napi_cpu and compl_cpu are on CPU0. Whenever a CPU goes
118  * on-line/off-line the CPU candidacy algorithm is triggered. The candidacy
119  * algorithm tries to pick the first available non-boot CPU (i.e. not CPU0)
120  * for napi_cpu. If more processors are free, it assigns one to compl_cpu.
121  * It also tries, as far as possible, to keep napi_cpu and compl_cpu on
122  * different CPUs.
123  *
124  * By design, both Tx and Rx completion jobs run on the same CPU core, as this
125  * allows Tx completion skb's to be released into a local free pool from
126  * which the rx buffer posts can be serviced. It is important to note
127  * that a Tx packet may not have a large enough buffer for rx posting.
128  */
129 void dhd_select_cpu_candidacy(dhd_info_t *dhd)
130 {
131     uint32 primary_available_cpus;   /* count of primary available cpus */
132     uint32 secondary_available_cpus; /* count of secondary available cpus */
133     uint32 napi_cpu = 0;             /* cpu selected for napi rx processing */
134     uint32 compl_cpu = 0;            /* cpu selected for completion jobs */
135     uint32 tx_cpu = 0;               /* cpu selected for tx processing job */
136 
137     cpumask_clear(dhd->cpumask_primary_new);
138     cpumask_clear(dhd->cpumask_secondary_new);
139 
140     /*
141      * Now select from the primary mask. Even if a Job is
142      * already running on a CPU in secondary group, we still move
143      * to primary CPU. So no conditional checks.
144      */
145     cpumask_and(dhd->cpumask_primary_new, dhd->cpumask_primary,
146                 dhd->cpumask_curr_avail);
147 
148     cpumask_and(dhd->cpumask_secondary_new, dhd->cpumask_secondary,
149                 dhd->cpumask_curr_avail);
150 
151     primary_available_cpus = cpumask_weight(dhd->cpumask_primary_new);
152     if (primary_available_cpus > 0) {
153         napi_cpu = cpumask_first(dhd->cpumask_primary_new);
154 
155         /* If no further CPU is available,
156          * cpumask_next returns >= nr_cpu_ids
157          */
158         tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_primary_new);
159         if (tx_cpu >= nr_cpu_ids) {
160             tx_cpu = 0;
161         }
162 
163         /* In case there are no more CPUs, do completions & Tx in same CPU */
164         compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_primary_new);
165         if (compl_cpu >= nr_cpu_ids) {
166             compl_cpu = tx_cpu;
167         }
168     }
169 
170     DHD_INFO(("%s After primary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
171               __FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
172 
173     /* -- Now check for the CPUs from the secondary mask -- */
174     secondary_available_cpus = cpumask_weight(dhd->cpumask_secondary_new);
175 
176     DHD_INFO(("%s Available secondary cpus %d nr_cpu_ids %d\n", __FUNCTION__,
177               secondary_available_cpus, nr_cpu_ids));
178 
179     if (secondary_available_cpus > 0) {
180         /* At this point if napi_cpu is unassigned it means no CPU
181          * is online from Primary Group
182          */
183         if (napi_cpu == 0) {
184             napi_cpu = cpumask_first(dhd->cpumask_secondary_new);
185             tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_secondary_new);
186             compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
187         } else if (tx_cpu == 0) {
188             tx_cpu = cpumask_first(dhd->cpumask_secondary_new);
189             compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
190         } else if (compl_cpu == 0) {
191             compl_cpu = cpumask_first(dhd->cpumask_secondary_new);
192         }
193 
194         /* If no CPU was available for tx processing, choose CPU 0 */
195         if (tx_cpu >= nr_cpu_ids) {
196             tx_cpu = 0;
197         }
198 
199         /* If no CPU was available for completion, choose CPU 0 */
200         if (compl_cpu >= nr_cpu_ids) {
201             compl_cpu = 0;
202         }
203     }
204     if ((primary_available_cpus == 0) && (secondary_available_cpus == 0)) {
205         /* No CPUs available from primary or secondary mask */
206         napi_cpu = 1;
207         compl_cpu = 0;
208         tx_cpu = 0x2;
209     }
210 
211     DHD_INFO(
212         ("%s After secondary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
213          __FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
214 
215     ASSERT(napi_cpu < nr_cpu_ids);
216     ASSERT(compl_cpu < nr_cpu_ids);
217     ASSERT(tx_cpu < nr_cpu_ids);
218 
219     atomic_set(&dhd->rx_napi_cpu, napi_cpu);
220     atomic_set(&dhd->tx_compl_cpu, compl_cpu);
221     atomic_set(&dhd->rx_compl_cpu, compl_cpu);
222     atomic_set(&dhd->tx_cpu, tx_cpu);
223 
224     return;
225 }
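
/*
 * Worked example (hypothetical topology, for illustration only): assume an
 * 8-core system where the primary mask holds CPUs 4-7 and only CPUs 4 and 5
 * are currently online. Then:
 *     napi_cpu  = cpumask_first(primary_new)       = 4
 *     tx_cpu    = cpumask_next(4, primary_new)     = 5
 *     compl_cpu = cpumask_next(5, primary_new)     >= nr_cpu_ids -> 5
 * i.e. Rx NAPI runs on CPU4 while Tx processing and both completion jobs
 * share CPU5. If no primary CPU were online, the same walk would be done
 * over the secondary mask instead.
 */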
226 
227 /*
228  * Function to handle CPU Hotplug notifications.
229  * One of the tasks it performs is to trigger the CPU Candidacy algorithm
230  * for load balancing.
231  */
232 
233 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
234 
235 int dhd_cpu_startup_callback(unsigned int cpu)
236 {
237     dhd_info_t *dhd = g_dhd_pub->info;
238 
239     DHD_INFO(("%s(): \r\n cpu:%d", __FUNCTION__, cpu));
240     DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
241     cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
242     dhd_select_cpu_candidacy(dhd);
243 
244     return 0;
245 }
246 
247 int dhd_cpu_teardown_callback(unsigned int cpu)
248 {
249     dhd_info_t *dhd = g_dhd_pub->info;
250 
251     DHD_INFO(("%s(): \r\n cpu:%d", __FUNCTION__, cpu));
252     DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
253     cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
254     dhd_select_cpu_candidacy(dhd);
255 
256     return 0;
257 }
258 #else
259 int dhd_cpu_callback(struct notifier_block *nfb, unsigned long action,
260                      void *hcpu)
261 {
262     unsigned long int cpu = (unsigned long int)hcpu;
263 
264 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
265 #pragma GCC diagnostic push
266 #pragma GCC diagnostic ignored "-Wcast-qual"
267 #endif // endif
268     dhd_info_t *dhd = container_of(nfb, dhd_info_t, cpu_notifier);
269 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
270 #pragma GCC diagnostic pop
271 #endif // endif
272 
273     if (!dhd || !(dhd->dhd_state & DHD_ATTACH_STATE_LB_ATTACH_DONE)) {
274         DHD_INFO(("%s(): LB data is not initialized yet.\n", __FUNCTION__));
275         return NOTIFY_BAD;
276     }
277 
278     switch (action) {
279         case CPU_ONLINE:
280         case CPU_ONLINE_FROZEN:
281             DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
282             cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
283             dhd_select_cpu_candidacy(dhd);
284             break;
285 
286         case CPU_DOWN_PREPARE:
287         case CPU_DOWN_PREPARE_FROZEN:
288             DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
289             cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
290             dhd_select_cpu_candidacy(dhd);
291             break;
292         default:
293             break;
294     }
295 
296     return NOTIFY_OK;
297 }
298 #endif /* LINUX_VERSION_CODE < 4.10.0 */
299 
300 int dhd_register_cpuhp_callback(dhd_info_t *dhd)
301 {
302     int cpuhp_ret = 0;
303 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
304     cpuhp_ret =
305         cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dhd", dhd_cpu_startup_callback,
306                           dhd_cpu_teardown_callback);
307     if (cpuhp_ret < 0) {
308         DHD_ERROR(("%s(): cpuhp_setup_state failed %d RX LB won't happen \r\n",
309                    __FUNCTION__, cpuhp_ret));
310     }
311 #else
312     /*
313      * If we are able to initialize CPU masks, let's register with the
314      * CPU Hotplug framework to change the CPU for each job dynamically
315      * using candidacy algorithm.
316      */
317     dhd->cpu_notifier.notifier_call = dhd_cpu_callback;
318     register_hotcpu_notifier(&dhd->cpu_notifier); /* Register a callback */
319 #endif /* LINUX_VERSION_CODE < 4.10.0 */
320     return cpuhp_ret;
321 }
322 
323 int dhd_unregister_cpuhp_callback(dhd_info_t *dhd)
324 {
325     int ret = 0;
326 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
327     /* Don't want to call tear down while unregistering */
328     cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
329 #else
330     if (dhd->cpu_notifier.notifier_call != NULL) {
331         unregister_cpu_notifier(&dhd->cpu_notifier);
332     }
333 #endif // endif
334     return ret;
335 }
336 
337 #if defined(DHD_LB_STATS)
338 void dhd_lb_stats_init(dhd_pub_t *dhdp)
339 {
340     dhd_info_t *dhd;
341     int i, j, num_cpus = num_possible_cpus();
342     int alloc_size = sizeof(uint32) * num_cpus;
343 
344     if (dhdp == NULL) {
345         DHD_ERROR(("%s(): Invalid argument dhd pubb pointer is NULL \n",
346                    __FUNCTION__));
347         return;
348     }
349 
350     dhd = dhdp->info;
351     if (dhd == NULL) {
352         DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
353         return;
354     }
355 
356     DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
357     DHD_LB_STATS_CLR(dhd->napi_sched_cnt);
358 
359     dhd->napi_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
360     if (!dhd->napi_percpu_run_cnt) {
361         DHD_ERROR(("%s(): napi_percpu_run_cnt malloc failed \n", __FUNCTION__));
362         return;
363     }
364     for (i = 0; i < num_cpus; i++) {
365         DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);
366     }
367 
368     DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);
369 
370     dhd->rxc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
371     if (!dhd->rxc_percpu_run_cnt) {
372         DHD_ERROR(("%s(): rxc_percpu_run_cnt malloc failed \n", __FUNCTION__));
373         return;
374     }
375     for (i = 0; i < num_cpus; i++) {
376         DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);
377     }
378 
379     DHD_LB_STATS_CLR(dhd->txc_sched_cnt);
380 
381     dhd->txc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
382     if (!dhd->txc_percpu_run_cnt) {
383         DHD_ERROR(("%s(): txc_percpu_run_cnt malloc failed \n", __FUNCTION__));
384         return;
385     }
386     for (i = 0; i < num_cpus; i++) {
387         DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);
388     }
389 
390     dhd->cpu_online_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
391     if (!dhd->cpu_online_cnt) {
392         DHD_ERROR(("%s(): cpu_online_cnt malloc failed \n", __FUNCTION__));
393         return;
394     }
395     for (i = 0; i < num_cpus; i++) {
396         DHD_LB_STATS_CLR(dhd->cpu_online_cnt[i]);
397     }
398 
399     dhd->cpu_offline_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
400     if (!dhd->cpu_offline_cnt) {
401         DHD_ERROR(("%s(): cpu_offline_cnt malloc failed \n", __FUNCTION__));
402         return;
403     }
404     for (i = 0; i < num_cpus; i++) {
405         DHD_LB_STATS_CLR(dhd->cpu_offline_cnt[i]);
406     }
407 
408     dhd->txp_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
409     if (!dhd->txp_percpu_run_cnt) {
410         DHD_ERROR(("%s(): txp_percpu_run_cnt malloc failed \n", __FUNCTION__));
411         return;
412     }
413     for (i = 0; i < num_cpus; i++) {
414         DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);
415     }
416 
417     dhd->tx_start_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
418     if (!dhd->tx_start_percpu_run_cnt) {
419         DHD_ERROR(
420             ("%s(): tx_start_percpu_run_cnt malloc failed \n", __FUNCTION__));
421         return;
422     }
423     for (i = 0; i < num_cpus; i++) {
424         DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);
425     }
426 
427     for (j = 0; j < HIST_BIN_SIZE; j++) {
428         dhd->napi_rx_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
429         if (!dhd->napi_rx_hist[j]) {
430             DHD_ERROR(("%s(): dhd->napi_rx_hist[%d] malloc failed \n",
431                        __FUNCTION__, j));
432             return;
433         }
434         for (i = 0; i < num_cpus; i++) {
435             DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
436         }
437     }
438 #ifdef DHD_LB_TXC
439     for (j = 0; j < HIST_BIN_SIZE; j++) {
440         dhd->txc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
441         if (!dhd->txc_hist[j]) {
442             DHD_ERROR(
443                 ("%s(): dhd->txc_hist[%d] malloc failed \n", __FUNCTION__, j));
444             return;
445         }
446         for (i = 0; i < num_cpus; i++) {
447             DHD_LB_STATS_CLR(dhd->txc_hist[j][i]);
448         }
449     }
450 #endif /* DHD_LB_TXC */
451 #ifdef DHD_LB_RXC
452     for (j = 0; j < HIST_BIN_SIZE; j++) {
453         dhd->rxc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
454         if (!dhd->rxc_hist[j]) {
455             DHD_ERROR(
456                 ("%s(): dhd->rxc_hist[%d] malloc failed \n", __FUNCTION__, j));
457             return;
458         }
459         for (i = 0; i < num_cpus; i++) {
460             DHD_LB_STATS_CLR(dhd->rxc_hist[j][i]);
461         }
462     }
463 #endif /* DHD_LB_RXC */
464     return;
465 }
466 
467 void dhd_lb_stats_deinit(dhd_pub_t *dhdp)
468 {
469     dhd_info_t *dhd;
470     int j, num_cpus = num_possible_cpus();
471     int alloc_size = sizeof(uint32) * num_cpus;
472 
473     if (dhdp == NULL) {
474         DHD_ERROR(("%s(): Invalid argument dhd pubb pointer is NULL \n",
475                    __FUNCTION__));
476         return;
477     }
478 
479     dhd = dhdp->info;
480     if (dhd == NULL) {
481         DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
482         return;
483     }
484 
485     if (dhd->napi_percpu_run_cnt) {
486         MFREE(dhdp->osh, dhd->napi_percpu_run_cnt, alloc_size);
487         dhd->napi_percpu_run_cnt = NULL;
488     }
489     if (dhd->rxc_percpu_run_cnt) {
490         MFREE(dhdp->osh, dhd->rxc_percpu_run_cnt, alloc_size);
491         dhd->rxc_percpu_run_cnt = NULL;
492     }
493     if (dhd->txc_percpu_run_cnt) {
494         MFREE(dhdp->osh, dhd->txc_percpu_run_cnt, alloc_size);
495         dhd->txc_percpu_run_cnt = NULL;
496     }
497     if (dhd->cpu_online_cnt) {
498         MFREE(dhdp->osh, dhd->cpu_online_cnt, alloc_size);
499         dhd->cpu_online_cnt = NULL;
500     }
501     if (dhd->cpu_offline_cnt) {
502         MFREE(dhdp->osh, dhd->cpu_offline_cnt, alloc_size);
503         dhd->cpu_offline_cnt = NULL;
504     }
505 
506     if (dhd->txp_percpu_run_cnt) {
507         MFREE(dhdp->osh, dhd->txp_percpu_run_cnt, alloc_size);
508         dhd->txp_percpu_run_cnt = NULL;
509     }
510     if (dhd->tx_start_percpu_run_cnt) {
511         MFREE(dhdp->osh, dhd->tx_start_percpu_run_cnt, alloc_size);
512         dhd->tx_start_percpu_run_cnt = NULL;
513     }
514 
515     for (j = 0; j < HIST_BIN_SIZE; j++) {
516         if (dhd->napi_rx_hist[j]) {
517             MFREE(dhdp->osh, dhd->napi_rx_hist[j], alloc_size);
518             dhd->napi_rx_hist[j] = NULL;
519         }
520 #ifdef DHD_LB_TXC
521         if (dhd->txc_hist[j]) {
522             MFREE(dhdp->osh, dhd->txc_hist[j], alloc_size);
523             dhd->txc_hist[j] = NULL;
524         }
525 #endif /* DHD_LB_TXC */
526 #ifdef DHD_LB_RXC
527         if (dhd->rxc_hist[j]) {
528             MFREE(dhdp->osh, dhd->rxc_hist[j], alloc_size);
529             dhd->rxc_hist[j] = NULL;
530         }
531 #endif /* DHD_LB_RXC */
532     }
533 
534     return;
535 }
536 
537 void dhd_lb_stats_dump_histo(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf,
538                              uint32 **hist)
539 {
540     int i, j;
541     uint32 *per_cpu_total;
542     uint32 total = 0;
543     uint32 num_cpus = num_possible_cpus();
544 
545     per_cpu_total = (uint32 *)MALLOC(dhdp->osh, sizeof(uint32) * num_cpus);
546     if (!per_cpu_total) {
547         DHD_ERROR(("%s(): dhd->per_cpu_total malloc failed \n", __FUNCTION__));
548         return;
549     }
550     bzero(per_cpu_total, sizeof(uint32) * num_cpus);
551 
552     bcm_bprintf(strbuf, "CPU: \t\t");
553     for (i = 0; i < num_cpus; i++) {
554         bcm_bprintf(strbuf, "%d\t", i);
555     }
556     bcm_bprintf(strbuf, "\nBin\n");
557 
558     for (i = 0; i < HIST_BIN_SIZE; i++) {
559         bcm_bprintf(strbuf, "%d:\t\t", 1 << i);
560         for (j = 0; j < num_cpus; j++) {
561             bcm_bprintf(strbuf, "%d\t", hist[i][j]);
562         }
563         bcm_bprintf(strbuf, "\n");
564     }
565     bcm_bprintf(strbuf, "Per CPU Total \t");
566     total = 0;
567     for (i = 0; i < num_cpus; i++) {
568         for (j = 0; j < HIST_BIN_SIZE; j++) {
569             per_cpu_total[i] += (hist[j][i] * (1 << j));
570         }
571         bcm_bprintf(strbuf, "%d\t", per_cpu_total[i]);
572         total += per_cpu_total[i];
573     }
574     bcm_bprintf(strbuf, "\nTotal\t\t%d \n", total);
575 
576     if (per_cpu_total) {
577         MFREE(dhdp->osh, per_cpu_total, sizeof(uint32) * num_cpus);
578         per_cpu_total = NULL;
579     }
580     return;
581 }
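
/*
 * Note on the totals printed above: hist[j][i] counts how many times CPU i
 * processed a batch whose size fell into bin j, and the dump approximates
 * the packet total by weighting each bin with its nominal size (1 << j):
 *
 *     per_cpu_total[i] = sum over j of hist[j][i] * (1 << j);
 *
 * Since a bin covers a range of batch sizes, the printed totals are an
 * estimate rather than an exact packet count.
 */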
582 
583 void dhd_lb_stats_dump_cpu_array(struct bcmstrbuf *strbuf, uint32 *p)
584 {
585     int i, num_cpus = num_possible_cpus();
586 
587     bcm_bprintf(strbuf, "CPU: \t");
588     for (i = 0; i < num_cpus; i++) {
589         bcm_bprintf(strbuf, "%d\t", i);
590     }
591     bcm_bprintf(strbuf, "\n");
592 
593     bcm_bprintf(strbuf, "Val: \t");
594     for (i = 0; i < num_cpus; i++) {
595         bcm_bprintf(strbuf, "%u\t", *(p + i));
596     }
597     bcm_bprintf(strbuf, "\n");
598     return;
599 }
600 
601 void dhd_lb_stats_dump(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
602 {
603     dhd_info_t *dhd;
604 
605     if (dhdp == NULL || strbuf == NULL) {
606         DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n", __FUNCTION__,
607                    dhdp, strbuf));
608         return;
609     }
610 
611     dhd = dhdp->info;
612     if (dhd == NULL) {
613         DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
614         return;
615     }
616 
617     bcm_bprintf(strbuf, "\ncpu_online_cnt:\n");
618     dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_online_cnt);
619 
620     bcm_bprintf(strbuf, "\ncpu_offline_cnt:\n");
621     dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_offline_cnt);
622 
623     bcm_bprintf(strbuf, "\nsched_cnt: dhd_dpc %u napi %u rxc %u txc %u\n",
624                 dhd->dhd_dpc_cnt, dhd->napi_sched_cnt, dhd->rxc_sched_cnt,
625                 dhd->txc_sched_cnt);
626 
627 #ifdef DHD_LB_RXP
628     bcm_bprintf(strbuf, "\nnapi_percpu_run_cnt:\n");
629     dhd_lb_stats_dump_cpu_array(strbuf, dhd->napi_percpu_run_cnt);
630     bcm_bprintf(strbuf, "\nNAPI Packets Received Histogram:\n");
631     dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->napi_rx_hist);
632 #endif /* DHD_LB_RXP */
633 
634 #ifdef DHD_LB_RXC
635     bcm_bprintf(strbuf, "\nrxc_percpu_run_cnt:\n");
636     dhd_lb_stats_dump_cpu_array(strbuf, dhd->rxc_percpu_run_cnt);
637     bcm_bprintf(strbuf, "\nRX Completions (Buffer Post) Histogram:\n");
638     dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->rxc_hist);
639 #endif /* DHD_LB_RXC */
640 
641 #ifdef DHD_LB_TXC
642     bcm_bprintf(strbuf, "\ntxc_percpu_run_cnt:\n");
643     dhd_lb_stats_dump_cpu_array(strbuf, dhd->txc_percpu_run_cnt);
644     bcm_bprintf(strbuf, "\nTX Completions (Buffer Free) Histogram:\n");
645     dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->txc_hist);
646 #endif /* DHD_LB_TXC */
647 
648 #ifdef DHD_LB_TXP
649     bcm_bprintf(strbuf, "\ntxp_percpu_run_cnt:\n");
650     dhd_lb_stats_dump_cpu_array(strbuf, dhd->txp_percpu_run_cnt);
651 
652     bcm_bprintf(strbuf, "\ntx_start_percpu_run_cnt:\n");
653     dhd_lb_stats_dump_cpu_array(strbuf, dhd->tx_start_percpu_run_cnt);
654 #endif /* DHD_LB_TXP */
655 }
656 
657 /* Given a number 'n', returns the smallest power of 2 that is >= 'n' */
658 static inline uint32 next_larger_power2(uint32 num)
659 {
660     num--;
661     num |= (num >> 1);
662     num |= (num >> 0x2);
663     num |= (num >> 0x4);
664     num |= (num >> 0x8);
665     num |= (num >> 0x10);
666 
667     return (num + 1);
668 }
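
/*
 * Example of the round-up above: for num = 10 the decrement gives 9
 * (0b1001); OR-ing in the shifted copies smears the top bit downwards,
 * yielding 0b1111 (15), and adding 1 returns 16. For an exact power of
 * two such as 8 the function returns 8 itself, i.e. it rounds up to the
 * nearest power of 2 that is >= num.
 */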
669 
670 void dhd_lb_stats_update_histo(uint32 **bin, uint32 count, uint32 cpu)
671 {
672     uint32 bin_power;
673     uint32 *p;
674     bin_power = next_larger_power2(count);
675 
676     switch (bin_power) {
677         case 1:
678             p = bin[0] + cpu;
679             break;
680         case 0x2:
681             p = bin[1] + cpu;
682             break;
683         case 0x4:
684             p = bin[0x2] + cpu;
685             break;
686         case 0x8:
687             p = bin[0x3] + cpu;
688             break;
689         case 0x10:
690             p = bin[0x4] + cpu;
691             break;
692         case 0x20:
693             p = bin[0x5] + cpu;
694             break;
695         case 0x40:
696             p = bin[0x6] + cpu;
697             break;
698         case 0x80:
699             p = bin[0x7] + cpu;
700             break;
701         default:
702             p = bin[0x8] + cpu;
703             break;
704     }
705 
706     *p = *p + 1;
707     return;
708 }
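
/*
 * Resulting bin layout: a batch of 'count' packets lands in bin[k], where
 * (1 << k) is the smallest power of 2 >= count, e.g.
 *     count = 1      -> bin[0]
 *     count = 2      -> bin[1]
 *     count = 3..4   -> bin[2]
 *     count = 5..8   -> bin[3]
 *     count > 128    -> bin[8] (overflow bucket)
 * dhd_lb_stats_dump_histo() later prints these bins labelled 1, 2, 4, ...
 */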
709 
710 void dhd_lb_stats_update_napi_histo(dhd_pub_t *dhdp, uint32 count)
711 {
712     int cpu;
713     dhd_info_t *dhd = dhdp->info;
714 
715     cpu = get_cpu();
716     put_cpu();
717     dhd_lb_stats_update_histo(dhd->napi_rx_hist, count, cpu);
718 
719     return;
720 }
721 
722 void dhd_lb_stats_update_txc_histo(dhd_pub_t *dhdp, uint32 count)
723 {
724     int cpu;
725     dhd_info_t *dhd = dhdp->info;
726 
727     cpu = get_cpu();
728     put_cpu();
729     dhd_lb_stats_update_histo(dhd->txc_hist, count, cpu);
730 
731     return;
732 }
733 
734 void dhd_lb_stats_update_rxc_histo(dhd_pub_t *dhdp, uint32 count)
735 {
736     int cpu;
737     dhd_info_t *dhd = dhdp->info;
738 
739     cpu = get_cpu();
740     put_cpu();
741     dhd_lb_stats_update_histo(dhd->rxc_hist, count, cpu);
742 
743     return;
744 }
745 
746 void dhd_lb_stats_txc_percpu_cnt_incr(dhd_pub_t *dhdp)
747 {
748     dhd_info_t *dhd = dhdp->info;
749     DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txc_percpu_run_cnt);
750 }
751 
752 void dhd_lb_stats_rxc_percpu_cnt_incr(dhd_pub_t *dhdp)
753 {
754     dhd_info_t *dhd = dhdp->info;
755     DHD_LB_STATS_PERCPU_ARR_INCR(dhd->rxc_percpu_run_cnt);
756 }
757 #endif /* DHD_LB_STATS */
758 
759 #endif /* DHD_LB */
760 #if defined(DHD_LB)
761 /**
762  * dhd_tasklet_schedule - Function that runs in IPI context of the destination
763  * CPU and schedules a tasklet.
764  * @tasklet: opaque pointer to the tasklet
765  */
766 INLINE void dhd_tasklet_schedule(void *tasklet)
767 {
768     tasklet_schedule((struct tasklet_struct *)tasklet);
769 }
770 /**
771  * dhd_tasklet_schedule_on - Schedules the passed tasklet on a given CPU
772  * @tasklet: tasklet to be scheduled
773  * @on_cpu: cpu core id
774  *
775  * If the requested cpu is online, then an IPI is sent to this cpu via the
776  * smp_call_function_single with no wait and the tasklet_schedule function
777  * will be invoked to schedule the specified tasklet on the requested CPU.
778  */
779 INLINE void dhd_tasklet_schedule_on(struct tasklet_struct *tasklet, int on_cpu)
780 {
781     const int wait = 0;
782     smp_call_function_single(on_cpu, dhd_tasklet_schedule, (void *)tasklet,
783                              wait);
784 }
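
/*
 * Minimal usage sketch (illustrative only): push the rx completion tasklet
 * to CPU 2, assuming CPU 2 is known to be online; the CPU number here is a
 * hypothetical choice, not a driver default. dhd_tasklet_schedule() runs in
 * the IPI (hard irq) context of the target CPU, where tasklet_schedule() is
 * safe to call; the tasklet itself then runs in that CPU's softirq context.
 *
 *     if (cpu_online(2))
 *         dhd_tasklet_schedule_on(&dhd->rx_compl_tasklet, 2);
 */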
785 
786 /**
787  * dhd_work_schedule_on - Schedules the passed work item on a given CPU
788  * @work: work to be scheduled
789  * @on_cpu: cpu core id
790  *
791  * The work item is queued on the requested CPU via schedule_work_on(),
792  * and the work function
793  * will subsequently be invoked on that CPU.
794  */
795 
796 INLINE void dhd_work_schedule_on(struct work_struct *work, int on_cpu)
797 {
798     schedule_work_on(on_cpu, work);
799 }
800 
801 #if defined(DHD_LB_TXC)
802 /**
803  * dhd_lb_tx_compl_dispatch - load balance by dispatching the tx_compl_tasklet
804  * on another cpu. The tx_compl_tasklet will take care of DMA unmapping and
805  * freeing the packets placed in the tx_compl workq
806  */
807 void dhd_lb_tx_compl_dispatch(dhd_pub_t *dhdp)
808 {
809     dhd_info_t *dhd = dhdp->info;
810     int curr_cpu, on_cpu;
811 
812     if (dhd->rx_napi_netdev == NULL) {
813         DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
814         return;
815     }
816 
817     DHD_LB_STATS_INCR(dhd->txc_sched_cnt);
818     /*
819      * If the destination CPU is NOT online or is same as current CPU
820      * no need to schedule the work
821      */
822     curr_cpu = get_cpu();
823     put_cpu();
824 
825     on_cpu = atomic_read(&dhd->tx_compl_cpu);
826     if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
827         dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
828     } else {
829         schedule_work(&dhd->tx_compl_dispatcher_work);
830     }
831 }
832 
833 static void dhd_tx_compl_dispatcher_fn(struct work_struct *work)
834 {
835     struct dhd_info *dhd =
836         container_of(work, struct dhd_info, tx_compl_dispatcher_work);
837     int cpu;
838 
839     get_online_cpus();
840     cpu = atomic_read(&dhd->tx_compl_cpu);
841     if (!cpu_online(cpu)) {
842         dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
843     } else {
844         dhd_tasklet_schedule_on(&dhd->tx_compl_tasklet, cpu);
845     }
846     put_online_cpus();
847 }
848 #endif /* DHD_LB_TXC */
849 
850 #if defined(DHD_LB_RXC)
851 /**
852  * dhd_lb_rx_compl_dispatch - load balance by dispatching the rx_compl_tasklet
853  * on another cpu. The rx_compl_tasklet will take care of reposting rx buffers
854  * in the H2D RxBuffer Post common ring, by using the recycled pktids that were
855  * placed in the rx_compl workq.
856  *
857  * @dhdp: pointer to dhd_pub object
858  */
859 void dhd_lb_rx_compl_dispatch(dhd_pub_t *dhdp)
860 {
861     dhd_info_t *dhd = dhdp->info;
862     int curr_cpu, on_cpu;
863 
864     if (dhd->rx_napi_netdev == NULL) {
865         DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
866         return;
867     }
868 
869     DHD_LB_STATS_INCR(dhd->rxc_sched_cnt);
870     /*
871      * If the destination CPU is NOT online or is same as current CPU
872      * no need to schedule the work
873      */
874     curr_cpu = get_cpu();
875     put_cpu();
876     on_cpu = atomic_read(&dhd->rx_compl_cpu);
877     if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
878         dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
879     } else {
880         schedule_work(&dhd->rx_compl_dispatcher_work);
881     }
882 }
883 
884 void dhd_rx_compl_dispatcher_fn(struct work_struct *work)
885 {
886     struct dhd_info *dhd =
887         container_of(work, struct dhd_info, rx_compl_dispatcher_work);
888     int cpu;
889 
890     get_online_cpus();
891     cpu = atomic_read(&dhd->rx_compl_cpu);
892     if (!cpu_online(cpu)) {
893         dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
894     } else {
895         dhd_tasklet_schedule_on(&dhd->rx_compl_tasklet, cpu);
896     }
897     put_online_cpus();
898 }
899 #endif /* DHD_LB_RXC */
900 
901 #if defined(DHD_LB_TXP)
902 void dhd_tx_dispatcher_work(struct work_struct *work)
903 {
904 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
905 #pragma GCC diagnostic push
906 #pragma GCC diagnostic ignored "-Wcast-qual"
907 #endif // endif
908     struct dhd_info *dhd =
909         container_of(work, struct dhd_info, tx_dispatcher_work);
910 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
911 #pragma GCC diagnostic pop
912 #endif // endif
913     dhd_tasklet_schedule(&dhd->tx_tasklet);
914 }
915 
916 void dhd_tx_dispatcher_fn(dhd_pub_t *dhdp)
917 {
918     int cpu;
919     int net_tx_cpu;
920     dhd_info_t *dhd = dhdp->info;
921 
922     preempt_disable();
923     cpu = atomic_read(&dhd->tx_cpu);
924     net_tx_cpu = atomic_read(&dhd->net_tx_cpu);
925     /*
926      * Now if the NET_TX has pushed the packet on the same
927      * CPU that is chosen for Tx processing, separate them out,
928      * i.e. run the Tx processing tasklet on the compl_cpu
929      */
930     if (net_tx_cpu == cpu) {
931         cpu = atomic_read(&dhd->tx_compl_cpu);
932     }
933 
934     if (!cpu_online(cpu)) {
935         /*
936          * Ooohh... but the Chosen CPU is not online,
937          * Do the job in the current CPU itself.
938          */
939         dhd_tasklet_schedule(&dhd->tx_tasklet);
940     } else {
941         /*
942          * Schedule tx_dispatcher_work on the chosen cpu, which
943          * in turn will schedule tx_tasklet.
944          */
945         dhd_work_schedule_on(&dhd->tx_dispatcher_work, cpu);
946     }
947     preempt_enable();
948 }
949 
950 /**
951  * dhd_lb_tx_dispatch - load balance by dispatching the tx_tasklet
952  * on another cpu. The tx_tasklet will take care of actually putting
953  * the skbs into appropriate flow ring and ringing H2D interrupt
954  *
955  * @dhdp: pointer to dhd_pub object
956  */
957 void dhd_lb_tx_dispatch(dhd_pub_t *dhdp)
958 {
959     dhd_info_t *dhd = dhdp->info;
960     int curr_cpu;
961 
962     curr_cpu = get_cpu();
963     put_cpu();
964 
965     /* Record the CPU in which the TX request from Network stack came */
966     atomic_set(&dhd->net_tx_cpu, curr_cpu);
967 
968     /* Schedule the work to dispatch ... */
969     dhd_tx_dispatcher_fn(dhdp);
970 }
971 #endif /* DHD_LB_TXP */
972 
973 #if defined(DHD_LB_RXP)
974 /**
975  * dhd_napi_poll - Load balance napi poll function to process received
976  * packets and send up the network stack using netif_receive_skb()
977  *
978  * @napi: napi object in which context this poll function is invoked
979  * @budget: number of packets to be processed.
980  *
981  * Fetch the dhd_info given the rx_napi_struct. Move all packets from the
982  * rx_napi_queue into a local rx_process_queue (lock and queue move and unlock).
983  * Dequeue each packet from head of rx_process_queue, fetch the ifid from the
984  * packet tag and sendup.
985  */
986 int dhd_napi_poll(struct napi_struct *napi, int budget)
987 {
988     int ifid;
989     const int pkt_count = 1;
990     const int chan = 0;
991     struct sk_buff *skb;
992     unsigned long flags;
993     struct dhd_info *dhd;
994     int processed = 0;
995     struct sk_buff_head rx_process_queue;
996 
997 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
998 #pragma GCC diagnostic push
999 #pragma GCC diagnostic ignored "-Wcast-qual"
1000 #endif // endif
1001     dhd = container_of(napi, struct dhd_info, rx_napi_struct);
1002 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1003 #pragma GCC diagnostic pop
1004 #endif // endif
1005 
1006     DHD_INFO(("%s napi_queue<%d> budget<%d>\n", __FUNCTION__,
1007               skb_queue_len(&dhd->rx_napi_queue), budget));
1008     __skb_queue_head_init(&rx_process_queue);
1009 
1010     /* extract the entire rx_napi_queue into local rx_process_queue */
1011     spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
1012     skb_queue_splice_tail_init(&dhd->rx_napi_queue, &rx_process_queue);
1013     spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1014 
1015     while ((skb = __skb_dequeue(&rx_process_queue)) != NULL) {
1016         OSL_PREFETCH(skb->data);
1017 
1018         ifid = DHD_PKTTAG_IFID((dhd_pkttag_fr_t *)PKTTAG(skb));
1019 
1020         DHD_INFO(
1021             ("%s dhd_rx_frame pkt<%p> ifid<%d>\n", __FUNCTION__, skb, ifid));
1022 
1023         dhd_rx_frame(&dhd->pub, ifid, skb, pkt_count, chan);
1024         processed++;
1025     }
1026 
1027     DHD_LB_STATS_UPDATE_NAPI_HISTO(&dhd->pub, processed);
1028 
1029     DHD_INFO(("%s processed %d\n", __FUNCTION__, processed));
1030     napi_complete(napi);
1031 
1032     return budget - 1;
1033 }
1034 
1035 /**
1036  * dhd_napi_schedule - Place the napi struct into the current cpus softnet napi
1037  * poll list. This function may be invoked via the smp_call_function_single
1038  * from a remote CPU.
1039  *
1040  * This function will essentially invoke __raise_softirq_irqoff(NET_RX_SOFTIRQ)
1041  * after the napi_struct is added to the softnet data's poll_list
1042  *
1043  * @info: pointer to a dhd_info struct
1044  */
1045 static void dhd_napi_schedule(void *info)
1046 {
1047     dhd_info_t *dhd = (dhd_info_t *)info;
1048 
1049     DHD_INFO(("%s rx_napi_struct<%p> on cpu<%d>\n", __FUNCTION__,
1050               &dhd->rx_napi_struct, atomic_read(&dhd->rx_napi_cpu)));
1051 
1052     /* add napi_struct to softnet data poll list and raise NET_RX_SOFTIRQ */
1053     if (napi_schedule_prep(&dhd->rx_napi_struct)) {
1054         __napi_schedule(&dhd->rx_napi_struct);
1055 #ifdef WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE
1056         raise_softirq(NET_RX_SOFTIRQ);
1057 #endif /* WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE */
1058     }
1059 
1060     /*
1061      * If the rx_napi_struct was already running, then we let it complete
1062      * processing all its packets. The rx_napi_struct may only run on one
1063      * core at a time, to avoid out-of-order handling.
1064      */
1065 }
1066 
1067 /**
1068  * dhd_napi_schedule_on - API to schedule on a desired CPU core a NET_RX_SOFTIRQ
1069  * action after placing the dhd's rx_process napi object in the remote CPU's
1070  * softnet data's poll_list.
1071  *
1072  * @dhd: dhd_info which has the rx_process napi object
1073  * @on_cpu: desired remote CPU id
1074  */
1075 static INLINE int dhd_napi_schedule_on(dhd_info_t *dhd, int on_cpu)
1076 {
1077     int wait = 0; /* asynchronous IPI */
1078     DHD_INFO(("%s dhd<%p> napi<%p> on_cpu<%d>\n", __FUNCTION__, dhd,
1079               &dhd->rx_napi_struct, on_cpu));
1080 
1081     if (smp_call_function_single(on_cpu, dhd_napi_schedule, dhd, wait)) {
1082         DHD_ERROR(("%s smp_call_function_single on_cpu<%d> failed\n",
1083                    __FUNCTION__, on_cpu));
1084     }
1085 
1086     DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1087 
1088     return 0;
1089 }
1090 
1091 /*
1092  * Call get_online_cpus/put_online_cpus around dhd_napi_schedule_on
1093  * Why should we do this?
1094  * The candidacy algorithm is run from the call back function
1095  * registered to CPU hotplug notifier. This call back happens from Worker
1096  * context. The dhd_napi_schedule_on is also from worker context.
1097  * Note that both of this can run on two different CPUs at the same time.
1098  * So we can possibly have a window where a given CPUn is being brought
1099  * down from CPUm while we try to run a function on CPUn.
1100  * To prevent this, it is better to execute the whole SMP
1101  * function call under get_online_cpus.
1102  * This call ensures that the hotplug mechanism does not kick in
1103  * until we are done dealing with online CPUs
1104  * If the hotplug worker is already running, no worries because the
1105  * candidacy algo would then reflect the same in dhd->rx_napi_cpu.
1106  *
1107  * The below mentioned code structure is proposed in
1108  * https://www.kernel.org/doc/Documentation/cpu-hotplug.txt
1109  * for the question
1110  * Q: I need to ensure that a particular cpu is not removed when there is some
1111  *    work specific to this cpu is in progress
1112  *
1113  * According to the documentation calling get_online_cpus is NOT required, if
1114  * we are running from tasklet context. Since dhd_rx_napi_dispatcher_fn can
1115  * run from Work Queue context we have to call these functions
1116  */
1117 void dhd_rx_napi_dispatcher_fn(struct work_struct *work)
1118 {
1119 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1120 #pragma GCC diagnostic push
1121 #pragma GCC diagnostic ignored "-Wcast-qual"
1122 #endif // endif
1123     struct dhd_info *dhd =
1124         container_of(work, struct dhd_info, rx_napi_dispatcher_work);
1125 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1126 #pragma GCC diagnostic pop
1127 #endif // endif
1128 
1129     dhd_napi_schedule(dhd);
1130 }
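
/*
 * The pattern described in the comment above, sketched for reference
 * (illustrative only; in this file the work item is already queued on the
 * target CPU via dhd_work_schedule_on(), so the dispatcher simply calls
 * dhd_napi_schedule() locally):
 *
 *     get_online_cpus();                       // block CPU hot-unplug
 *     cpu = atomic_read(&dhd->rx_napi_cpu);
 *     if (cpu_online(cpu))
 *         dhd_napi_schedule_on(dhd, cpu);
 *     else
 *         dhd_napi_schedule(dhd);              // fall back to current CPU
 *     put_online_cpus();                       // re-enable hotplug
 */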
1131 
1132 /**
1133  * dhd_lb_rx_napi_dispatch - load balance by dispatching the rx_napi_struct
1134  * to run on another CPU. The rx_napi_struct's poll function will retrieve all
1135  * the packets enqueued into the rx_napi_queue and sendup.
1136  * The producer's rx packet queue is appended to the rx_napi_queue before
1137  * dispatching the rx_napi_struct.
1138  */
1139 void dhd_lb_rx_napi_dispatch(dhd_pub_t *dhdp)
1140 {
1141     unsigned long flags;
1142     dhd_info_t *dhd = dhdp->info;
1143     int curr_cpu;
1144     int on_cpu;
1145 #ifdef DHD_LB_IRQSET
1146     cpumask_t cpus;
1147 #endif /* DHD_LB_IRQSET */
1148 
1149     if (dhd->rx_napi_netdev == NULL) {
1150         DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
1151         return;
1152     }
1153 
1154     DHD_INFO(("%s append napi_queue<%d> pend_queue<%d>\n", __FUNCTION__,
1155               skb_queue_len(&dhd->rx_napi_queue),
1156               skb_queue_len(&dhd->rx_pend_queue)));
1157 
1158     /* append the producer's queue of packets to the napi's rx process queue */
1159     spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
1160     skb_queue_splice_tail_init(&dhd->rx_pend_queue, &dhd->rx_napi_queue);
1161     spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1162 
1163     DHD_LB_STATS_PERCPU_ARR_INCR(dhd->napi_percpu_run_cnt);
1164 
1165     /* if LB RXP is disabled directly schedule NAPI */
1166     if (atomic_read(&dhd->lb_rxp_active) == 0) {
1167         dhd_napi_schedule(dhd);
1168         return;
1169     }
1170 
1171     /*
1172      * If the destination CPU is NOT online or is same as current CPU
1173      * no need to schedule the work
1174      */
1175     curr_cpu = get_cpu();
1176     put_cpu();
1177 
1178     preempt_disable();
1179     on_cpu = atomic_read(&dhd->rx_napi_cpu);
1180 #ifdef DHD_LB_IRQSET
1181     if (cpumask_and(&cpus, cpumask_of(curr_cpu), dhd->cpumask_primary) ||
1182         (!cpu_online(on_cpu)))
1183 #else
1184     if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu)))
1185 #endif /* DHD_LB_IRQSET */
1186     {
1187         DHD_INFO(("%s : curr_cpu : %d, cpumask : 0x%lx\n", __FUNCTION__,
1188                   curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1189         dhd_napi_schedule(dhd);
1190     } else {
1191         DHD_INFO(("%s : schedule to curr_cpu : %d, cpumask : 0x%lx\n",
1192                   __FUNCTION__, curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1193         dhd_work_schedule_on(&dhd->rx_napi_dispatcher_work, on_cpu);
1194         DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1195     }
1196     preempt_enable();
1197 }
1198 
1199 /**
1200  * dhd_lb_rx_pkt_enqueue - Enqueue the packet into the producer's queue
1201  */
1202 void dhd_lb_rx_pkt_enqueue(dhd_pub_t *dhdp, void *pkt, int ifidx)
1203 {
1204     dhd_info_t *dhd = dhdp->info;
1205 
1206     DHD_INFO(("%s enqueue pkt<%p> ifidx<%d> pend_queue<%d>\n", __FUNCTION__,
1207               pkt, ifidx, skb_queue_len(&dhd->rx_pend_queue)));
1208     DHD_PKTTAG_SET_IFID((dhd_pkttag_fr_t *)PKTTAG(pkt), ifidx);
1209     __skb_queue_tail(&dhd->rx_pend_queue, pkt);
1210 }
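
/*
 * End-to-end Rx load-balancing flow, as implemented in this file:
 *   1. dhd_lb_rx_pkt_enqueue()   - producer tags the skb with its ifidx and
 *                                  appends it to rx_pend_queue.
 *   2. dhd_lb_rx_napi_dispatch() - splices rx_pend_queue onto rx_napi_queue
 *                                  and schedules NAPI, either locally or on
 *                                  the candidate rx_napi_cpu.
 *   3. dhd_napi_poll()           - drains rx_napi_queue and hands each skb
 *                                  to dhd_rx_frame() for the tagged ifidx.
 */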
1211 #endif /* DHD_LB_RXP */
1212 #endif /* DHD_LB */
1213 
1214 #if defined(DHD_LB_IRQSET) || defined(DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON)
1215 void dhd_irq_set_affinity(dhd_pub_t *dhdp, const struct cpumask *cpumask)
1216 {
1217     unsigned int irq = (unsigned int)-1;
1218     int err = BCME_OK;
1219 
1220     if (!dhdp) {
1221         DHD_ERROR(("%s : dhdp is NULL\n", __FUNCTION__));
1222         return;
1223     }
1224 
1225     if (!dhdp->bus) {
1226         DHD_ERROR(("%s : bus is NULL\n", __FUNCTION__));
1227         return;
1228     }
1229 
1230     DHD_ERROR(("%s : irq set affinity cpu:0x%lx\n", __FUNCTION__,
1231                *cpumask_bits(cpumask)));
1232 
1233     dhdpcie_get_pcieirq(dhdp->bus, &irq);
1234     err = irq_set_affinity(irq, cpumask);
1235     if (err) {
1236         DHD_ERROR(("%s : irq set affinity is failed cpu:0x%lx\n", __FUNCTION__,
1237                    *cpumask_bits(cpumask)));
1238     }
1239 }
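
/*
 * Usage sketch (illustrative): pin the PCIe interrupt to the primary (big)
 * CPU group once the masks have been initialised. The call site and timing
 * below are assumptions for illustration, not a statement of where the
 * driver actually invokes this.
 *
 *     dhd_irq_set_affinity(dhdp, dhdp->info->cpumask_primary);
 */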
1240 #endif /* DHD_LB_IRQSET || DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON */
1241 
1242 #if defined(DHD_LB_TXP)
1243 
1244 int BCMFASTPATH dhd_lb_sendpkt(dhd_info_t *dhd, struct net_device *net,
1245                                int ifidx, void *skb)
1246 {
1247     DHD_LB_STATS_PERCPU_ARR_INCR(dhd->tx_start_percpu_run_cnt);
1248 
1249     /* If the feature is disabled run-time do TX from here */
1250     if (atomic_read(&dhd->lb_txp_active) == 0) {
1251         DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1252         return __dhd_sendpkt(&dhd->pub, ifidx, skb);
1253     }
1254 
1255     /* Store the address of net device and interface index in the Packet tag */
1256     DHD_LB_TX_PKTTAG_SET_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), net);
1257     DHD_LB_TX_PKTTAG_SET_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), ifidx);
1258 
1259     /* Enqueue the skb into tx_pend_queue */
1260     skb_queue_tail(&dhd->tx_pend_queue, skb);
1261 
1262     DHD_TRACE(
1263         ("%s(): Added skb %p for netdev %p \r\n", __FUNCTION__, skb, net));
1264 
1265     /* Dispatch the Tx job to be processed by the tx_tasklet */
1266     dhd_lb_tx_dispatch(&dhd->pub);
1267 
1268     return NETDEV_TX_OK;
1269 }
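
/*
 * End-to-end Tx load-balancing flow, as implemented in this file:
 *   1. dhd_lb_sendpkt()       - tags the skb with netdev/ifidx, queues it on
 *                               tx_pend_queue and calls dhd_lb_tx_dispatch().
 *   2. dhd_lb_tx_dispatch()   - records the CPU the stack transmitted from
 *                               and calls dhd_tx_dispatcher_fn().
 *   3. dhd_tx_dispatcher_fn() - picks tx_cpu (or tx_compl_cpu if the stack is
 *                               already on tx_cpu) and schedules the tx
 *                               tasklet there.
 *   4. dhd_lb_tx_handler()    - tasklet body; dhd_lb_tx_process() dequeues up
 *                               to DHD_LB_TXBOUND skbs and passes each one to
 *                               __dhd_sendpkt().
 */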
1270 #endif /* DHD_LB_TXP */
1271 
1272 #ifdef DHD_LB_TXP
1273 #define DHD_LB_TXBOUND 64
1274 /*
1275  * Function that performs the TX processing on a given CPU
1276  */
1277 bool dhd_lb_tx_process(dhd_info_t *dhd)
1278 {
1279     struct sk_buff *skb;
1280     int cnt = 0;
1281     struct net_device *net;
1282     int ifidx;
1283     bool resched = FALSE;
1284 
1285     DHD_TRACE(("%s(): TX Processing \r\n", __FUNCTION__));
1286     if (dhd == NULL) {
1287         DHD_ERROR((" Null pointer DHD \r\n"));
1288         return resched;
1289     }
1290 
1291     BCM_REFERENCE(net);
1292 
1293     DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1294 
1295     /* Base Loop to perform the actual Tx */
1296     do {
1297         skb = skb_dequeue(&dhd->tx_pend_queue);
1298         if (skb == NULL) {
1299             DHD_TRACE(("Dequeued a Null Packet \r\n"));
1300             break;
1301         }
1302         cnt++;
1303 
1304         net = DHD_LB_TX_PKTTAG_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1305         ifidx = DHD_LB_TX_PKTTAG_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1306 
1307         DHD_TRACE(
1308             ("Processing skb %p for net %p index %d \r\n", skb, net, ifidx));
1309 
1310         __dhd_sendpkt(&dhd->pub, ifidx, skb);
1311 
1312         if (cnt >= DHD_LB_TXBOUND) {
1313             resched = TRUE;
1314             break;
1315         }
1316     } while (1);
1317 
1318     DHD_INFO(("%s(): Processed %d packets \r\n", __FUNCTION__, cnt));
1319     return resched;
1320 }
1321 
1322 void dhd_lb_tx_handler(unsigned long data)
1323 {
1324     dhd_info_t *dhd = (dhd_info_t *)data;
1325 
1326     if (dhd_lb_tx_process(dhd)) {
1327         dhd_tasklet_schedule(&dhd->tx_tasklet);
1328     }
1329 }
1330 
1331 #endif /* DHD_LB_TXP */
1332