1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Broadcom Dongle Host Driver (DHD), Linux-specific network interface
4  * Basically selected code segments from usb-cdc.c and usb-rndis.c
5  *
6  * Copyright (C) 1999-2019, Broadcom.
7  *
8  *      Unless you and Broadcom execute a separate written software license
9  * agreement governing use of this software, this software is licensed to you
10  * under the terms of the GNU General Public License version 2 (the "GPL"),
11  * available at http://www.broadcom.com/licenses/GPLv2.php, with the
12  * following added to such license:
13  *
14  *      As a special exception, the copyright holders of this software give you
15  * permission to link this software with independent modules, and to copy and
16  * distribute the resulting executable under terms of your choice, provided that
17  * you also meet, for each linked independent module, the terms and conditions of
18  * the license of that module.  An independent module is a module which is not
19  * derived from this software.  The special exception does not apply to any
20  * modifications of the software.
21  *
22  *      Notwithstanding the above, under no circumstances may you combine this
23  * software in any way with any other Broadcom software provided under a license
24  * other than the GPL, without Broadcom's express prior written consent.
25  *
26  *
27  * <<Broadcom-WL-IPTag/Open:>>
28  *
29  * $Id: dhd_linux_lb.c 805819 2019-02-20 10:49:35Z $
30  */
31 
32 #include <dhd_linux_priv.h>
33 
34 extern dhd_pub_t* g_dhd_pub;
35 
36 #if defined(DHD_LB)
37 
38 void
39 dhd_lb_set_default_cpus(dhd_info_t *dhd)
40 {
41 	/* Default CPU allocation for the jobs */
42 	atomic_set(&dhd->rx_napi_cpu, 1);
43 	atomic_set(&dhd->rx_compl_cpu, 2);
44 	atomic_set(&dhd->tx_compl_cpu, 2);
45 	atomic_set(&dhd->tx_cpu, 2);
46 	atomic_set(&dhd->net_tx_cpu, 0);
47 }
48 
49 void
50 dhd_cpumasks_deinit(dhd_info_t *dhd)
51 {
52 	free_cpumask_var(dhd->cpumask_curr_avail);
53 	free_cpumask_var(dhd->cpumask_primary);
54 	free_cpumask_var(dhd->cpumask_primary_new);
55 	free_cpumask_var(dhd->cpumask_secondary);
56 	free_cpumask_var(dhd->cpumask_secondary_new);
57 }
58 
59 int
60 dhd_cpumasks_init(dhd_info_t *dhd)
61 {
62 	int id;
63 	uint32 cpus, num_cpus = num_possible_cpus();
64 	int ret = 0;
65 
66 	DHD_ERROR(("%s CPU masks primary(big)=0x%x secondary(little)=0x%x\n", __FUNCTION__,
67 		DHD_LB_PRIMARY_CPUS, DHD_LB_SECONDARY_CPUS));
68 
69 	if (!alloc_cpumask_var(&dhd->cpumask_curr_avail, GFP_KERNEL) ||
70 	    !alloc_cpumask_var(&dhd->cpumask_primary, GFP_KERNEL) ||
71 	    !alloc_cpumask_var(&dhd->cpumask_primary_new, GFP_KERNEL) ||
72 	    !alloc_cpumask_var(&dhd->cpumask_secondary, GFP_KERNEL) ||
73 	    !alloc_cpumask_var(&dhd->cpumask_secondary_new, GFP_KERNEL)) {
74 		DHD_ERROR(("%s Failed to init cpumasks\n", __FUNCTION__));
75 		ret = -ENOMEM;
76 		goto fail;
77 	}
78 
79 	cpumask_copy(dhd->cpumask_curr_avail, cpu_online_mask);
80 	cpumask_clear(dhd->cpumask_primary);
81 	cpumask_clear(dhd->cpumask_secondary);
82 
83 	if (num_cpus > 32) {
84 		DHD_ERROR(("%s max cpus must be 32, %d too big\n", __FUNCTION__, num_cpus));
85 		ASSERT(0);
86 	}
87 
88 	cpus = DHD_LB_PRIMARY_CPUS;
89 	for (id = 0; id < num_cpus; id++) {
90 		if (isset(&cpus, id))
91 			cpumask_set_cpu(id, dhd->cpumask_primary);
92 	}
93 
94 	cpus = DHD_LB_SECONDARY_CPUS;
95 	for (id = 0; id < num_cpus; id++) {
96 		if (isset(&cpus, id))
97 			cpumask_set_cpu(id, dhd->cpumask_secondary);
98 	}
99 
100 	return ret;
101 fail:
102 	dhd_cpumasks_deinit(dhd);
103 	return ret;
104 }
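
/*
 * Worked example (editorial sketch, not driver code): on a hypothetical
 * octa-core big.LITTLE system built with DHD_LB_PRIMARY_CPUS == 0xF0 and
 * DHD_LB_SECONDARY_CPUS == 0x0E, the loops above would produce
 *   cpumask_primary   -> CPUs 4-7 (the big cores)
 *   cpumask_secondary -> CPUs 1-3 (little cores, excluding boot CPU0)
 * while cpumask_curr_avail starts as a copy of cpu_online_mask. The actual
 * bitmap values are build-time configuration and differ per platform.
 */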
105 
106 /*
107  * The CPU Candidacy Algorithm
108  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~
109  * The available CPUs for selection are divided into two groups
110  *  Primary Set - A CPU mask that carries the First Choice CPUs
111  *  Secondary Set - A CPU mask that carries the Second Choice CPUs.
112  *
113  * There are two types of jobs that need to be assigned to
114  * the CPUs from one of the above-mentioned CPU groups. The jobs are
115  * 1) Rx Packet Processing - napi_cpu
116  * 2) Completion Processing (Tx, Rx) - compl_cpu
117  *
118  * To begin with both napi_cpu and compl_cpu are on CPU0. Whenever a CPU goes
119  * on-line/off-line the CPU candidacy algorithm is triggered. The candidacy
120  * algo tries to pick up the first available non-boot CPU (i.e. not CPU0) for napi_cpu.
121  * If there are more processors free, it assigns one to compl_cpu.
122  * It also tries to ensure that both napi_cpu and compl_cpu are not on the same
123  * CPU, as much as possible.
124  *
125  * By design, both Tx and Rx completion jobs are run on the same CPU core, as it
126  * would allow Tx completion skb's to be released into a local free pool from
127  * which the rx buffer posts could have been serviced. It is important to note
128  * that a Tx packet may not have a large enough buffer for rx posting.
129  */
130 void dhd_select_cpu_candidacy(dhd_info_t *dhd)
131 {
132 	uint32 primary_available_cpus; /* count of primary available cpus */
133 	uint32 secondary_available_cpus; /* count of secondary available cpus */
134 	uint32 napi_cpu = 0; /* cpu selected for napi rx processing */
135 	uint32 compl_cpu = 0; /* cpu selected for completion jobs */
136 	uint32 tx_cpu = 0; /* cpu selected for tx processing job */
137 
138 	cpumask_clear(dhd->cpumask_primary_new);
139 	cpumask_clear(dhd->cpumask_secondary_new);
140 
141 	/*
142 	 * Now select from the primary mask. Even if a Job is
143 	 * already running on a CPU in secondary group, we still move
144 	 * to primary CPU. So no conditional checks.
145 	 */
146 	cpumask_and(dhd->cpumask_primary_new, dhd->cpumask_primary,
147 		dhd->cpumask_curr_avail);
148 
149 	cpumask_and(dhd->cpumask_secondary_new, dhd->cpumask_secondary,
150 		dhd->cpumask_curr_avail);
151 
152 	primary_available_cpus = cpumask_weight(dhd->cpumask_primary_new);
153 
154 	if (primary_available_cpus > 0) {
155 		napi_cpu = cpumask_first(dhd->cpumask_primary_new);
156 
157 		/* If no further CPU is available,
158 		 * cpumask_next returns >= nr_cpu_ids
159 		 */
160 		tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_primary_new);
161 		if (tx_cpu >= nr_cpu_ids)
162 			tx_cpu = 0;
163 
164 		/* In case there are no more CPUs, do completions & Tx in same CPU */
165 		compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_primary_new);
166 		if (compl_cpu >= nr_cpu_ids)
167 			compl_cpu = tx_cpu;
168 	}
169 
170 	DHD_INFO(("%s After primary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
171 		__FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
172 
173 	/* -- Now check for the CPUs from the secondary mask -- */
174 	secondary_available_cpus = cpumask_weight(dhd->cpumask_secondary_new);
175 
176 	DHD_INFO(("%s Available secondary cpus %d nr_cpu_ids %d\n",
177 		__FUNCTION__, secondary_available_cpus, nr_cpu_ids));
178 
179 	if (secondary_available_cpus > 0) {
180 		/* At this point if napi_cpu is unassigned it means no CPU
181 		 * is online from Primary Group
182 		 */
183 		if (napi_cpu == 0) {
184 			napi_cpu = cpumask_first(dhd->cpumask_secondary_new);
185 			tx_cpu = cpumask_next(napi_cpu, dhd->cpumask_secondary_new);
186 			compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
187 		} else if (tx_cpu == 0) {
188 			tx_cpu = cpumask_first(dhd->cpumask_secondary_new);
189 			compl_cpu = cpumask_next(tx_cpu, dhd->cpumask_secondary_new);
190 		} else if (compl_cpu == 0) {
191 			compl_cpu = cpumask_first(dhd->cpumask_secondary_new);
192 		}
193 
194 		/* If no CPU was available for tx processing, choose CPU 0 */
195 		if (tx_cpu >= nr_cpu_ids)
196 			tx_cpu = 0;
197 
198 		/* If no CPU was available for completion, choose CPU 0 */
199 		if (compl_cpu >= nr_cpu_ids)
200 			compl_cpu = 0;
201 	}
202 	if ((primary_available_cpus == 0) &&
203 		(secondary_available_cpus == 0)) {
204 		/* No CPUs available from primary or secondary mask */
205 		napi_cpu = 1;
206 		compl_cpu = 0;
207 		tx_cpu = 2;
208 	}
209 
210 	DHD_INFO(("%s After secondary CPU check napi_cpu %d compl_cpu %d tx_cpu %d\n",
211 		__FUNCTION__, napi_cpu, compl_cpu, tx_cpu));
212 
213 	ASSERT(napi_cpu < nr_cpu_ids);
214 	ASSERT(compl_cpu < nr_cpu_ids);
215 	ASSERT(tx_cpu < nr_cpu_ids);
216 
217 	atomic_set(&dhd->rx_napi_cpu, napi_cpu);
218 	atomic_set(&dhd->tx_compl_cpu, compl_cpu);
219 	atomic_set(&dhd->rx_compl_cpu, compl_cpu);
220 	atomic_set(&dhd->tx_cpu, tx_cpu);
221 
222 	return;
223 }
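
/*
 * Worked example of the selection above (editorial sketch, assuming the
 * hypothetical masks from the earlier example: CPUs 4-7 primary, CPUs 1-3
 * secondary, all online): cpumask_first()/cpumask_next() walk the primary
 * mask, giving napi_cpu = 4, tx_cpu = 5 and compl_cpu = 6. If only CPU4 of
 * the primary group were online, napi_cpu would stay on 4 while tx_cpu and
 * compl_cpu would be re-picked from the secondary mask (CPUs 1 and 2 here).
 */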
224 
225 /*
226  * Function to handle CPU Hotplug notifications.
227  * One of the task it does is to trigger the CPU Candidacy algorithm
228  * for load balancing.
229  */
230 
231 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
232 
233 int dhd_cpu_startup_callback(unsigned int cpu)
234 {
235 	dhd_info_t *dhd = g_dhd_pub->info;
236 
237 	DHD_INFO(("%s(): \r\n cpu:%d", __FUNCTION__, cpu));
238 	DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
239 	cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
240 	dhd_select_cpu_candidacy(dhd);
241 
242 	return 0;
243 }
244 
245 int dhd_cpu_teardown_callback(unsigned int cpu)
246 {
247 	dhd_info_t *dhd = g_dhd_pub->info;
248 
249 	DHD_INFO(("%s(): \r\n cpu:%d", __FUNCTION__, cpu));
250 	DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
251 	cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
252 	dhd_select_cpu_candidacy(dhd);
253 
254 	return 0;
255 }
256 #else
257 int
258 dhd_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
259 {
260 	unsigned long int cpu = (unsigned long int)hcpu;
261 
262 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
263 #pragma GCC diagnostic push
264 #pragma GCC diagnostic ignored "-Wcast-qual"
265 #endif // endif
266 	dhd_info_t *dhd = container_of(nfb, dhd_info_t, cpu_notifier);
267 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
268 #pragma GCC diagnostic pop
269 #endif // endif
270 
271 	if (!dhd || !(dhd->dhd_state & DHD_ATTACH_STATE_LB_ATTACH_DONE)) {
272 		DHD_INFO(("%s(): LB data is not initialized yet.\n",
273 			__FUNCTION__));
274 		return NOTIFY_BAD;
275 	}
276 
277 	switch (action)
278 	{
279 		case CPU_ONLINE:
280 		case CPU_ONLINE_FROZEN:
281 			DHD_LB_STATS_INCR(dhd->cpu_online_cnt[cpu]);
282 			cpumask_set_cpu(cpu, dhd->cpumask_curr_avail);
283 			dhd_select_cpu_candidacy(dhd);
284 			break;
285 
286 		case CPU_DOWN_PREPARE:
287 		case CPU_DOWN_PREPARE_FROZEN:
288 			DHD_LB_STATS_INCR(dhd->cpu_offline_cnt[cpu]);
289 			cpumask_clear_cpu(cpu, dhd->cpumask_curr_avail);
290 			dhd_select_cpu_candidacy(dhd);
291 			break;
292 		default:
293 			break;
294 	}
295 
296 	return NOTIFY_OK;
297 }
298 #endif /* LINUX_VERSION_CODE < 4.10.0 */
299 
300 int dhd_register_cpuhp_callback(dhd_info_t *dhd)
301 {
302 	int cpuhp_ret = 0;
303 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
304 	cpuhp_ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dhd",
305 		dhd_cpu_startup_callback, dhd_cpu_teardown_callback);
306 
307 	if (cpuhp_ret < 0) {
308 		DHD_ERROR(("%s(): cpuhp_setup_state failed %d RX LB won't happen \r\n",
309 			__FUNCTION__, cpuhp_ret));
310 	}
311 #else
312 	/*
313 	 * If we are able to initialize CPU masks, lets register to the
314 	 * CPU Hotplug framework to change the CPU for each job dynamically
315 	 * using candidacy algorithm.
316 	 */
317 	dhd->cpu_notifier.notifier_call = dhd_cpu_callback;
318 	register_hotcpu_notifier(&dhd->cpu_notifier); /* Register a callback */
319 #endif /* LINUX_VERSION_CODE < 4.10.0 */
320 	return cpuhp_ret;
321 }
322 
323 int dhd_unregister_cpuhp_callback(dhd_info_t *dhd)
324 {
325 	int ret = 0;
326 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0))
327 	/* Don't want to call tear down while unregistering */
328 	cpuhp_remove_state_nocalls(CPUHP_AP_ONLINE_DYN);
329 #else
330 	if (dhd->cpu_notifier.notifier_call != NULL) {
331 		unregister_cpu_notifier(&dhd->cpu_notifier);
332 	}
333 #endif // endif
334 	return ret;
335 }
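
/*
 * Minimal usage sketch of the cpuhp API pair used above (editorial example,
 * not part of the driver; assumes <linux/cpuhotplug.h>). With
 * CPUHP_AP_ONLINE_DYN the kernel allocates a dynamic state and returns its
 * id, which callers typically save so that exactly that state can be
 * removed later. The sketch reuses the driver's startup/teardown callbacks.
 */
#if 0	/* illustrative only */
static enum cpuhp_state example_hp_state;

static int example_register_cpuhp(void)
{
	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example:online",
		dhd_cpu_startup_callback, dhd_cpu_teardown_callback);
	if (ret < 0)
		return ret;
	example_hp_state = ret;	/* dynamically allocated state id */
	return 0;
}

static void example_unregister_cpuhp(void)
{
	/* _nocalls: do not run the teardown callback while unregistering */
	cpuhp_remove_state_nocalls(example_hp_state);
}
#endif	/* illustrative only */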
336 
337 #if defined(DHD_LB_STATS)
338 void dhd_lb_stats_init(dhd_pub_t *dhdp)
339 {
340 	dhd_info_t *dhd;
341 	int i, j, num_cpus = num_possible_cpus();
342 	int alloc_size = sizeof(uint32) * num_cpus;
343 
344 	if (dhdp == NULL) {
345 		DHD_ERROR(("%s(): Invalid argument dhd pubb pointer is NULL \n",
346 			__FUNCTION__));
347 		return;
348 	}
349 
350 	dhd = dhdp->info;
351 	if (dhd == NULL) {
352 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
353 		return;
354 	}
355 
356 	DHD_LB_STATS_CLR(dhd->dhd_dpc_cnt);
357 	DHD_LB_STATS_CLR(dhd->napi_sched_cnt);
358 
359 	dhd->napi_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
360 	if (!dhd->napi_percpu_run_cnt) {
361 		DHD_ERROR(("%s(): napi_percpu_run_cnt malloc failed \n",
362 			__FUNCTION__));
363 		return;
364 	}
365 	for (i = 0; i < num_cpus; i++)
366 		DHD_LB_STATS_CLR(dhd->napi_percpu_run_cnt[i]);
367 
368 	DHD_LB_STATS_CLR(dhd->rxc_sched_cnt);
369 
370 	dhd->rxc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
371 	if (!dhd->rxc_percpu_run_cnt) {
372 		DHD_ERROR(("%s(): rxc_percpu_run_cnt malloc failed \n",
373 			__FUNCTION__));
374 		return;
375 	}
376 	for (i = 0; i < num_cpus; i++)
377 		DHD_LB_STATS_CLR(dhd->rxc_percpu_run_cnt[i]);
378 
379 	DHD_LB_STATS_CLR(dhd->txc_sched_cnt);
380 
381 	dhd->txc_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
382 	if (!dhd->txc_percpu_run_cnt) {
383 		DHD_ERROR(("%s(): txc_percpu_run_cnt malloc failed \n",
384 			__FUNCTION__));
385 		return;
386 	}
387 	for (i = 0; i < num_cpus; i++)
388 		DHD_LB_STATS_CLR(dhd->txc_percpu_run_cnt[i]);
389 
390 	dhd->cpu_online_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
391 	if (!dhd->cpu_online_cnt) {
392 		DHD_ERROR(("%s(): cpu_online_cnt malloc failed \n",
393 			__FUNCTION__));
394 		return;
395 	}
396 	for (i = 0; i < num_cpus; i++)
397 		DHD_LB_STATS_CLR(dhd->cpu_online_cnt[i]);
398 
399 	dhd->cpu_offline_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
400 	if (!dhd->cpu_offline_cnt) {
401 		DHD_ERROR(("%s(): cpu_offline_cnt malloc failed \n",
402 			__FUNCTION__));
403 		return;
404 	}
405 	for (i = 0; i < num_cpus; i++)
406 		DHD_LB_STATS_CLR(dhd->cpu_offline_cnt[i]);
407 
408 	dhd->txp_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
409 	if (!dhd->txp_percpu_run_cnt) {
410 		DHD_ERROR(("%s(): txp_percpu_run_cnt malloc failed \n",
411 			__FUNCTION__));
412 		return;
413 	}
414 	for (i = 0; i < num_cpus; i++)
415 		DHD_LB_STATS_CLR(dhd->txp_percpu_run_cnt[i]);
416 
417 	dhd->tx_start_percpu_run_cnt = (uint32 *)MALLOC(dhdp->osh, alloc_size);
418 	if (!dhd->tx_start_percpu_run_cnt) {
419 		DHD_ERROR(("%s(): tx_start_percpu_run_cnt malloc failed \n",
420 			__FUNCTION__));
421 		return;
422 	}
423 	for (i = 0; i < num_cpus; i++)
424 		DHD_LB_STATS_CLR(dhd->tx_start_percpu_run_cnt[i]);
425 
426 	for (j = 0; j < HIST_BIN_SIZE; j++) {
427 		dhd->napi_rx_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
428 		if (!dhd->napi_rx_hist[j]) {
429 			DHD_ERROR(("%s(): dhd->napi_rx_hist[%d] malloc failed \n",
430 				__FUNCTION__, j));
431 			return;
432 		}
433 		for (i = 0; i < num_cpus; i++) {
434 			DHD_LB_STATS_CLR(dhd->napi_rx_hist[j][i]);
435 		}
436 	}
437 #ifdef DHD_LB_TXC
438 	for (j = 0; j < HIST_BIN_SIZE; j++) {
439 		dhd->txc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
440 		if (!dhd->txc_hist[j]) {
441 			DHD_ERROR(("%s(): dhd->txc_hist[%d] malloc failed \n",
442 			         __FUNCTION__, j));
443 			return;
444 		}
445 		for (i = 0; i < num_cpus; i++) {
446 			DHD_LB_STATS_CLR(dhd->txc_hist[j][i]);
447 		}
448 	}
449 #endif /* DHD_LB_TXC */
450 #ifdef DHD_LB_RXC
451 	for (j = 0; j < HIST_BIN_SIZE; j++) {
452 		dhd->rxc_hist[j] = (uint32 *)MALLOC(dhdp->osh, alloc_size);
453 		if (!dhd->rxc_hist[j]) {
454 			DHD_ERROR(("%s(): dhd->rxc_hist[%d] malloc failed \n",
455 				__FUNCTION__, j));
456 			return;
457 		}
458 		for (i = 0; i < num_cpus; i++) {
459 			DHD_LB_STATS_CLR(dhd->rxc_hist[j][i]);
460 		}
461 	}
462 #endif /* DHD_LB_RXC */
463 	return;
464 }
465 
466 void dhd_lb_stats_deinit(dhd_pub_t *dhdp)
467 {
468 	dhd_info_t *dhd;
469 	int j, num_cpus = num_possible_cpus();
470 	int alloc_size = sizeof(uint32) * num_cpus;
471 
472 	if (dhdp == NULL) {
473 		DHD_ERROR(("%s(): Invalid argument dhd pubb pointer is NULL \n",
474 			__FUNCTION__));
475 		return;
476 	}
477 
478 	dhd = dhdp->info;
479 	if (dhd == NULL) {
480 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
481 		return;
482 	}
483 
484 	if (dhd->napi_percpu_run_cnt) {
485 		MFREE(dhdp->osh, dhd->napi_percpu_run_cnt, alloc_size);
486 		dhd->napi_percpu_run_cnt = NULL;
487 	}
488 	if (dhd->rxc_percpu_run_cnt) {
489 		MFREE(dhdp->osh, dhd->rxc_percpu_run_cnt, alloc_size);
490 		dhd->rxc_percpu_run_cnt = NULL;
491 	}
492 	if (dhd->txc_percpu_run_cnt) {
493 		MFREE(dhdp->osh, dhd->txc_percpu_run_cnt, alloc_size);
494 		dhd->txc_percpu_run_cnt = NULL;
495 	}
496 	if (dhd->cpu_online_cnt) {
497 		MFREE(dhdp->osh, dhd->cpu_online_cnt, alloc_size);
498 		dhd->cpu_online_cnt = NULL;
499 	}
500 	if (dhd->cpu_offline_cnt) {
501 		MFREE(dhdp->osh, dhd->cpu_offline_cnt, alloc_size);
502 		dhd->cpu_offline_cnt = NULL;
503 	}
504 
505 	if (dhd->txp_percpu_run_cnt) {
506 		MFREE(dhdp->osh, dhd->txp_percpu_run_cnt, alloc_size);
507 		dhd->txp_percpu_run_cnt = NULL;
508 	}
509 	if (dhd->tx_start_percpu_run_cnt) {
510 		MFREE(dhdp->osh, dhd->tx_start_percpu_run_cnt, alloc_size);
511 		dhd->tx_start_percpu_run_cnt = NULL;
512 	}
513 
514 	for (j = 0; j < HIST_BIN_SIZE; j++) {
515 		if (dhd->napi_rx_hist[j]) {
516 			MFREE(dhdp->osh, dhd->napi_rx_hist[j], alloc_size);
517 			dhd->napi_rx_hist[j] = NULL;
518 		}
519 #ifdef DHD_LB_TXC
520 		if (dhd->txc_hist[j]) {
521 			MFREE(dhdp->osh, dhd->txc_hist[j], alloc_size);
522 			dhd->txc_hist[j] = NULL;
523 		}
524 #endif /* DHD_LB_TXC */
525 #ifdef DHD_LB_RXC
526 		if (dhd->rxc_hist[j]) {
527 			MFREE(dhdp->osh, dhd->rxc_hist[j], alloc_size);
528 			dhd->rxc_hist[j] = NULL;
529 		}
530 #endif /* DHD_LB_RXC */
531 	}
532 
533 	return;
534 }
535 
536 void dhd_lb_stats_dump_histo(dhd_pub_t *dhdp,
537 	struct bcmstrbuf *strbuf, uint32 **hist)
538 {
539 	int i, j;
540 	uint32 *per_cpu_total;
541 	uint32 total = 0;
542 	uint32 num_cpus = num_possible_cpus();
543 
544 	per_cpu_total = (uint32 *)MALLOC(dhdp->osh, sizeof(uint32) * num_cpus);
545 	if (!per_cpu_total) {
546 		DHD_ERROR(("%s(): dhd->per_cpu_total malloc failed \n", __FUNCTION__));
547 		return;
548 	}
549 	bzero(per_cpu_total, sizeof(uint32) * num_cpus);
550 
551 	bcm_bprintf(strbuf, "CPU: \t\t");
552 	for (i = 0; i < num_cpus; i++)
553 		bcm_bprintf(strbuf, "%d\t", i);
554 	bcm_bprintf(strbuf, "\nBin\n");
555 
556 	for (i = 0; i < HIST_BIN_SIZE; i++) {
557 		bcm_bprintf(strbuf, "%d:\t\t", 1<<i);
558 		for (j = 0; j < num_cpus; j++) {
559 			bcm_bprintf(strbuf, "%d\t", hist[i][j]);
560 		}
561 		bcm_bprintf(strbuf, "\n");
562 	}
563 	bcm_bprintf(strbuf, "Per CPU Total \t");
564 	total = 0;
565 	for (i = 0; i < num_cpus; i++) {
566 		for (j = 0; j < HIST_BIN_SIZE; j++) {
567 			per_cpu_total[i] += (hist[j][i] * (1<<j));
568 		}
569 		bcm_bprintf(strbuf, "%d\t", per_cpu_total[i]);
570 		total += per_cpu_total[i];
571 	}
572 	bcm_bprintf(strbuf, "\nTotal\t\t%d \n", total);
573 
574 	if (per_cpu_total) {
575 		MFREE(dhdp->osh, per_cpu_total, sizeof(uint32) * num_cpus);
576 		per_cpu_total = NULL;
577 	}
578 	return;
579 }
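
/*
 * Worked example of the per-CPU totals printed above (editorial sketch):
 * bin j holds the number of runs whose packet count rounded up to 2^j, so
 * the total is approximated as sum(hist[j][cpu] * 2^j). If CPU2 had 5 runs
 * in the "1" bin, 3 runs in the "4" bin and 1 run in the "32" bin, its
 * reported per-CPU total would be 5*1 + 3*4 + 1*32 = 49 packets.
 */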
580 
581 void dhd_lb_stats_dump_cpu_array(struct bcmstrbuf *strbuf, uint32 *p)
582 {
583 	int i, num_cpus = num_possible_cpus();
584 
585 	bcm_bprintf(strbuf, "CPU: \t");
586 	for (i = 0; i < num_cpus; i++)
587 		bcm_bprintf(strbuf, "%d\t", i);
588 	bcm_bprintf(strbuf, "\n");
589 
590 	bcm_bprintf(strbuf, "Val: \t");
591 	for (i = 0; i < num_cpus; i++)
592 		bcm_bprintf(strbuf, "%u\t", *(p+i));
593 	bcm_bprintf(strbuf, "\n");
594 	return;
595 }
596 
597 void dhd_lb_stats_dump(dhd_pub_t *dhdp, struct bcmstrbuf *strbuf)
598 {
599 	dhd_info_t *dhd;
600 
601 	if (dhdp == NULL || strbuf == NULL) {
602 		DHD_ERROR(("%s(): Invalid argument dhdp %p strbuf %p \n",
603 			__FUNCTION__, dhdp, strbuf));
604 		return;
605 	}
606 
607 	dhd = dhdp->info;
608 	if (dhd == NULL) {
609 		DHD_ERROR(("%s(): DHD pointer is NULL \n", __FUNCTION__));
610 		return;
611 	}
612 
613 	bcm_bprintf(strbuf, "\ncpu_online_cnt:\n");
614 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_online_cnt);
615 
616 	bcm_bprintf(strbuf, "\ncpu_offline_cnt:\n");
617 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->cpu_offline_cnt);
618 
619 	bcm_bprintf(strbuf, "\nsched_cnt: dhd_dpc %u napi %u rxc %u txc %u\n",
620 		dhd->dhd_dpc_cnt, dhd->napi_sched_cnt, dhd->rxc_sched_cnt,
621 		dhd->txc_sched_cnt);
622 
623 #ifdef DHD_LB_RXP
624 	bcm_bprintf(strbuf, "\nnapi_percpu_run_cnt:\n");
625 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->napi_percpu_run_cnt);
626 	bcm_bprintf(strbuf, "\nNAPI Packets Received Histogram:\n");
627 	dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->napi_rx_hist);
628 #endif /* DHD_LB_RXP */
629 
630 #ifdef DHD_LB_RXC
631 	bcm_bprintf(strbuf, "\nrxc_percpu_run_cnt:\n");
632 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->rxc_percpu_run_cnt);
633 	bcm_bprintf(strbuf, "\nRX Completions (Buffer Post) Histogram:\n");
634 	dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->rxc_hist);
635 #endif /* DHD_LB_RXC */
636 
637 #ifdef DHD_LB_TXC
638 	bcm_bprintf(strbuf, "\ntxc_percpu_run_cnt:\n");
639 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->txc_percpu_run_cnt);
640 	bcm_bprintf(strbuf, "\nTX Completions (Buffer Free) Histogram:\n");
641 	dhd_lb_stats_dump_histo(dhdp, strbuf, dhd->txc_hist);
642 #endif /* DHD_LB_TXC */
643 
644 #ifdef DHD_LB_TXP
645 	bcm_bprintf(strbuf, "\ntxp_percpu_run_cnt:\n");
646 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->txp_percpu_run_cnt);
647 
648 	bcm_bprintf(strbuf, "\ntx_start_percpu_run_cnt:\n");
649 	dhd_lb_stats_dump_cpu_array(strbuf, dhd->tx_start_percpu_run_cnt);
650 #endif /* DHD_LB_TXP */
651 }
652 
653 /* Given a number 'n', returns 'm', the smallest power of 2 such that m >= n */
654 static inline uint32 next_larger_power2(uint32 num)
655 {
656 	num--;
657 	num |= (num >> 1);
658 	num |= (num >> 2);
659 	num |= (num >> 4);
660 	num |= (num >> 8);
661 	num |= (num >> 16);
662 
663 	return (num + 1);
664 }
665 
666 void dhd_lb_stats_update_histo(uint32 **bin, uint32 count, uint32 cpu)
667 {
668 	uint32 bin_power;
669 	uint32 *p;
670 	bin_power = next_larger_power2(count);
671 
672 	switch (bin_power) {
673 		case   1: p = bin[0] + cpu; break;
674 		case   2: p = bin[1] + cpu; break;
675 		case   4: p = bin[2] + cpu; break;
676 		case   8: p = bin[3] + cpu; break;
677 		case  16: p = bin[4] + cpu; break;
678 		case  32: p = bin[5] + cpu; break;
679 		case  64: p = bin[6] + cpu; break;
680 		case 128: p = bin[7] + cpu; break;
681 		default : p = bin[8] + cpu; break;
682 	}
683 
684 	*p = *p + 1;
685 	return;
686 }
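
/*
 * Worked example of the binning above (editorial sketch): a NAPI run that
 * processed 37 packets gives next_larger_power2(37) == 64, so bin[6] is
 * incremented for the current CPU; a run of exactly 64 packets also lands
 * in bin[6], and any count that rounds up beyond 128 falls into the
 * catch-all bin[8].
 */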
687 
688 void dhd_lb_stats_update_napi_histo(dhd_pub_t *dhdp, uint32 count)
689 {
690 	int cpu;
691 	dhd_info_t *dhd = dhdp->info;
692 
693 	cpu = get_cpu();
694 	put_cpu();
695 	dhd_lb_stats_update_histo(dhd->napi_rx_hist, count, cpu);
696 
697 	return;
698 }
699 
700 void dhd_lb_stats_update_txc_histo(dhd_pub_t *dhdp, uint32 count)
701 {
702 	int cpu;
703 	dhd_info_t *dhd = dhdp->info;
704 
705 	cpu = get_cpu();
706 	put_cpu();
707 	dhd_lb_stats_update_histo(dhd->txc_hist, count, cpu);
708 
709 	return;
710 }
711 
712 void dhd_lb_stats_update_rxc_histo(dhd_pub_t *dhdp, uint32 count)
713 {
714 	int cpu;
715 	dhd_info_t *dhd = dhdp->info;
716 
717 	cpu = get_cpu();
718 	put_cpu();
719 	dhd_lb_stats_update_histo(dhd->rxc_hist, count, cpu);
720 
721 	return;
722 }
723 
724 void dhd_lb_stats_txc_percpu_cnt_incr(dhd_pub_t *dhdp)
725 {
726 	dhd_info_t *dhd = dhdp->info;
727 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txc_percpu_run_cnt);
728 }
729 
730 void dhd_lb_stats_rxc_percpu_cnt_incr(dhd_pub_t *dhdp)
731 {
732 	dhd_info_t *dhd = dhdp->info;
733 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->rxc_percpu_run_cnt);
734 }
735 #endif /* DHD_LB_STATS */
736 
737 #endif /* DHD_LB */
738 #if defined(DHD_LB)
739 /**
740  * dhd_tasklet_schedule - Function that runs in IPI context of the destination
741  * CPU and schedules a tasklet.
742  * @tasklet: opaque pointer to the tasklet
743  */
744 INLINE void
745 dhd_tasklet_schedule(void *tasklet)
746 {
747 	tasklet_schedule((struct tasklet_struct *)tasklet);
748 }
749 /**
750  * dhd_tasklet_schedule_on - Executes the passed tasklet on a given CPU
751  * @tasklet: tasklet to be scheduled
752  * @on_cpu: cpu core id
753  *
754  * If the requested cpu is online, then an IPI is sent to this cpu via the
755  * smp_call_function_single with no wait and the tasklet_schedule function
756  * will be invoked to schedule the specified tasklet on the requested CPU.
757  */
758 INLINE void
759 dhd_tasklet_schedule_on(struct tasklet_struct *tasklet, int on_cpu)
760 {
761 	const int wait = 0;
762 	smp_call_function_single(on_cpu,
763 		dhd_tasklet_schedule, (void *)tasklet, wait);
764 }
765 
766 /**
767  * dhd_work_schedule_on - Executes the passed work on a given CPU
768  * @work: work to be scheduled
769  * @on_cpu: cpu core id
770  *
771  * If the requested cpu is online, the work item is queued on that cpu
772  * via schedule_work_on and the work function
773  * will be invoked on the requested CPU.
774  */
775 
776 INLINE void
777 dhd_work_schedule_on(struct work_struct *work, int on_cpu)
778 {
779 	schedule_work_on(on_cpu, work);
780 }
781 
782 #if defined(DHD_LB_TXC)
783 /**
784  * dhd_lb_tx_compl_dispatch - load balance by dispatching the tx_compl_tasklet
785  * on another cpu. The tx_compl_tasklet will take care of DMA unmapping and
786  * freeing the packets placed in the tx_compl workq
787  */
788 void
789 dhd_lb_tx_compl_dispatch(dhd_pub_t *dhdp)
790 {
791 	dhd_info_t *dhd = dhdp->info;
792 	int curr_cpu, on_cpu;
793 
794 	if (dhd->rx_napi_netdev == NULL) {
795 		DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
796 		return;
797 	}
798 
799 	DHD_LB_STATS_INCR(dhd->txc_sched_cnt);
800 	/*
801 	 * If the destination CPU is NOT online or is the same as the current
802 	 * CPU, there is no need to schedule the work
803 	 */
804 	curr_cpu = get_cpu();
805 	put_cpu();
806 
807 	on_cpu = atomic_read(&dhd->tx_compl_cpu);
808 
809 	if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
810 		dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
811 	} else {
812 		schedule_work(&dhd->tx_compl_dispatcher_work);
813 	}
814 }
815 
816 static void dhd_tx_compl_dispatcher_fn(struct work_struct * work)
817 {
818 	struct dhd_info *dhd =
819 		container_of(work, struct dhd_info, tx_compl_dispatcher_work);
820 	int cpu;
821 
822 	get_online_cpus();
823 	cpu = atomic_read(&dhd->tx_compl_cpu);
824 	if (!cpu_online(cpu))
825 		dhd_tasklet_schedule(&dhd->tx_compl_tasklet);
826 	else
827 		dhd_tasklet_schedule_on(&dhd->tx_compl_tasklet, cpu);
828 	put_online_cpus();
829 }
830 #endif /* DHD_LB_TXC */
831 
832 #if defined(DHD_LB_RXC)
833 /**
834  * dhd_lb_rx_compl_dispatch - load balance by dispatching the rx_compl_tasklet
835  * on another cpu. The rx_compl_tasklet will take care of reposting rx buffers
836  * in the H2D RxBuffer Post common ring, by using the recycled pktids that were
837  * placed in the rx_compl workq.
838  *
839  * @dhdp: pointer to dhd_pub object
840  */
841 void
842 dhd_lb_rx_compl_dispatch(dhd_pub_t *dhdp)
843 {
844 	dhd_info_t *dhd = dhdp->info;
845 	int curr_cpu, on_cpu;
846 
847 	if (dhd->rx_napi_netdev == NULL) {
848 		DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
849 		return;
850 	}
851 
852 	DHD_LB_STATS_INCR(dhd->rxc_sched_cnt);
853 	/*
854 	 * If the destination CPU is NOT online or is the same as the current
855 	 * CPU, there is no need to schedule the work
856 	 */
857 	curr_cpu = get_cpu();
858 	put_cpu();
859 	on_cpu = atomic_read(&dhd->rx_compl_cpu);
860 
861 	if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu))) {
862 		dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
863 	} else {
864 		schedule_work(&dhd->rx_compl_dispatcher_work);
865 	}
866 }
867 
868 void dhd_rx_compl_dispatcher_fn(struct work_struct * work)
869 {
870 	struct dhd_info *dhd =
871 		container_of(work, struct dhd_info, rx_compl_dispatcher_work);
872 	int cpu;
873 
874 	get_online_cpus();
875 	cpu = atomic_read(&dhd->rx_compl_cpu);
876 	if (!cpu_online(cpu))
877 		dhd_tasklet_schedule(&dhd->rx_compl_tasklet);
878 	else {
879 		dhd_tasklet_schedule_on(&dhd->rx_compl_tasklet, cpu);
880 	}
881 	put_online_cpus();
882 }
883 #endif /* DHD_LB_RXC */
884 
885 #if defined(DHD_LB_TXP)
886 void dhd_tx_dispatcher_work(struct work_struct * work)
887 {
888 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
889 #pragma GCC diagnostic push
890 #pragma GCC diagnostic ignored "-Wcast-qual"
891 #endif // endif
892 	struct dhd_info *dhd =
893 		container_of(work, struct dhd_info, tx_dispatcher_work);
894 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
895 #pragma GCC diagnostic pop
896 #endif // endif
897 	dhd_tasklet_schedule(&dhd->tx_tasklet);
898 }
899 
900 void dhd_tx_dispatcher_fn(dhd_pub_t *dhdp)
901 {
902 	int cpu;
903 	int net_tx_cpu;
904 	dhd_info_t *dhd = dhdp->info;
905 
906 	preempt_disable();
907 	cpu = atomic_read(&dhd->tx_cpu);
908 	net_tx_cpu = atomic_read(&dhd->net_tx_cpu);
909 
910 	/*
911 	 * Now if the NET_TX has pushed the packet on the same
912 	 * CPU that is chosen for Tx processing, separate it out,
913 	 * i.e. run the TX processing tasklet on compl_cpu
914 	 */
915 	if (net_tx_cpu == cpu)
916 		cpu = atomic_read(&dhd->tx_compl_cpu);
917 
918 	if (!cpu_online(cpu)) {
919 		/*
920 		 * Ooohh... but the Chosen CPU is not online,
921 		 * Do the job in the current CPU itself.
922 		 */
923 		dhd_tasklet_schedule(&dhd->tx_tasklet);
924 	} else {
925 		/*
926 		 * Schedule tx_dispatcher_work on the cpu, which
927 		 * in turn will schedule tx_tasklet.
928 		 */
929 		dhd_work_schedule_on(&dhd->tx_dispatcher_work, cpu);
930 	}
931 	preempt_enable();
932 }
933 
934 /**
935  * dhd_lb_tx_dispatch - load balance by dispatching the tx_tasklet
936  * on another cpu. The tx_tasklet will take care of actually putting
937  * the skbs into appropriate flow ring and ringing H2D interrupt
938  *
939  * @dhdp: pointer to dhd_pub object
940  */
941 void
942 dhd_lb_tx_dispatch(dhd_pub_t *dhdp)
943 {
944 	dhd_info_t *dhd = dhdp->info;
945 	int curr_cpu;
946 
947 	curr_cpu = get_cpu();
948 	put_cpu();
949 
950 	/* Record the CPU in which the TX request from Network stack came */
951 	atomic_set(&dhd->net_tx_cpu, curr_cpu);
952 
953 	/* Schedule the work to dispatch ... */
954 	dhd_tx_dispatcher_fn(dhdp);
955 }
956 #endif /* DHD_LB_TXP */
957 
958 #if defined(DHD_LB_RXP)
959 /**
960  * dhd_napi_poll - Load balance napi poll function to process received
961  * packets and send up the network stack using netif_receive_skb()
962  *
963  * @napi: napi object in which context this poll function is invoked
964  * @budget: number of packets to be processed.
965  *
966  * Fetch the dhd_info given the rx_napi_struct. Move all packets from the
967  * rx_napi_queue into a local rx_process_queue (lock and queue move and unlock).
968  * Dequeue each packet from head of rx_process_queue, fetch the ifid from the
969  * packet tag and sendup.
970  */
971 int
972 dhd_napi_poll(struct napi_struct *napi, int budget)
973 {
974 	int ifid;
975 	const int pkt_count = 1;
976 	const int chan = 0;
977 	struct sk_buff * skb;
978 	unsigned long flags;
979 	struct dhd_info *dhd;
980 	int processed = 0;
981 	struct sk_buff_head rx_process_queue;
982 
983 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
984 #pragma GCC diagnostic push
985 #pragma GCC diagnostic ignored "-Wcast-qual"
986 #endif // endif
987 	dhd = container_of(napi, struct dhd_info, rx_napi_struct);
988 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
989 #pragma GCC diagnostic pop
990 #endif // endif
991 
992 	DHD_INFO(("%s napi_queue<%d> budget<%d>\n",
993 		__FUNCTION__, skb_queue_len(&dhd->rx_napi_queue), budget));
994 	__skb_queue_head_init(&rx_process_queue);
995 
996 	/* extract the entire rx_napi_queue into local rx_process_queue */
997 	spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
998 	skb_queue_splice_tail_init(&dhd->rx_napi_queue, &rx_process_queue);
999 	spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1000 
1001 	while ((skb = __skb_dequeue(&rx_process_queue)) != NULL) {
1002 		OSL_PREFETCH(skb->data);
1003 
1004 		ifid = DHD_PKTTAG_IFID((dhd_pkttag_fr_t *)PKTTAG(skb));
1005 
1006 		DHD_INFO(("%s dhd_rx_frame pkt<%p> ifid<%d>\n",
1007 			__FUNCTION__, skb, ifid));
1008 
1009 		dhd_rx_frame(&dhd->pub, ifid, skb, pkt_count, chan);
1010 		processed++;
1011 	}
1012 
1013 	DHD_LB_STATS_UPDATE_NAPI_HISTO(&dhd->pub, processed);
1014 
1015 	DHD_INFO(("%s processed %d\n", __FUNCTION__, processed));
1016 	napi_complete(napi);
1017 
1018 	return budget - 1;
1019 }
1020 
1021 /**
1022  * dhd_napi_schedule - Place the napi struct into the current cpus softnet napi
1023  * poll list. This function may be invoked via the smp_call_function_single
1024  * from a remote CPU.
1025  *
1026  * This function will essentially invoke __raise_softirq_irqoff(NET_RX_SOFTIRQ)
1027  * after the napi_struct is added to the softnet data's poll_list
1028  *
1029  * @info: pointer to a dhd_info struct
1030  */
1031 static void
1032 dhd_napi_schedule(void *info)
1033 {
1034 	dhd_info_t *dhd = (dhd_info_t *)info;
1035 
1036 	DHD_INFO(("%s rx_napi_struct<%p> on cpu<%d>\n",
1037 		__FUNCTION__, &dhd->rx_napi_struct, atomic_read(&dhd->rx_napi_cpu)));
1038 
1039 	/* add napi_struct to softnet data poll list and raise NET_RX_SOFTIRQ */
1040 	if (napi_schedule_prep(&dhd->rx_napi_struct)) {
1041 		__napi_schedule(&dhd->rx_napi_struct);
1042 #ifdef WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE
1043 		raise_softirq(NET_RX_SOFTIRQ);
1044 #endif /* WAKEUP_KSOFTIRQD_POST_NAPI_SCHEDULE */
1045 	}
1046 
1047 	/*
1048 	 * If the rx_napi_struct was already running, then we let it complete
1049 	 * processing all its packets. The rx_napi_struct may only run on one
1050 	 * core at a time, to avoid out-of-order handling.
1051 	 */
1052 }
1053 
1054 /**
1055  * dhd_napi_schedule_on - API to schedule on a desired CPU core a NET_RX_SOFTIRQ
1056  * action after placing the dhd's rx_process napi object in the remote CPU's
1057  * softnet data's poll_list.
1058  *
1059  * @dhd: dhd_info which has the rx_process napi object
1060  * @on_cpu: desired remote CPU id
1061  */
1062 static INLINE int
1063 dhd_napi_schedule_on(dhd_info_t *dhd, int on_cpu)
1064 {
1065 	int wait = 0; /* asynchronous IPI */
1066 	DHD_INFO(("%s dhd<%p> napi<%p> on_cpu<%d>\n",
1067 		__FUNCTION__, dhd, &dhd->rx_napi_struct, on_cpu));
1068 
1069 	if (smp_call_function_single(on_cpu, dhd_napi_schedule, dhd, wait)) {
1070 		DHD_ERROR(("%s smp_call_function_single on_cpu<%d> failed\n",
1071 			__FUNCTION__, on_cpu));
1072 	}
1073 
1074 	DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1075 
1076 	return 0;
1077 }
1078 
1079 /*
1080  * Call get_online_cpus/put_online_cpus around dhd_napi_schedule_on
1081  * Why should we do this?
1082  * The candidacy algorithm is run from the call back function
1083  * registered to CPU hotplug notifier. This call back happens from Worker
1084  * context. The dhd_napi_schedule_on is also from worker context.
1085  * Note that both of this can run on two different CPUs at the same time.
1086  * So we can possibly have a window where a given CPUn is being brought
1087  * down from CPUm while we try to run a function on CPUn.
1088  * To prevent this it is better to have the whole code that executes an SMP
1089  * function run under get_online_cpus.
1090  * This call ensures that the hotplug mechanism does not kick in
1091  * until we are done dealing with online CPUs.
1092  * If the hotplug worker is already running, no worries because the
1093  * candidacy algo would then reflect the same in dhd->rx_napi_cpu.
1094  *
1095  * The below mentioned code structure is proposed in
1096  * https://www.kernel.org/doc/Documentation/cpu-hotplug.txt
1097  * for the question
1098  * Q: I need to ensure that a particular cpu is not removed when there is some
1099  *    work specific to this cpu is in progress
1100  *
1101  * According to the documentation calling get_online_cpus is NOT required, if
1102  * we are running from tasklet context. Since dhd_rx_napi_dispatcher_fn can
1103  * run from Work Queue context we have to call these functions
1104  */
1105 void dhd_rx_napi_dispatcher_fn(struct work_struct * work)
1106 {
1107 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1108 #pragma GCC diagnostic push
1109 #pragma GCC diagnostic ignored "-Wcast-qual"
1110 #endif // endif
1111 	struct dhd_info *dhd =
1112 		container_of(work, struct dhd_info, rx_napi_dispatcher_work);
1113 #if defined(STRICT_GCC_WARNINGS) && defined(__GNUC__)
1114 #pragma GCC diagnostic pop
1115 #endif // endif
1116 
1117 	dhd_napi_schedule(dhd);
1118 }
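
/*
 * Editorial sketch of the get_online_cpus()/put_online_cpus() pattern
 * described in the comment above (illustration only, not driver code):
 * hotplug is pinned for the duration of the cross-CPU dispatch so that the
 * target CPU cannot be taken down between the cpu_online() check and the
 * IPI sent by dhd_napi_schedule_on().
 */
#if 0	/* illustrative only */
static void example_napi_dispatch(dhd_info_t *dhd)
{
	int cpu;

	get_online_cpus();	/* hotplug is blocked until put_online_cpus() */
	cpu = atomic_read(&dhd->rx_napi_cpu);
	if (cpu_online(cpu))
		dhd_napi_schedule_on(dhd, cpu);
	else
		dhd_napi_schedule(dhd);	/* fall back to the current CPU */
	put_online_cpus();
}
#endif	/* illustrative only */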
1119 
1120 /**
1121  * dhd_lb_rx_napi_dispatch - load balance by dispatching the rx_napi_struct
1122  * to run on another CPU. The rx_napi_struct's poll function will retrieve all
1123  * the packets enqueued into the rx_napi_queue and sendup.
1124  * The producer's rx packet queue is appended to the rx_napi_queue before
1125  * dispatching the rx_napi_struct.
1126  */
1127 void
1128 dhd_lb_rx_napi_dispatch(dhd_pub_t *dhdp)
1129 {
1130 	unsigned long flags;
1131 	dhd_info_t *dhd = dhdp->info;
1132 	int curr_cpu;
1133 	int on_cpu;
1134 #ifdef DHD_LB_IRQSET
1135 	cpumask_t cpus;
1136 #endif /* DHD_LB_IRQSET */
1137 
1138 	if (dhd->rx_napi_netdev == NULL) {
1139 		DHD_ERROR(("%s: dhd->rx_napi_netdev is NULL\n", __FUNCTION__));
1140 		return;
1141 	}
1142 
1143 	DHD_INFO(("%s append napi_queue<%d> pend_queue<%d>\n", __FUNCTION__,
1144 		skb_queue_len(&dhd->rx_napi_queue), skb_queue_len(&dhd->rx_pend_queue)));
1145 
1146 	/* append the producer's queue of packets to the napi's rx process queue */
1147 	spin_lock_irqsave(&dhd->rx_napi_queue.lock, flags);
1148 	skb_queue_splice_tail_init(&dhd->rx_pend_queue, &dhd->rx_napi_queue);
1149 	spin_unlock_irqrestore(&dhd->rx_napi_queue.lock, flags);
1150 
1151 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->napi_percpu_run_cnt);
1152 
1153 	/* if LB RXP is disabled directly schedule NAPI */
1154 	if (atomic_read(&dhd->lb_rxp_active) == 0) {
1155 		dhd_napi_schedule(dhd);
1156 		return;
1157 	}
1158 
1159 	/*
1160 	 * If the destination CPU is NOT online or is the same as the current
1161 	 * CPU, there is no need to schedule the work
1162 	 */
1163 	curr_cpu = get_cpu();
1164 	put_cpu();
1165 
1166 	preempt_disable();
1167 	on_cpu = atomic_read(&dhd->rx_napi_cpu);
1168 #ifdef DHD_LB_IRQSET
1169 	if (cpumask_and(&cpus, cpumask_of(curr_cpu), dhd->cpumask_primary) ||
1170 			(!cpu_online(on_cpu)))
1171 #else
1172 	if ((on_cpu == curr_cpu) || (!cpu_online(on_cpu)))
1173 #endif /* DHD_LB_IRQSET */
1174 	{
1175 		DHD_INFO(("%s : curr_cpu : %d, cpumask : 0x%lx\n", __FUNCTION__,
1176 			curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1177 		dhd_napi_schedule(dhd);
1178 	} else {
1179 		DHD_INFO(("%s : schedule to curr_cpu : %d, cpumask : 0x%lx\n",
1180 			__FUNCTION__, curr_cpu, *cpumask_bits(dhd->cpumask_primary)));
1181 		dhd_work_schedule_on(&dhd->rx_napi_dispatcher_work, on_cpu);
1182 		DHD_LB_STATS_INCR(dhd->napi_sched_cnt);
1183 	}
1184 	preempt_enable();
1185 }
1186 
1187 /**
1188  * dhd_lb_rx_pkt_enqueue - Enqueue the packet into the producer's queue
1189  */
1190 void
1191 dhd_lb_rx_pkt_enqueue(dhd_pub_t *dhdp, void *pkt, int ifidx)
1192 {
1193 	dhd_info_t *dhd = dhdp->info;
1194 
1195 	DHD_INFO(("%s enqueue pkt<%p> ifidx<%d> pend_queue<%d>\n", __FUNCTION__,
1196 		pkt, ifidx, skb_queue_len(&dhd->rx_pend_queue)));
1197 	DHD_PKTTAG_SET_IFID((dhd_pkttag_fr_t *)PKTTAG(pkt), ifidx);
1198 	__skb_queue_tail(&dhd->rx_pend_queue, pkt);
1199 }
1200 #endif /* DHD_LB_RXP */
1201 #endif /* DHD_LB */
1202 
1203 #if defined(DHD_LB_IRQSET) || defined(DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON)
1204 void
1205 dhd_irq_set_affinity(dhd_pub_t *dhdp, const struct cpumask *cpumask)
1206 {
1207 	unsigned int irq = (unsigned int)-1;
1208 	int err = BCME_OK;
1209 
1210 	if (!dhdp) {
1211 		DHD_ERROR(("%s : dhdp is NULL\n", __FUNCTION__));
1212 		return;
1213 	}
1214 
1215 	if (!dhdp->bus) {
1216 		DHD_ERROR(("%s : bus is NULL\n", __FUNCTION__));
1217 		return;
1218 	}
1219 
1220 	DHD_ERROR(("%s : irq set affinity cpu:0x%lx\n",
1221 			__FUNCTION__, *cpumask_bits(cpumask)));
1222 
1223 	dhdpcie_get_pcieirq(dhdp->bus, &irq);
1224 	err = irq_set_affinity(irq, cpumask);
1225 	if (err)
1226 		DHD_ERROR(("%s : irq set affinity is failed cpu:0x%lx\n",
1227 			__FUNCTION__, *cpumask_bits(cpumask)));
1228 }
1229 #endif /* DHD_LB_IRQSET || DHD_CONTROL_PCIE_CPUCORE_WIFI_TURNON */
1230 
1231 #if defined(DHD_LB_TXP)
1232 
1233 int BCMFASTPATH
1234 dhd_lb_sendpkt(dhd_info_t *dhd, struct net_device *net,
1235 	int ifidx, void *skb)
1236 {
1237 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->tx_start_percpu_run_cnt);
1238 
1239 	/* If the feature is disabled run-time do TX from here */
1240 	if (atomic_read(&dhd->lb_txp_active) == 0) {
1241 		DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1242 		return __dhd_sendpkt(&dhd->pub, ifidx, skb);
1243 	}
1244 
1245 	/* Store the address of net device and interface index in the Packet tag */
1246 	DHD_LB_TX_PKTTAG_SET_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), net);
1247 	DHD_LB_TX_PKTTAG_SET_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb), ifidx);
1248 
1249 	/* Enqueue the skb into tx_pend_queue */
1250 	skb_queue_tail(&dhd->tx_pend_queue, skb);
1251 
1252 	DHD_TRACE(("%s(): Added skb %p for netdev %p \r\n", __FUNCTION__, skb, net));
1253 
1254 	/* Dispatch the Tx job to be processed by the tx_tasklet */
1255 	dhd_lb_tx_dispatch(&dhd->pub);
1256 
1257 	return NETDEV_TX_OK;
1258 }
1259 #endif /* DHD_LB_TXP */
1260 
1261 #ifdef DHD_LB_TXP
1262 #define DHD_LB_TXBOUND	64
1263 /*
1264  * Function that performs the TX processing on a given CPU
1265  */
1266 bool
1267 dhd_lb_tx_process(dhd_info_t *dhd)
1268 {
1269 	struct sk_buff *skb;
1270 	int cnt = 0;
1271 	struct net_device *net;
1272 	int ifidx;
1273 	bool resched = FALSE;
1274 
1275 	DHD_TRACE(("%s(): TX Processing \r\n", __FUNCTION__));
1276 	if (dhd == NULL) {
1277 		DHD_ERROR((" Null pointer DHD \r\n"));
1278 		return resched;
1279 	}
1280 
1281 	BCM_REFERENCE(net);
1282 
1283 	DHD_LB_STATS_PERCPU_ARR_INCR(dhd->txp_percpu_run_cnt);
1284 
1285 	/* Base Loop to perform the actual Tx */
1286 	do {
1287 		skb = skb_dequeue(&dhd->tx_pend_queue);
1288 		if (skb == NULL) {
1289 			DHD_TRACE(("Dequeued a Null Packet \r\n"));
1290 			break;
1291 		}
1292 		cnt++;
1293 
1294 		net =  DHD_LB_TX_PKTTAG_NETDEV((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1295 		ifidx = DHD_LB_TX_PKTTAG_IFIDX((dhd_tx_lb_pkttag_fr_t *)PKTTAG(skb));
1296 
1297 		DHD_TRACE(("Processing skb %p for net %p index %d \r\n", skb,
1298 			net, ifidx));
1299 
1300 		__dhd_sendpkt(&dhd->pub, ifidx, skb);
1301 
1302 		if (cnt >= DHD_LB_TXBOUND) {
1303 			resched = TRUE;
1304 			break;
1305 		}
1306 
1307 	} while (1);
1308 
1309 	DHD_INFO(("%s(): Processed %d packets \r\n", __FUNCTION__, cnt));
1310 
1311 	return resched;
1312 }
1313 
1314 void
1315 dhd_lb_tx_handler(unsigned long data)
1316 {
1317 	dhd_info_t *dhd = (dhd_info_t *)data;
1318 
1319 	if (dhd_lb_tx_process(dhd)) {
1320 		dhd_tasklet_schedule(&dhd->tx_tasklet);
1321 	}
1322 }
1323 
1324 #endif /* DHD_LB_TXP */
1325