• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2009, Microsoft Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Authors:
18  *   Haiyang Zhang <haiyangz@microsoft.com>
19  *   Hank Janssen  <hjanssen@microsoft.com>
20  */
21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 
23 #include <linux/kernel.h>
24 #include <linux/interrupt.h>
25 #include <linux/sched.h>
26 #include <linux/wait.h>
27 #include <linux/mm.h>
28 #include <linux/slab.h>
29 #include <linux/list.h>
30 #include <linux/module.h>
31 #include <linux/completion.h>
32 #include <linux/delay.h>
33 #include <linux/hyperv.h>
34 #include <asm/mshyperv.h>
35 
36 #include "hyperv_vmbus.h"
37 
38 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
39 
40 static const struct vmbus_device vmbus_devs[] = {
41 	/* IDE */
42 	{ .dev_type = HV_IDE,
43 	  HV_IDE_GUID,
44 	  .perf_device = true,
45 	},
46 
47 	/* SCSI */
48 	{ .dev_type = HV_SCSI,
49 	  HV_SCSI_GUID,
50 	  .perf_device = true,
51 	},
52 
53 	/* Fibre Channel */
54 	{ .dev_type = HV_FC,
55 	  HV_SYNTHFC_GUID,
56 	  .perf_device = true,
57 	},
58 
59 	/* Synthetic NIC */
60 	{ .dev_type = HV_NIC,
61 	  HV_NIC_GUID,
62 	  .perf_device = true,
63 	},
64 
65 	/* Network Direct */
66 	{ .dev_type = HV_ND,
67 	  HV_ND_GUID,
68 	  .perf_device = true,
69 	},
70 
71 	/* PCIE */
72 	{ .dev_type = HV_PCIE,
73 	  HV_PCIE_GUID,
74 	  .perf_device = false,
75 	},
76 
77 	/* Synthetic Frame Buffer */
78 	{ .dev_type = HV_FB,
79 	  HV_SYNTHVID_GUID,
80 	  .perf_device = false,
81 	},
82 
83 	/* Synthetic Keyboard */
84 	{ .dev_type = HV_KBD,
85 	  HV_KBD_GUID,
86 	  .perf_device = false,
87 	},
88 
89 	/* Synthetic MOUSE */
90 	{ .dev_type = HV_MOUSE,
91 	  HV_MOUSE_GUID,
92 	  .perf_device = false,
93 	},
94 
95 	/* KVP */
96 	{ .dev_type = HV_KVP,
97 	  HV_KVP_GUID,
98 	  .perf_device = false,
99 	},
100 
101 	/* Time Synch */
102 	{ .dev_type = HV_TS,
103 	  HV_TS_GUID,
104 	  .perf_device = false,
105 	},
106 
107 	/* Heartbeat */
108 	{ .dev_type = HV_HB,
109 	  HV_HEART_BEAT_GUID,
110 	  .perf_device = false,
111 	},
112 
113 	/* Shutdown */
114 	{ .dev_type = HV_SHUTDOWN,
115 	  HV_SHUTDOWN_GUID,
116 	  .perf_device = false,
117 	},
118 
119 	/* File copy */
120 	{ .dev_type = HV_FCOPY,
121 	  HV_FCOPY_GUID,
122 	  .perf_device = false,
123 	},
124 
125 	/* Backup */
126 	{ .dev_type = HV_BACKUP,
127 	  HV_VSS_GUID,
128 	  .perf_device = false,
129 	},
130 
131 	/* Dynamic Memory */
132 	{ .dev_type = HV_DM,
133 	  HV_DM_GUID,
134 	  .perf_device = false,
135 	},
136 
137 	/* Unknown GUID */
138 	{ .dev_type = HV_UNKNOWN,
139 	  .perf_device = false,
140 	},
141 };
142 
143 static const struct {
144 	uuid_le guid;
145 } vmbus_unsupported_devs[] = {
146 	{ HV_AVMA1_GUID },
147 	{ HV_AVMA2_GUID },
148 	{ HV_RDV_GUID	},
149 };
150 
151 /*
152  * The rescinded channel may be blocked waiting for a response from the host;
153  * take care of that.
154  */
vmbus_rescind_cleanup(struct vmbus_channel * channel)155 static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
156 {
157 	struct vmbus_channel_msginfo *msginfo;
158 	unsigned long flags;
159 
160 
161 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
162 	channel->rescind = true;
163 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
164 				msglistentry) {
165 
166 		if (msginfo->waiting_channel == channel) {
167 			complete(&msginfo->waitevent);
168 			break;
169 		}
170 	}
171 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
172 }
173 
is_unsupported_vmbus_devs(const uuid_le * guid)174 static bool is_unsupported_vmbus_devs(const uuid_le *guid)
175 {
176 	int i;
177 
178 	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
179 		if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
180 			return true;
181 	return false;
182 }
183 
hv_get_dev_type(const struct vmbus_channel * channel)184 static u16 hv_get_dev_type(const struct vmbus_channel *channel)
185 {
186 	const uuid_le *guid = &channel->offermsg.offer.if_type;
187 	u16 i;
188 
189 	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
190 		return HV_UNKNOWN;
191 
192 	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
193 		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
194 			return i;
195 	}
196 	pr_info("Unknown GUID: %pUl\n", guid);
197 	return i;
198 }
199 
200 /**
201  * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
202  * @icmsghdrp: Pointer to msg header structure
203  * @icmsg_negotiate: Pointer to negotiate message structure
204  * @buf: Raw buffer channel data
205  *
206  * @icmsghdrp is of type &struct icmsg_hdr.
207  * Set up and fill in default negotiate response message.
208  *
209  * The fw_version and fw_vercnt specifies the framework version that
210  * we can support.
211  *
212  * The srv_version and srv_vercnt specifies the service
213  * versions we can support.
214  *
215  * Versions are given in decreasing order.
216  *
217  * nego_fw_version and nego_srv_version store the selected protocol versions.
218  *
219  * Mainly used by Hyper-V drivers.
220  */
vmbus_prep_negotiate_resp(struct icmsg_hdr * icmsghdrp,u8 * buf,const int * fw_version,int fw_vercnt,const int * srv_version,int srv_vercnt,int * nego_fw_version,int * nego_srv_version)221 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
222 				u8 *buf, const int *fw_version, int fw_vercnt,
223 				const int *srv_version, int srv_vercnt,
224 				int *nego_fw_version, int *nego_srv_version)
225 {
226 	int icframe_major, icframe_minor;
227 	int icmsg_major, icmsg_minor;
228 	int fw_major, fw_minor;
229 	int srv_major, srv_minor;
230 	int i, j;
231 	bool found_match = false;
232 	struct icmsg_negotiate *negop;
233 
234 	icmsghdrp->icmsgsize = 0x10;
235 	negop = (struct icmsg_negotiate *)&buf[
236 		sizeof(struct vmbuspipe_hdr) +
237 		sizeof(struct icmsg_hdr)];
238 
239 	icframe_major = negop->icframe_vercnt;
240 	icframe_minor = 0;
241 
242 	icmsg_major = negop->icmsg_vercnt;
243 	icmsg_minor = 0;
244 
245 	/*
246 	 * Select the framework version number we will
247 	 * support.
248 	 */
249 
250 	for (i = 0; i < fw_vercnt; i++) {
251 		fw_major = (fw_version[i] >> 16);
252 		fw_minor = (fw_version[i] & 0xFFFF);
253 
254 		for (j = 0; j < negop->icframe_vercnt; j++) {
255 			if ((negop->icversion_data[j].major == fw_major) &&
256 			    (negop->icversion_data[j].minor == fw_minor)) {
257 				icframe_major = negop->icversion_data[j].major;
258 				icframe_minor = negop->icversion_data[j].minor;
259 				found_match = true;
260 				break;
261 			}
262 		}
263 
264 		if (found_match)
265 			break;
266 	}
267 
268 	if (!found_match)
269 		goto fw_error;
270 
271 	found_match = false;
272 
273 	for (i = 0; i < srv_vercnt; i++) {
274 		srv_major = (srv_version[i] >> 16);
275 		srv_minor = (srv_version[i] & 0xFFFF);
276 
277 		for (j = negop->icframe_vercnt;
278 			(j < negop->icframe_vercnt + negop->icmsg_vercnt);
279 			j++) {
280 
281 			if ((negop->icversion_data[j].major == srv_major) &&
282 				(negop->icversion_data[j].minor == srv_minor)) {
283 
284 				icmsg_major = negop->icversion_data[j].major;
285 				icmsg_minor = negop->icversion_data[j].minor;
286 				found_match = true;
287 				break;
288 			}
289 		}
290 
291 		if (found_match)
292 			break;
293 	}
294 
295 	/*
296 	 * Respond with the framework and service
297 	 * version numbers we can support.
298 	 */
299 
300 fw_error:
301 	if (!found_match) {
302 		negop->icframe_vercnt = 0;
303 		negop->icmsg_vercnt = 0;
304 	} else {
305 		negop->icframe_vercnt = 1;
306 		negop->icmsg_vercnt = 1;
307 	}
308 
309 	if (nego_fw_version)
310 		*nego_fw_version = (icframe_major << 16) | icframe_minor;
311 
312 	if (nego_srv_version)
313 		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;
314 
315 	negop->icversion_data[0].major = icframe_major;
316 	negop->icversion_data[0].minor = icframe_minor;
317 	negop->icversion_data[1].major = icmsg_major;
318 	negop->icversion_data[1].minor = icmsg_minor;
319 	return found_match;
320 }
321 
322 EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
323 
324 /*
325  * alloc_channel - Allocate and initialize a vmbus channel object
326  */
alloc_channel(void)327 static struct vmbus_channel *alloc_channel(void)
328 {
329 	struct vmbus_channel *channel;
330 
331 	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
332 	if (!channel)
333 		return NULL;
334 
335 	spin_lock_init(&channel->lock);
336 	init_completion(&channel->rescind_event);
337 
338 	INIT_LIST_HEAD(&channel->sc_list);
339 	INIT_LIST_HEAD(&channel->percpu_list);
340 
341 	tasklet_init(&channel->callback_event,
342 		     vmbus_on_event, (unsigned long)channel);
343 
344 	return channel;
345 }
346 
347 /*
348  * free_channel - Release the resources used by the vmbus channel object
349  */
free_channel(struct vmbus_channel * channel)350 static void free_channel(struct vmbus_channel *channel)
351 {
352 	tasklet_kill(&channel->callback_event);
353 
354 	kfree_rcu(channel, rcu);
355 }
356 
percpu_channel_enq(void * arg)357 static void percpu_channel_enq(void *arg)
358 {
359 	struct vmbus_channel *channel = arg;
360 	struct hv_per_cpu_context *hv_cpu
361 		= this_cpu_ptr(hv_context.cpu_context);
362 
363 	list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
364 }
365 
percpu_channel_deq(void * arg)366 static void percpu_channel_deq(void *arg)
367 {
368 	struct vmbus_channel *channel = arg;
369 
370 	list_del_rcu(&channel->percpu_list);
371 }
372 
373 
/*
 * vmbus_release_relid - post a CHANNELMSG_RELID_RELEASED message
 * @relid: child relid being released
 *
 * The result of vmbus_post_msg() is not checked.
 */
static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;

	memset(&msg, 0, sizeof(msg));
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	msg.child_relid = relid;

	vmbus_post_msg(&msg, sizeof(msg), true);
}
384 
/*
 * hv_process_channel_removal - tear down the channel identified by @relid
 * @relid: child relid of the channel to remove
 *
 * Must be called with vmbus_connection.channel_mutex held and only for a
 * channel whose rescind flag is set (both are enforced with BUG_ON).
 * The channel is taken off its target CPU's per-cpu list, unlinked from
 * either the global channel list (primary) or its primary's sub-channel
 * list, its relid is released and the object is freed.
 */
void hv_process_channel_removal(u32 relid)
{
	struct vmbus_channel *channel, *primary_channel;
	unsigned long flags;

	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));

	/* The lookup may fail if we raced with another removal. */
	channel = relid2channel(relid);
	if (!channel)
		return;

	BUG_ON(!channel->rescind);

	/* The per-cpu list is modified on the CPU the channel is bound to. */
	if (channel->target_cpu == get_cpu()) {
		percpu_channel_deq(channel);
		put_cpu();
	} else {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	}

	primary_channel = channel->primary_channel;
	if (primary_channel) {
		/* Sub-channel: drop it from the primary's list. */
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	} else {
		/* Primary channel: it is its own primary from here on. */
		list_del(&channel->listentry);
		primary_channel = channel;
	}

	/*
	 * We need to free the bit for init_vp_index() to work in the case
	 * of sub-channel, when we reload drivers like hv_netvsc.
	 */
	if (channel->affinity_policy == HV_LOCALIZED)
		cpumask_clear_cpu(channel->target_cpu,
				  &primary_channel->alloced_cpus_in_node);

	vmbus_release_relid(relid);

	free_channel(channel);
}
433 
vmbus_free_channels(void)434 void vmbus_free_channels(void)
435 {
436 	struct vmbus_channel *channel, *tmp;
437 
438 	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
439 		listentry) {
440 		/* hv_process_channel_removal() needs this */
441 		channel->rescind = true;
442 
443 		vmbus_device_unregister(channel->device_obj);
444 	}
445 }
446 
447 /* Note: the function can run concurrently for primary/sub channels. */
vmbus_add_channel_work(struct work_struct * work)448 static void vmbus_add_channel_work(struct work_struct *work)
449 {
450 	struct vmbus_channel *newchannel =
451 		container_of(work, struct vmbus_channel, add_channel_work);
452 	struct vmbus_channel *primary_channel = newchannel->primary_channel;
453 	unsigned long flags;
454 	u16 dev_type;
455 	int ret;
456 
457 	dev_type = hv_get_dev_type(newchannel);
458 
459 	init_vp_index(newchannel, dev_type);
460 
461 	if (newchannel->target_cpu != get_cpu()) {
462 		put_cpu();
463 		smp_call_function_single(newchannel->target_cpu,
464 					 percpu_channel_enq,
465 					 newchannel, true);
466 	} else {
467 		percpu_channel_enq(newchannel);
468 		put_cpu();
469 	}
470 
471 	/*
472 	 * This state is used to indicate a successful open
473 	 * so that when we do close the channel normally, we
474 	 * can cleanup properly.
475 	 */
476 	newchannel->state = CHANNEL_OPEN_STATE;
477 
478 	if (primary_channel != NULL) {
479 		/* newchannel is a sub-channel. */
480 
481 		if (primary_channel->sc_creation_callback != NULL)
482 			primary_channel->sc_creation_callback(newchannel);
483 
484 		newchannel->probe_done = true;
485 		return;
486 	}
487 
488 	/*
489 	 * Start the process of binding the primary channel to the driver
490 	 */
491 	newchannel->device_obj = vmbus_device_create(
492 		&newchannel->offermsg.offer.if_type,
493 		&newchannel->offermsg.offer.if_instance,
494 		newchannel);
495 	if (!newchannel->device_obj)
496 		goto err_deq_chan;
497 
498 	newchannel->device_obj->device_id = dev_type;
499 	/*
500 	 * Add the new device to the bus. This will kick off device-driver
501 	 * binding which eventually invokes the device driver's AddDevice()
502 	 * method.
503 	 */
504 	ret = vmbus_device_register(newchannel->device_obj);
505 
506 	if (ret != 0) {
507 		pr_err("unable to add child device object (relid %d)\n",
508 			newchannel->offermsg.child_relid);
509 		kfree(newchannel->device_obj);
510 		goto err_deq_chan;
511 	}
512 
513 	newchannel->probe_done = true;
514 	return;
515 
516 err_deq_chan:
517 	mutex_lock(&vmbus_connection.channel_mutex);
518 
519 	/*
520 	 * We need to set the flag, otherwise
521 	 * vmbus_onoffer_rescind() can be blocked.
522 	 */
523 	newchannel->probe_done = true;
524 
525 	if (primary_channel == NULL) {
526 		list_del(&newchannel->listentry);
527 	} else {
528 		spin_lock_irqsave(&primary_channel->lock, flags);
529 		list_del(&newchannel->sc_list);
530 		spin_unlock_irqrestore(&primary_channel->lock, flags);
531 	}
532 
533 	mutex_unlock(&vmbus_connection.channel_mutex);
534 
535 	if (newchannel->target_cpu != get_cpu()) {
536 		put_cpu();
537 		smp_call_function_single(newchannel->target_cpu,
538 					 percpu_channel_deq,
539 					 newchannel, true);
540 	} else {
541 		percpu_channel_deq(newchannel);
542 		put_cpu();
543 	}
544 
545 	vmbus_release_relid(newchannel->offermsg.child_relid);
546 
547 	free_channel(newchannel);
548 }
549 
550 /*
551  * vmbus_process_offer - Process the offer by creating a channel/device
552  * associated with this offer
553  */
vmbus_process_offer(struct vmbus_channel * newchannel)554 static void vmbus_process_offer(struct vmbus_channel *newchannel)
555 {
556 	struct vmbus_channel *channel;
557 	struct workqueue_struct *wq;
558 	unsigned long flags;
559 	bool fnew = true;
560 
561 	mutex_lock(&vmbus_connection.channel_mutex);
562 
563 	/*
564 	 * Now that we have acquired the channel_mutex,
565 	 * we can release the potentially racing rescind thread.
566 	 */
567 	atomic_dec(&vmbus_connection.offer_in_progress);
568 
569 	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
570 		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
571 				 newchannel->offermsg.offer.if_type) &&
572 		    !uuid_le_cmp(channel->offermsg.offer.if_instance,
573 				 newchannel->offermsg.offer.if_instance)) {
574 			fnew = false;
575 			break;
576 		}
577 	}
578 
579 	if (fnew)
580 		list_add_tail(&newchannel->listentry,
581 			      &vmbus_connection.chn_list);
582 	else {
583 		/*
584 		 * Check to see if this is a valid sub-channel.
585 		 */
586 		if (newchannel->offermsg.offer.sub_channel_index == 0) {
587 			mutex_unlock(&vmbus_connection.channel_mutex);
588 			/*
589 			 * Don't call free_channel(), because newchannel->kobj
590 			 * is not initialized yet.
591 			 */
592 			kfree(newchannel);
593 			WARN_ON_ONCE(1);
594 			return;
595 		}
596 		/*
597 		 * Process the sub-channel.
598 		 */
599 		newchannel->primary_channel = channel;
600 		spin_lock_irqsave(&channel->lock, flags);
601 		list_add_tail(&newchannel->sc_list, &channel->sc_list);
602 		spin_unlock_irqrestore(&channel->lock, flags);
603 	}
604 
605 	mutex_unlock(&vmbus_connection.channel_mutex);
606 
607 	/*
608 	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
609 	 * directly for sub-channels, because sc_creation_callback() ->
610 	 * vmbus_open() may never get the host's response to the
611 	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
612 	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
613 	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
614 	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
615 	 *
616 	 * The above is also true for primary channels, if the related device
617 	 * drivers use sync probing mode by default.
618 	 *
619 	 * And, usually the handling of primary channels and sub-channels can
620 	 * depend on each other, so we should offload them to different
621 	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
622 	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
623 	 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
624 	 * and waits for all the sub-channels to appear, but the latter
625 	 * can't get the rtnl_lock and this blocks the handling of
626 	 * sub-channels.
627 	 */
628 	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
629 	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
630 		    vmbus_connection.handle_sub_chan_wq;
631 	queue_work(wq, &newchannel->add_channel_work);
632 }
633 
634 /*
635  * We use this state to statically distribute the channel interrupt load.
636  */
637 static int next_numa_node_id;
638 /*
639  * init_vp_index() accesses global variables like next_numa_node_id, and
640  * it can run concurrently for primary channels and sub-channels: see
641  * vmbus_process_offer(), so we need the lock to protect the global
642  * variables.
643  */
644 static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
645 
646 /*
647  * Starting with Win8, we can statically distribute the incoming
648  * channel interrupt load by binding a channel to VCPU.
649  * We do this in a hierarchical fashion:
650  * First distribute the primary channels across available NUMA nodes
651  * and then distribute the subchannels amongst the CPUs in the NUMA
652  * node assigned to the primary channel.
653  *
654  * For pre-win8 hosts or non-performance critical channels we assign the
655  * first CPU in the first NUMA node.
656  */
init_vp_index(struct vmbus_channel * channel,u16 dev_type)657 static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
658 {
659 	u32 cur_cpu;
660 	bool perf_chn = vmbus_devs[dev_type].perf_device;
661 	struct vmbus_channel *primary = channel->primary_channel;
662 	int next_node;
663 	cpumask_var_t available_mask;
664 	struct cpumask *alloced_mask;
665 
666 	if ((vmbus_proto_version == VERSION_WS2008) ||
667 	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
668 	    !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
669 		/*
670 		 * Prior to win8, all channel interrupts are
671 		 * delivered on cpu 0.
672 		 * Also if the channel is not a performance critical
673 		 * channel, bind it to cpu 0.
674 		 * In case alloc_cpumask_var() fails, bind it to cpu 0.
675 		 */
676 		channel->numa_node = 0;
677 		channel->target_cpu = 0;
678 		channel->target_vp = hv_cpu_number_to_vp_number(0);
679 		return;
680 	}
681 
682 	spin_lock(&bind_channel_to_cpu_lock);
683 
684 	/*
685 	 * Based on the channel affinity policy, we will assign the NUMA
686 	 * nodes.
687 	 */
688 
689 	if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
690 		while (true) {
691 			next_node = next_numa_node_id++;
692 			if (next_node == nr_node_ids) {
693 				next_node = next_numa_node_id = 0;
694 				continue;
695 			}
696 			if (cpumask_empty(cpumask_of_node(next_node)))
697 				continue;
698 			break;
699 		}
700 		channel->numa_node = next_node;
701 		primary = channel;
702 	}
703 	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
704 
705 	if (cpumask_weight(alloced_mask) ==
706 	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
707 		/*
708 		 * We have cycled through all the CPUs in the node;
709 		 * reset the alloced map.
710 		 */
711 		cpumask_clear(alloced_mask);
712 	}
713 
714 	cpumask_xor(available_mask, alloced_mask,
715 		    cpumask_of_node(primary->numa_node));
716 
717 	cur_cpu = -1;
718 
719 	if (primary->affinity_policy == HV_LOCALIZED) {
720 		/*
721 		 * Normally Hyper-V host doesn't create more subchannels
722 		 * than there are VCPUs on the node but it is possible when not
723 		 * all present VCPUs on the node are initialized by guest.
724 		 * Clear the alloced_cpus_in_node to start over.
725 		 */
726 		if (cpumask_equal(&primary->alloced_cpus_in_node,
727 				  cpumask_of_node(primary->numa_node)))
728 			cpumask_clear(&primary->alloced_cpus_in_node);
729 	}
730 
731 	while (true) {
732 		cur_cpu = cpumask_next(cur_cpu, available_mask);
733 		if (cur_cpu >= nr_cpu_ids) {
734 			cur_cpu = -1;
735 			cpumask_copy(available_mask,
736 				     cpumask_of_node(primary->numa_node));
737 			continue;
738 		}
739 
740 		if (primary->affinity_policy == HV_LOCALIZED) {
741 			/*
742 			 * NOTE: in the case of sub-channel, we clear the
743 			 * sub-channel related bit(s) in
744 			 * primary->alloced_cpus_in_node in
745 			 * hv_process_channel_removal(), so when we
746 			 * reload drivers like hv_netvsc in SMP guest, here
747 			 * we're able to re-allocate
748 			 * bit from primary->alloced_cpus_in_node.
749 			 */
750 			if (!cpumask_test_cpu(cur_cpu,
751 					      &primary->alloced_cpus_in_node)) {
752 				cpumask_set_cpu(cur_cpu,
753 						&primary->alloced_cpus_in_node);
754 				cpumask_set_cpu(cur_cpu, alloced_mask);
755 				break;
756 			}
757 		} else {
758 			cpumask_set_cpu(cur_cpu, alloced_mask);
759 			break;
760 		}
761 	}
762 
763 	channel->target_cpu = cur_cpu;
764 	channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
765 
766 	spin_unlock(&bind_channel_to_cpu_lock);
767 
768 	free_cpumask_var(available_mask);
769 }
770 
vmbus_wait_for_unload(void)771 static void vmbus_wait_for_unload(void)
772 {
773 	int cpu;
774 	void *page_addr;
775 	struct hv_message *msg;
776 	struct vmbus_channel_message_header *hdr;
777 	u32 message_type;
778 
779 	/*
780 	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
781 	 * used for initial contact or to CPU0 depending on host version. When
782 	 * we're crashing on a different CPU let's hope that IRQ handler on
783 	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
784 	 * functional and vmbus_unload_response() will complete
785 	 * vmbus_connection.unload_event. If not, the last thing we can do is
786 	 * read message pages for all CPUs directly.
787 	 */
788 	while (1) {
789 		if (completion_done(&vmbus_connection.unload_event))
790 			break;
791 
792 		for_each_online_cpu(cpu) {
793 			struct hv_per_cpu_context *hv_cpu
794 				= per_cpu_ptr(hv_context.cpu_context, cpu);
795 
796 			page_addr = hv_cpu->synic_message_page;
797 			msg = (struct hv_message *)page_addr
798 				+ VMBUS_MESSAGE_SINT;
799 
800 			message_type = READ_ONCE(msg->header.message_type);
801 			if (message_type == HVMSG_NONE)
802 				continue;
803 
804 			hdr = (struct vmbus_channel_message_header *)
805 				msg->u.payload;
806 
807 			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
808 				complete(&vmbus_connection.unload_event);
809 
810 			vmbus_signal_eom(msg, message_type);
811 		}
812 
813 		mdelay(10);
814 	}
815 
816 	/*
817 	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
818 	 * maybe-pending messages on all CPUs to be able to receive new
819 	 * messages after we reconnect.
820 	 */
821 	for_each_online_cpu(cpu) {
822 		struct hv_per_cpu_context *hv_cpu
823 			= per_cpu_ptr(hv_context.cpu_context, cpu);
824 
825 		page_addr = hv_cpu->synic_message_page;
826 		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
827 		msg->header.message_type = HVMSG_NONE;
828 	}
829 }
830 
831 /*
832  * vmbus_unload_response - Handler for the unload response.
833  */
vmbus_unload_response(struct vmbus_channel_message_header * hdr)834 static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
835 {
836 	/*
837 	 * This is a global event; just wakeup the waiting thread.
838 	 * Once we successfully unload, we can cleanup the monitor state.
839 	 */
840 	complete(&vmbus_connection.unload_event);
841 }
842 
/*
 * vmbus_initiate_unload - send CHANNELMSG_UNLOAD and wait for the response
 * @crash: true when called on the crash path
 *
 * Does nothing for protocol versions older than VERSION_WIN8_1.
 * In the normal case the caller sleeps on unload_event; on crash the
 * response is polled for with vmbus_wait_for_unload() instead.
 */
void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	init_completion(&vmbus_connection.unload_event);

	memset(&hdr, 0, sizeof(hdr));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(hdr), !crash);

	/*
	 * vmbus_initiate_unload() is also called on crash and the crash can be
	 * happening in an interrupt context, where scheduling is impossible.
	 */
	if (crash)
		vmbus_wait_for_unload();
	else
		wait_for_completion(&vmbus_connection.unload_event);
}
866 
867 /*
868  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
869  *
870  */
vmbus_onoffer(struct vmbus_channel_message_header * hdr)871 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
872 {
873 	struct vmbus_channel_offer_channel *offer;
874 	struct vmbus_channel *newchannel;
875 
876 	offer = (struct vmbus_channel_offer_channel *)hdr;
877 
878 	/* Allocate the channel object and save this offer. */
879 	newchannel = alloc_channel();
880 	if (!newchannel) {
881 		vmbus_release_relid(offer->child_relid);
882 		atomic_dec(&vmbus_connection.offer_in_progress);
883 		pr_err("Unable to allocate channel object\n");
884 		return;
885 	}
886 
887 	/*
888 	 * Setup state for signalling the host.
889 	 */
890 	newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
891 
892 	if (vmbus_proto_version != VERSION_WS2008) {
893 		newchannel->is_dedicated_interrupt =
894 				(offer->is_dedicated_interrupt != 0);
895 		newchannel->sig_event = offer->connection_id;
896 	}
897 
898 	memcpy(&newchannel->offermsg, offer,
899 	       sizeof(struct vmbus_channel_offer_channel));
900 	newchannel->monitor_grp = (u8)offer->monitorid / 32;
901 	newchannel->monitor_bit = (u8)offer->monitorid % 32;
902 
903 	vmbus_process_offer(newchannel);
904 }
905 
906 /*
907  * vmbus_onoffer_rescind - Rescind offer handler.
908  *
909  * We queue a work item to process this offer synchronously
910  */
vmbus_onoffer_rescind(struct vmbus_channel_message_header * hdr)911 static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
912 {
913 	struct vmbus_channel_rescind_offer *rescind;
914 	struct vmbus_channel *channel;
915 	struct device *dev;
916 
917 	rescind = (struct vmbus_channel_rescind_offer *)hdr;
918 
919 	/*
920 	 * The offer msg and the corresponding rescind msg
921 	 * from the host are guranteed to be ordered -
922 	 * offer comes in first and then the rescind.
923 	 * Since we process these events in work elements,
924 	 * and with preemption, we may end up processing
925 	 * the events out of order. Given that we handle these
926 	 * work elements on the same CPU, this is possible only
927 	 * in the case of preemption. In any case wait here
928 	 * until the offer processing has moved beyond the
929 	 * point where the channel is discoverable.
930 	 */
931 
932 	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
933 		/*
934 		 * We wait here until any channel offer is currently
935 		 * being processed.
936 		 */
937 		msleep(1);
938 	}
939 
940 	mutex_lock(&vmbus_connection.channel_mutex);
941 	channel = relid2channel(rescind->child_relid);
942 	mutex_unlock(&vmbus_connection.channel_mutex);
943 
944 	if (channel == NULL) {
945 		/*
946 		 * We failed in processing the offer message;
947 		 * we would have cleaned up the relid in that
948 		 * failure path.
949 		 */
950 		return;
951 	}
952 
953 	/*
954 	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
955 	 * should make sure the channel callback is not running any more.
956 	 */
957 	vmbus_reset_channel_cb(channel);
958 
959 	/*
960 	 * Now wait for offer handling to complete.
961 	 */
962 	vmbus_rescind_cleanup(channel);
963 	while (READ_ONCE(channel->probe_done) == false) {
964 		/*
965 		 * We wait here until any channel offer is currently
966 		 * being processed.
967 		 */
968 		msleep(1);
969 	}
970 
971 	/*
972 	 * At this point, the rescind handling can proceed safely.
973 	 */
974 
975 	if (channel->device_obj) {
976 		if (channel->chn_rescind_callback) {
977 			channel->chn_rescind_callback(channel);
978 			return;
979 		}
980 		/*
981 		 * We will have to unregister this device from the
982 		 * driver core.
983 		 */
984 		dev = get_device(&channel->device_obj->device);
985 		if (dev) {
986 			vmbus_device_unregister(channel->device_obj);
987 			put_device(dev);
988 		}
989 	}
990 	if (channel->primary_channel != NULL) {
991 		/*
992 		 * Sub-channel is being rescinded. Following is the channel
993 		 * close sequence when initiated from the driveri (refer to
994 		 * vmbus_close() for details):
995 		 * 1. Close all sub-channels first
996 		 * 2. Then close the primary channel.
997 		 */
998 		mutex_lock(&vmbus_connection.channel_mutex);
999 		if (channel->state == CHANNEL_OPEN_STATE) {
1000 			/*
1001 			 * The channel is currently not open;
1002 			 * it is safe for us to cleanup the channel.
1003 			 */
1004 			hv_process_channel_removal(rescind->child_relid);
1005 		} else {
1006 			complete(&channel->rescind_event);
1007 		}
1008 		mutex_unlock(&vmbus_connection.channel_mutex);
1009 	}
1010 }
1011 
vmbus_hvsock_device_unregister(struct vmbus_channel * channel)1012 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
1013 {
1014 	BUG_ON(!is_hvsock_channel(channel));
1015 
1016 	/* We always get a rescind msg when a connection is closed. */
1017 	while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
1018 		msleep(1);
1019 
1020 	vmbus_device_unregister(channel->device_obj);
1021 }
1022 EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
1023 
1024 
/*
 * vmbus_onoffers_delivered - handler invoked once all offers have been
 * delivered.
 * @hdr: received message header (unused)
 *
 * Intentionally empty: there is nothing to do here.
 */
static void vmbus_onoffers_delivered(struct vmbus_channel_message_header *hdr)
{
}
1035 
1036 /*
1037  * vmbus_onopen_result - Open result handler.
1038  *
1039  * This is invoked when we received a response to our channel open request.
1040  * Find the matching request, copy the response and signal the requesting
1041  * thread.
1042  */
vmbus_onopen_result(struct vmbus_channel_message_header * hdr)1043 static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
1044 {
1045 	struct vmbus_channel_open_result *result;
1046 	struct vmbus_channel_msginfo *msginfo;
1047 	struct vmbus_channel_message_header *requestheader;
1048 	struct vmbus_channel_open_channel *openmsg;
1049 	unsigned long flags;
1050 
1051 	result = (struct vmbus_channel_open_result *)hdr;
1052 
1053 	/*
1054 	 * Find the open msg, copy the result and signal/unblock the wait event
1055 	 */
1056 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1057 
1058 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1059 				msglistentry) {
1060 		requestheader =
1061 			(struct vmbus_channel_message_header *)msginfo->msg;
1062 
1063 		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
1064 			openmsg =
1065 			(struct vmbus_channel_open_channel *)msginfo->msg;
1066 			if (openmsg->child_relid == result->child_relid &&
1067 			    openmsg->openid == result->openid) {
1068 				memcpy(&msginfo->response.open_result,
1069 				       result,
1070 				       sizeof(
1071 					struct vmbus_channel_open_result));
1072 				complete(&msginfo->waitevent);
1073 				break;
1074 			}
1075 		}
1076 	}
1077 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1078 }
1079 
1080 /*
1081  * vmbus_ongpadl_created - GPADL created handler.
1082  *
1083  * This is invoked when we received a response to our gpadl create request.
1084  * Find the matching request, copy the response and signal the requesting
1085  * thread.
1086  */
vmbus_ongpadl_created(struct vmbus_channel_message_header * hdr)1087 static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
1088 {
1089 	struct vmbus_channel_gpadl_created *gpadlcreated;
1090 	struct vmbus_channel_msginfo *msginfo;
1091 	struct vmbus_channel_message_header *requestheader;
1092 	struct vmbus_channel_gpadl_header *gpadlheader;
1093 	unsigned long flags;
1094 
1095 	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
1096 
1097 	/*
1098 	 * Find the establish msg, copy the result and signal/unblock the wait
1099 	 * event
1100 	 */
1101 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1102 
1103 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1104 				msglistentry) {
1105 		requestheader =
1106 			(struct vmbus_channel_message_header *)msginfo->msg;
1107 
1108 		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
1109 			gpadlheader =
1110 			(struct vmbus_channel_gpadl_header *)requestheader;
1111 
1112 			if ((gpadlcreated->child_relid ==
1113 			     gpadlheader->child_relid) &&
1114 			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
1115 				memcpy(&msginfo->response.gpadl_created,
1116 				       gpadlcreated,
1117 				       sizeof(
1118 					struct vmbus_channel_gpadl_created));
1119 				complete(&msginfo->waitevent);
1120 				break;
1121 			}
1122 		}
1123 	}
1124 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1125 }
1126 
1127 /*
1128  * vmbus_ongpadl_torndown - GPADL torndown handler.
1129  *
1130  * This is invoked when we received a response to our gpadl teardown request.
1131  * Find the matching request, copy the response and signal the requesting
1132  * thread.
1133  */
vmbus_ongpadl_torndown(struct vmbus_channel_message_header * hdr)1134 static void vmbus_ongpadl_torndown(
1135 			struct vmbus_channel_message_header *hdr)
1136 {
1137 	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
1138 	struct vmbus_channel_msginfo *msginfo;
1139 	struct vmbus_channel_message_header *requestheader;
1140 	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
1141 	unsigned long flags;
1142 
1143 	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
1144 
1145 	/*
1146 	 * Find the open msg, copy the result and signal/unblock the wait event
1147 	 */
1148 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1149 
1150 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1151 				msglistentry) {
1152 		requestheader =
1153 			(struct vmbus_channel_message_header *)msginfo->msg;
1154 
1155 		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
1156 			gpadl_teardown =
1157 			(struct vmbus_channel_gpadl_teardown *)requestheader;
1158 
1159 			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
1160 				memcpy(&msginfo->response.gpadl_torndown,
1161 				       gpadl_torndown,
1162 				       sizeof(
1163 					struct vmbus_channel_gpadl_torndown));
1164 				complete(&msginfo->waitevent);
1165 				break;
1166 			}
1167 		}
1168 	}
1169 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1170 }
1171 
1172 /*
1173  * vmbus_onversion_response - Version response handler
1174  *
1175  * This is invoked when we received a response to our initiate contact request.
1176  * Find the matching request, copy the response and signal the requesting
1177  * thread.
1178  */
vmbus_onversion_response(struct vmbus_channel_message_header * hdr)1179 static void vmbus_onversion_response(
1180 		struct vmbus_channel_message_header *hdr)
1181 {
1182 	struct vmbus_channel_msginfo *msginfo;
1183 	struct vmbus_channel_message_header *requestheader;
1184 	struct vmbus_channel_version_response *version_response;
1185 	unsigned long flags;
1186 
1187 	version_response = (struct vmbus_channel_version_response *)hdr;
1188 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1189 
1190 	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1191 				msglistentry) {
1192 		requestheader =
1193 			(struct vmbus_channel_message_header *)msginfo->msg;
1194 
1195 		if (requestheader->msgtype ==
1196 		    CHANNELMSG_INITIATE_CONTACT) {
1197 			memcpy(&msginfo->response.version_response,
1198 			      version_response,
1199 			      sizeof(struct vmbus_channel_version_response));
1200 			complete(&msginfo->waitevent);
1201 		}
1202 	}
1203 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
1204 }
1205 
1206 /* Channel message dispatch table */
1207 const struct vmbus_channel_message_table_entry
1208 channel_message_table[CHANNELMSG_COUNT] = {
1209 	{ CHANNELMSG_INVALID,			0, NULL },
1210 	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer },
1211 	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind },
1212 	{ CHANNELMSG_REQUESTOFFERS,		0, NULL },
1213 	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered },
1214 	{ CHANNELMSG_OPENCHANNEL,		0, NULL },
1215 	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result },
1216 	{ CHANNELMSG_CLOSECHANNEL,		0, NULL },
1217 	{ CHANNELMSG_GPADL_HEADER,		0, NULL },
1218 	{ CHANNELMSG_GPADL_BODY,		0, NULL },
1219 	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created },
1220 	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL },
1221 	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown },
1222 	{ CHANNELMSG_RELID_RELEASED,		0, NULL },
1223 	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL },
1224 	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response },
1225 	{ CHANNELMSG_UNLOAD,			0, NULL },
1226 	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response },
1227 	{ CHANNELMSG_18,			0, NULL },
1228 	{ CHANNELMSG_19,			0, NULL },
1229 	{ CHANNELMSG_20,			0, NULL },
1230 	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL },
1231 };
1232 
1233 /*
1234  * vmbus_onmessage - Handler for channel protocol messages.
1235  *
1236  * This is invoked in the vmbus worker thread context.
1237  */
vmbus_onmessage(void * context)1238 void vmbus_onmessage(void *context)
1239 {
1240 	struct hv_message *msg = context;
1241 	struct vmbus_channel_message_header *hdr;
1242 	int size;
1243 
1244 	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
1245 	size = msg->header.payload_size;
1246 
1247 	if (hdr->msgtype >= CHANNELMSG_COUNT) {
1248 		pr_err("Received invalid channel message type %d size %d\n",
1249 			   hdr->msgtype, size);
1250 		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
1251 				     (unsigned char *)msg->u.payload, size);
1252 		return;
1253 	}
1254 
1255 	if (channel_message_table[hdr->msgtype].message_handler)
1256 		channel_message_table[hdr->msgtype].message_handler(hdr);
1257 	else
1258 		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
1259 }
1260 
1261 /*
1262  * vmbus_request_offers - Send a request to get all our pending offers.
1263  */
vmbus_request_offers(void)1264 int vmbus_request_offers(void)
1265 {
1266 	struct vmbus_channel_message_header *msg;
1267 	struct vmbus_channel_msginfo *msginfo;
1268 	int ret;
1269 
1270 	msginfo = kmalloc(sizeof(*msginfo) +
1271 			  sizeof(struct vmbus_channel_message_header),
1272 			  GFP_KERNEL);
1273 	if (!msginfo)
1274 		return -ENOMEM;
1275 
1276 	msg = (struct vmbus_channel_message_header *)msginfo->msg;
1277 
1278 	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
1279 
1280 
1281 	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
1282 			     true);
1283 	if (ret != 0) {
1284 		pr_err("Unable to request offers - %d\n", ret);
1285 
1286 		goto cleanup;
1287 	}
1288 
1289 cleanup:
1290 	kfree(msginfo);
1291 
1292 	return ret;
1293 }
1294 
1295 /*
1296  * Retrieve the (sub) channel on which to send an outgoing request.
1297  * When a primary channel has multiple sub-channels, we try to
1298  * distribute the load equally amongst all available channels.
1299  */
vmbus_get_outgoing_channel(struct vmbus_channel * primary)1300 struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
1301 {
1302 	struct list_head *cur, *tmp;
1303 	int cur_cpu;
1304 	struct vmbus_channel *cur_channel;
1305 	struct vmbus_channel *outgoing_channel = primary;
1306 	int next_channel;
1307 	int i = 1;
1308 
1309 	if (list_empty(&primary->sc_list))
1310 		return outgoing_channel;
1311 
1312 	next_channel = primary->next_oc++;
1313 
1314 	if (next_channel > (primary->num_sc)) {
1315 		primary->next_oc = 0;
1316 		return outgoing_channel;
1317 	}
1318 
1319 	cur_cpu = hv_cpu_number_to_vp_number(smp_processor_id());
1320 	list_for_each_safe(cur, tmp, &primary->sc_list) {
1321 		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1322 		if (cur_channel->state != CHANNEL_OPENED_STATE)
1323 			continue;
1324 
1325 		if (cur_channel->target_vp == cur_cpu)
1326 			return cur_channel;
1327 
1328 		if (i == next_channel)
1329 			return cur_channel;
1330 
1331 		i++;
1332 	}
1333 
1334 	return outgoing_channel;
1335 }
1336 EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
1337 
invoke_sc_cb(struct vmbus_channel * primary_channel)1338 static void invoke_sc_cb(struct vmbus_channel *primary_channel)
1339 {
1340 	struct list_head *cur, *tmp;
1341 	struct vmbus_channel *cur_channel;
1342 
1343 	if (primary_channel->sc_creation_callback == NULL)
1344 		return;
1345 
1346 	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
1347 		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1348 
1349 		primary_channel->sc_creation_callback(cur_channel);
1350 	}
1351 }
1352 
vmbus_set_sc_create_callback(struct vmbus_channel * primary_channel,void (* sc_cr_cb)(struct vmbus_channel * new_sc))1353 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
1354 				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
1355 {
1356 	primary_channel->sc_creation_callback = sc_cr_cb;
1357 }
1358 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
1359 
vmbus_are_subchannels_present(struct vmbus_channel * primary)1360 bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
1361 {
1362 	bool ret;
1363 
1364 	ret = !list_empty(&primary->sc_list);
1365 
1366 	if (ret) {
1367 		/*
1368 		 * Invoke the callback on sub-channel creation.
1369 		 * This will present a uniform interface to the
1370 		 * clients.
1371 		 */
1372 		invoke_sc_cb(primary);
1373 	}
1374 
1375 	return ret;
1376 }
1377 EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
1378 
vmbus_set_chn_rescind_callback(struct vmbus_channel * channel,void (* chn_rescind_cb)(struct vmbus_channel *))1379 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
1380 		void (*chn_rescind_cb)(struct vmbus_channel *))
1381 {
1382 	channel->chn_rescind_callback = chn_rescind_cb;
1383 }
1384 EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
1385