• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * membarrier system call
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
16 
17 #include <linux/syscalls.h>
18 #include <linux/membarrier.h>
19 #include <linux/tick.h>
20 #include <linux/cpumask.h>
21 #include <linux/atomic.h>
22 
23 #include "sched.h"	/* for cpu_rq(). */
24 
/*
 * Bitwise OR of the membarrier commands handled by the system call
 * below. MEMBARRIER_CMD_QUERY is deliberately left out: the query
 * handler starts from this mask when reporting supported commands.
 */
#define MEMBARRIER_CMD_BITMASK				\
	(MEMBARRIER_CMD_SHARED				\
	| MEMBARRIER_CMD_PRIVATE_EXPEDITED		\
	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
32 
/*
 * IPI callback: execute a full memory barrier on the CPU that receives
 * the IPI. @info is unused.
 */
static void ipi_mb(void *info)
{
	/* IPI delivery should already serialize; the barrier is defensive. */
	smp_mb();
}
37 
membarrier_private_expedited(void)38 static int membarrier_private_expedited(void)
39 {
40 	int cpu;
41 	bool fallback = false;
42 	cpumask_var_t tmpmask;
43 
44 	if (!(atomic_read(&current->mm->membarrier_state)
45 			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
46 		return -EPERM;
47 
48 	if (num_online_cpus() == 1)
49 		return 0;
50 
51 	/*
52 	 * Matches memory barriers around rq->curr modification in
53 	 * scheduler.
54 	 */
55 	smp_mb();	/* system call entry is not a mb. */
56 
57 	/*
58 	 * Expedited membarrier commands guarantee that they won't
59 	 * block, hence the GFP_NOWAIT allocation flag and fallback
60 	 * implementation.
61 	 */
62 	if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
63 		/* Fallback for OOM. */
64 		fallback = true;
65 	}
66 
67 	cpus_read_lock();
68 	for_each_online_cpu(cpu) {
69 		struct task_struct *p;
70 
71 		/*
72 		 * Skipping the current CPU is OK even through we can be
73 		 * migrated at any point. The current CPU, at the point
74 		 * where we read raw_smp_processor_id(), is ensured to
75 		 * be in program order with respect to the caller
76 		 * thread. Therefore, we can skip this CPU from the
77 		 * iteration.
78 		 */
79 		if (cpu == raw_smp_processor_id())
80 			continue;
81 		rcu_read_lock();
82 		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
83 		if (p && p->mm == current->mm) {
84 			if (!fallback)
85 				__cpumask_set_cpu(cpu, tmpmask);
86 			else
87 				smp_call_function_single(cpu, ipi_mb, NULL, 1);
88 		}
89 		rcu_read_unlock();
90 	}
91 	if (!fallback) {
92 		preempt_disable();
93 		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
94 		preempt_enable();
95 		free_cpumask_var(tmpmask);
96 	}
97 	cpus_read_unlock();
98 
99 	/*
100 	 * Memory barrier on the caller thread _after_ we finished
101 	 * waiting for the last IPI. Matches memory barriers around
102 	 * rq->curr modification in scheduler.
103 	 */
104 	smp_mb();	/* exit from system call is not a mb */
105 	return 0;
106 }
107 
membarrier_register_private_expedited(void)108 static void membarrier_register_private_expedited(void)
109 {
110 	struct task_struct *p = current;
111 	struct mm_struct *mm = p->mm;
112 
113 	/*
114 	 * We need to consider threads belonging to different thread
115 	 * groups, which use the same mm. (CLONE_VM but not
116 	 * CLONE_THREAD).
117 	 */
118 	if (atomic_read(&mm->membarrier_state)
119 			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
120 		return;
121 	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
122 			&mm->membarrier_state);
123 }
124 
125 /**
126  * sys_membarrier - issue memory barriers on a set of threads
127  * @cmd:   Takes command values defined in enum membarrier_cmd.
128  * @flags: Currently needs to be 0. For future extensions.
129  *
130  * If this system call is not implemented, -ENOSYS is returned. If the
131  * command specified does not exist, not available on the running
132  * kernel, or if the command argument is invalid, this system call
133  * returns -EINVAL. For a given command, with flags argument set to 0,
134  * this system call is guaranteed to always return the same value until
135  * reboot.
136  *
137  * All memory accesses performed in program order from each targeted thread
138  * is guaranteed to be ordered with respect to sys_membarrier(). If we use
139  * the semantic "barrier()" to represent a compiler barrier forcing memory
140  * accesses to be performed in program order across the barrier, and
141  * smp_mb() to represent explicit memory barriers forcing full memory
142  * ordering across the barrier, we have the following ordering table for
143  * each pair of barrier(), sys_membarrier() and smp_mb():
144  *
145  * The pair ordering is detailed as (O: ordered, X: not ordered):
146  *
147  *                        barrier()   smp_mb() sys_membarrier()
148  *        barrier()          X           X            O
149  *        smp_mb()           X           O            O
150  *        sys_membarrier()   O           O            O
151  */
SYSCALL_DEFINE2(membarrier,int,cmd,int,flags)152 SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
153 {
154 	if (unlikely(flags))
155 		return -EINVAL;
156 	switch (cmd) {
157 	case MEMBARRIER_CMD_QUERY:
158 	{
159 		int cmd_mask = MEMBARRIER_CMD_BITMASK;
160 
161 		if (tick_nohz_full_enabled())
162 			cmd_mask &= ~MEMBARRIER_CMD_SHARED;
163 		return cmd_mask;
164 	}
165 	case MEMBARRIER_CMD_SHARED:
166 		/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
167 		if (tick_nohz_full_enabled())
168 			return -EINVAL;
169 		if (num_online_cpus() > 1)
170 			synchronize_sched();
171 		return 0;
172 	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
173 		return membarrier_private_expedited();
174 	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
175 		membarrier_register_private_expedited();
176 		return 0;
177 	default:
178 		return -EINVAL;
179 	}
180 }
181