• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * sysctl.c: General linux system control interface
4  *
5  * Begun 24 March 1995, Stephen Tweedie
6  * Added /proc support, Dec 1995
7  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10  * Dynamic registration fixes, Stephen Tweedie.
11  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13  *  Horn.
14  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17  *  Wendling.
18  * The list_for_each() macro wasn't appropriate for the sysctl loop.
19  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20  */
21 
22 #include <linux/module.h>
23 #include <linux/aio.h>
24 #include <linux/mm.h>
25 #include <linux/swap.h>
26 #include <linux/slab.h>
27 #include <linux/sysctl.h>
28 #include <linux/bitmap.h>
29 #include <linux/signal.h>
30 #include <linux/printk.h>
31 #include <linux/proc_fs.h>
32 #include <linux/security.h>
33 #include <linux/ctype.h>
34 #include <linux/kmemleak.h>
35 #include <linux/fs.h>
36 #include <linux/init.h>
37 #include <linux/kernel.h>
38 #include <linux/kobject.h>
39 #include <linux/net.h>
40 #include <linux/sysrq.h>
41 #include <linux/highuid.h>
42 #include <linux/writeback.h>
43 #include <linux/ratelimit.h>
44 #include <linux/compaction.h>
45 #include <linux/hugetlb.h>
46 #include <linux/initrd.h>
47 #include <linux/key.h>
48 #include <linux/times.h>
49 #include <linux/limits.h>
50 #include <linux/dcache.h>
51 #include <linux/dnotify.h>
52 #include <linux/syscalls.h>
53 #include <linux/vmstat.h>
54 #include <linux/nfs_fs.h>
55 #include <linux/acpi.h>
56 #include <linux/reboot.h>
57 #include <linux/ftrace.h>
58 #include <linux/perf_event.h>
59 #include <linux/kprobes.h>
60 #include <linux/pipe_fs_i.h>
61 #include <linux/oom.h>
62 #include <linux/kmod.h>
63 #include <linux/capability.h>
64 #include <linux/binfmts.h>
65 #include <linux/sched/sysctl.h>
66 #include <linux/sched/coredump.h>
67 #include <linux/kexec.h>
68 #include <linux/bpf.h>
69 #include <linux/mount.h>
70 #include <linux/userfaultfd_k.h>
71 #include <linux/coredump.h>
72 #include <linux/latencytop.h>
73 #include <linux/pid.h>
74 
75 #include "../lib/kstrtox.h"
76 
77 #include <linux/uaccess.h>
78 #include <asm/processor.h>
79 
80 #ifdef CONFIG_X86
81 #include <asm/nmi.h>
82 #include <asm/stacktrace.h>
83 #include <asm/io.h>
84 #endif
85 #ifdef CONFIG_SPARC
86 #include <asm/setup.h>
87 #endif
88 #ifdef CONFIG_BSD_PROCESS_ACCT
89 #include <linux/acct.h>
90 #endif
91 #ifdef CONFIG_RT_MUTEXES
92 #include <linux/rtmutex.h>
93 #endif
94 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
95 #include <linux/lockdep.h>
96 #endif
97 #ifdef CONFIG_CHR_DEV_SG
98 #include <scsi/sg.h>
99 #endif
100 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
101 #include <linux/stackleak.h>
102 #endif
103 #ifdef CONFIG_LOCKUP_DETECTOR
104 #include <linux/nmi.h>
105 #endif
106 
107 #if defined(CONFIG_SYSCTL)
108 
109 /* Constants used for minimum and maximum */
110 #ifdef CONFIG_LOCKUP_DETECTOR
111 static int sixty = 60;
112 #endif
113 
114 static unsigned long zero_ul;
115 static unsigned long one_ul = 1;
116 static unsigned long long_max = LONG_MAX;
117 #ifdef CONFIG_PRINTK
118 static int ten_thousand = 10000;
119 #endif
120 #ifdef CONFIG_PERF_EVENTS
121 static int six_hundred_forty_kb = 640 * 1024;
122 #endif
123 
124 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
125 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
126 
127 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
128 static int maxolduid = 65535;
129 static int minolduid;
130 
131 static int ngroups_max = NGROUPS_MAX;
132 static const int cap_last_cap = CAP_LAST_CAP;
133 
134 /*
135  * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
136  * and hung_task_check_interval_secs
137  */
138 #ifdef CONFIG_DETECT_HUNG_TASK
139 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
140 #endif
141 
142 #ifdef CONFIG_INOTIFY_USER
143 #include <linux/inotify.h>
144 #endif
145 
146 #ifdef CONFIG_PROC_SYSCTL
147 
148 /**
149  * enum sysctl_writes_mode - supported sysctl write modes
150  *
151  * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
152  *	to be written, and multiple writes on the same sysctl file descriptor
153  *	will rewrite the sysctl value, regardless of file position. No warning
154  *	is issued when the initial position is not 0.
155  * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
156  *	not 0.
157  * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
158  *	file position 0 and the value must be fully contained in the buffer
159  *	sent to the write syscall. If dealing with strings respect the file
160  *	position, but restrict this to the max length of the buffer, anything
161  *	past the max length will be ignored. Multiple writes will append
162  *	to the buffer.
163  *
164  * These write modes control how current file position affects the behavior of
165  * updating sysctl values through the proc interface on each write.
166  */
167 enum sysctl_writes_mode {
168 	SYSCTL_WRITES_LEGACY		= -1,
169 	SYSCTL_WRITES_WARN		= 0,
170 	SYSCTL_WRITES_STRICT		= 1,
171 };
172 
173 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
174 #endif /* CONFIG_PROC_SYSCTL */
175 
176 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
177     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
178 int sysctl_legacy_va_layout;
179 #endif
180 
181 #ifdef CONFIG_SCHED_DEBUG
182 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
183 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
184 static int min_wakeup_granularity_ns;			/* 0 usecs */
185 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
186 #ifdef CONFIG_SMP
187 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
188 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
189 #endif /* CONFIG_SMP */
190 #endif /* CONFIG_SCHED_DEBUG */
191 
192 #ifdef CONFIG_COMPACTION
193 static int min_extfrag_threshold;
194 static int max_extfrag_threshold = 1000;
195 #endif
196 
197 #endif /* CONFIG_SYSCTL */
198 
199 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
/*
 * Sysctl handler for a knob backed by a static key (table->data).
 *
 * The user-visible integer lives in the function-local static saved_val;
 * it is parsed through a temporary ctl_table bounded by SYSCTL_ZERO and
 * SYSCTL_ONE.  When a successful write changes the value, the static key
 * is incremented (non-zero) or decremented (zero) to match.  Writes
 * require CAP_SYS_ADMIN; the whole update runs under
 * bpf_stats_enabled_mutex.
 *
 * Returns -EPERM for an unprivileged write, otherwise the result of
 * proc_dointvec_minmax().
 */
static int bpf_stats_handler(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	static int saved_val;
	struct static_key *key = (struct static_key *)table->data;
	int val, ret;
	struct ctl_table tmp = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&bpf_stats_enabled_mutex);

	val = saved_val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	/* Nothing to propagate on reads, failures or no-op writes. */
	if (!write || ret || val == saved_val)
		goto unlock;

	if (val)
		static_key_slow_inc(key);
	else
		static_key_slow_dec(key);
	saved_val = val;

unlock:
	mutex_unlock(&bpf_stats_enabled_mutex);
	return ret;
}
230 
unpriv_ebpf_notify(int new_state)231 void __weak unpriv_ebpf_notify(int new_state)
232 {
233 }
234 
/*
 * Sysctl handler for an integer knob that becomes locked once it is 1.
 *
 * The value is parsed into a local copy so that a rejected write never
 * reaches table->data.  If the value was 1 on entry, any write that
 * would change it fails with -EPERM.  Writes require CAP_SYS_ADMIN.
 *
 * unpriv_ebpf_notify() is called for writes only: a read cannot change
 * the setting, so notifying on every read would just repeat the current
 * state to the hook.
 *
 * Returns 0 on success, -EPERM on a privilege or locked-state failure,
 * or the error returned by proc_dointvec_minmax().
 */
static int bpf_unpriv_handler(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret, unpriv_enable = *(int *)table->data;
	bool locked_state = unpriv_enable == 1;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Parse into the local copy; commit only after validation. */
	tmp.data = &unpriv_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (locked_state && unpriv_enable != 1)
			return -EPERM;
		*(int *)table->data = unpriv_enable;
	}

	if (write)
		unpriv_ebpf_notify(unpriv_enable);

	return ret;
}
257 #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
258 
259 /*
260  * /proc/sys support
261  */
262 
263 #ifdef CONFIG_PROC_SYSCTL
264 
/*
 * Copy a string between the kernel buffer @data (capacity @maxlen) and
 * @buffer, honouring the file position in @ppos.
 *
 * Read: copies the part of @data starting at *ppos, at most *lenp bytes,
 * and appends '\n' if there is room.  *lenp is set to the number of
 * bytes produced and *ppos is advanced by the same amount.
 *
 * Write: in SYSCTL_WRITES_STRICT mode the write continues at *ppos, but
 * only if that position is not beyond the current string; otherwise the
 * write is silently dropped.  In the other modes the write always starts
 * at the beginning of @data.  Copying stops at NUL, '\n', the end of the
 * input or @maxlen - 1 characters, and the result is NUL-terminated.
 *
 * Always returns 0.
 */
static int _proc_do_string(char *data, int maxlen, int write,
		char *buffer, size_t *lenp, loff_t *ppos)
{
	size_t len, i;
	char c;

	if (!data || !maxlen || !*lenp) {
		*lenp = 0;
		return 0;
	}

	if (!write) {
		len = strlen(data);
		if (len > maxlen)
			len = maxlen;

		/* Reading past the end of the string yields nothing. */
		if (*ppos > len) {
			*lenp = 0;
			return 0;
		}

		data += *ppos;
		len  -= *ppos;

		if (len > *lenp)
			len = *lenp;
		if (len)
			memcpy(buffer, data, len);
		if (len < *lenp) {
			buffer[len] = '\n';
			len++;
		}
		*lenp = len;
		*ppos += len;
		return 0;
	}

	/* Default: start writing from the beginning of the buffer. */
	len = 0;
	if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
		/* Only continue writes not past the end of buffer. */
		len = strlen(data);
		if (len > maxlen - 1)
			len = maxlen - 1;

		if (*ppos > len)
			return 0;
		len = *ppos;
	}

	*ppos += *lenp;
	for (i = 0; i < *lenp && len < maxlen - 1; i++) {
		c = buffer[i];
		if (c == 0 || c == '\n')
			break;
		data[len++] = c;
	}
	data[len] = 0;

	return 0;
}
326 
warn_sysctl_write(struct ctl_table * table)327 static void warn_sysctl_write(struct ctl_table *table)
328 {
329 	pr_warn_once("%s wrote to %s when file position was not 0!\n"
330 		"This will not be supported in the future. To silence this\n"
331 		"warning, set kernel.sysctl_writes_strict = -1\n",
332 		current->comm, table->procname);
333 }
334 
335 /**
336  * proc_first_pos_non_zero_ignore - check if first position is allowed
337  * @ppos: file position
338  * @table: the sysctl table
339  *
340  * Returns true if the first position is non-zero and the sysctl_writes_strict
341  * mode indicates this is not allowed for numeric input types. String proc
342  * handlers can ignore the return value.
343  */
proc_first_pos_non_zero_ignore(loff_t * ppos,struct ctl_table * table)344 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
345 					   struct ctl_table *table)
346 {
347 	if (!*ppos)
348 		return false;
349 
350 	switch (sysctl_writes_strict) {
351 	case SYSCTL_WRITES_STRICT:
352 		return true;
353 	case SYSCTL_WRITES_WARN:
354 		warn_sysctl_write(table);
355 		return false;
356 	default:
357 		return false;
358 	}
359 }
360 
361 /**
362  * proc_dostring - read a string sysctl
363  * @table: the sysctl table
364  * @write: %TRUE if this is a write to the sysctl file
365  * @buffer: the user buffer
366  * @lenp: the size of the user buffer
367  * @ppos: file position
368  *
369  * Reads/writes a string from/to the user buffer. If the kernel
370  * buffer provided is not large enough to hold the string, the
371  * string is truncated. The copied string is %NULL-terminated.
372  * If the string is being read by the user process, it is copied
373  * and a newline '\n' is added. It is truncated if the buffer is
374  * not large enough.
375  *
376  * Returns 0 on success.
377  */
proc_dostring(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)378 int proc_dostring(struct ctl_table *table, int write,
379 		  void *buffer, size_t *lenp, loff_t *ppos)
380 {
381 	if (write)
382 		proc_first_pos_non_zero_ignore(ppos, table);
383 
384 	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
385 			ppos);
386 }
387 
/*
 * Advance *buf over leading whitespace, never consuming more than *size
 * bytes. *size is reduced by the number of bytes skipped. The buffer is
 * treated as a counted region, not as a NUL-terminated string.
 */
static void proc_skip_spaces(char **buf, size_t *size)
{
	for (; *size && isspace(**buf); (*buf)++)
		(*size)--;
}
397 
/*
 * Advance *buf over a leading run of the character @v, never consuming
 * more than *size bytes. *size is reduced by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
407 
408 /**
409  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
410  *                   fail on overflow
411  *
412  * @cp: kernel buffer containing the string to parse
413  * @endp: pointer to store the trailing characters
414  * @base: the base to use
415  * @res: where the parsed integer will be stored
416  *
417  * In case of success 0 is returned and @res will contain the parsed integer,
418  * @endp will hold any trailing characters.
419  * This function will fail the parse on overflow. If there wasn't an overflow
420  * the function will defer the decision what characters count as invalid to the
421  * caller.
422  */
strtoul_lenient(const char * cp,char ** endp,unsigned int base,unsigned long * res)423 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
424 			   unsigned long *res)
425 {
426 	unsigned long long result;
427 	unsigned int rv;
428 
429 	cp = _parse_integer_fixup_radix(cp, &base);
430 	rv = _parse_integer(cp, base, &result);
431 	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
432 		return -ERANGE;
433 
434 	cp += rv;
435 
436 	if (endp)
437 		*endp = (char *)cp;
438 
439 	*res = (unsigned long)result;
440 	return 0;
441 }
442 
443 #define TMPBUFLEN 22
444 /**
445  * proc_get_long - reads an ASCII formatted integer from a user buffer
446  *
447  * @buf: a kernel buffer
448  * @size: size of the kernel buffer
449  * @val: this is where the number will be stored
450  * @neg: set to %TRUE if number is negative
451  * @perm_tr: a vector which contains the allowed trailers
452  * @perm_tr_len: size of the perm_tr vector
453  * @tr: pointer to store the trailer character
454  *
455  * In case of success %0 is returned and @buf and @size are updated with
456  * the amount of bytes read. If @tr is non-NULL and a trailing
457  * character exists (size is non-zero after returning from this
458  * function), @tr is updated with the trailing character.
459  */
proc_get_long(char ** buf,size_t * size,unsigned long * val,bool * neg,const char * perm_tr,unsigned perm_tr_len,char * tr)460 static int proc_get_long(char **buf, size_t *size,
461 			  unsigned long *val, bool *neg,
462 			  const char *perm_tr, unsigned perm_tr_len, char *tr)
463 {
464 	char *p, tmp[TMPBUFLEN];
465 	ssize_t len = *size;
466 
467 	if (len <= 0)
468 		return -EINVAL;
469 
470 	if (len > TMPBUFLEN - 1)
471 		len = TMPBUFLEN - 1;
472 
473 	memcpy(tmp, *buf, len);
474 
475 	tmp[len] = 0;
476 	p = tmp;
477 	if (*p == '-' && *size > 1) {
478 		*neg = true;
479 		p++;
480 	} else
481 		*neg = false;
482 	if (!isdigit(*p))
483 		return -EINVAL;
484 
485 	if (strtoul_lenient(p, &p, 0, val))
486 		return -EINVAL;
487 
488 	len = p - tmp;
489 
490 	/* We don't know if the next char is whitespace thus we may accept
491 	 * invalid integers (e.g. 1234...a) or two integers instead of one
492 	 * (e.g. 123...1). So lets not allow such large numbers. */
493 	if (len == TMPBUFLEN - 1)
494 		return -EINVAL;
495 
496 	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
497 		return -EINVAL;
498 
499 	if (tr && (len < *size))
500 		*tr = *p;
501 
502 	*buf += len;
503 	*size -= len;
504 
505 	return 0;
506 }
507 
508 /**
509  * proc_put_long - converts an integer to a decimal ASCII formatted string
510  *
511  * @buf: the user buffer
512  * @size: the size of the user buffer
513  * @val: the integer to be converted
514  * @neg: sign of the number, %TRUE for negative
515  *
516  * In case of success @buf and @size are updated with the amount of bytes
517  * written.
518  */
proc_put_long(void ** buf,size_t * size,unsigned long val,bool neg)519 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
520 {
521 	int len;
522 	char tmp[TMPBUFLEN], *p = tmp;
523 
524 	sprintf(p, "%s%lu", neg ? "-" : "", val);
525 	len = strlen(tmp);
526 	if (len > *size)
527 		len = *size;
528 	memcpy(*buf, tmp, len);
529 	*size -= len;
530 	*buf += len;
531 }
532 #undef TMPBUFLEN
533 
/*
 * Store the single character @c at *buf if at least one byte of space
 * remains, then advance *buf and decrement *size. Does nothing when
 * *size is zero.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *out;

	if (!*size)
		return;

	out = *buf;
	*out = c;
	*buf = out + 1;
	(*size)--;
}
545 
/*
 * Default converter between the parsed (sign, magnitude) pair and an int.
 *
 * Write: stores the signed value in *valp, or returns -EINVAL if the
 * magnitude does not fit in an int (up to INT_MAX + 1 is accepted for
 * negative input). Read: splits *valp into *negp and *lvalp.
 * @data is not used.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		int val = READ_ONCE(*valp);
		bool negative = val < 0;

		*negp = negative;
		*lvalp = negative ? -(unsigned long)val : (unsigned long)val;
		return 0;
	}

	if (*negp) {
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		WRITE_ONCE(*valp, -*lvalp);
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
572 
/*
 * Default converter between an unsigned long and an unsigned int.
 *
 * Write: stores *lvalp in *valp, or returns -EINVAL if it exceeds
 * UINT_MAX. Read: widens *valp into *lvalp. @data is not used.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		unsigned int val = READ_ONCE(*valp);

		*lvalp = (unsigned long)val;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
587 
588 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
589 
/*
 * Core of the int-vector handlers: transfer up to
 * table->maxlen / sizeof(int) values between @tbl_data and @buffer as
 * whitespace-separated ASCII numbers, using @conv (or
 * do_proc_dointvec_conv() when @conv is NULL) for each element.
 *
 * Read: only served at file position 0; values are separated by '\t'
 * and followed by '\n' when space allows.
 * Write: at most PAGE_SIZE - 1 bytes of input are considered. If
 * proc_first_pos_non_zero_ignore() rejects the position, the write is
 * skipped but still reported as consumed. If no value at all could be
 * parsed, the parse error (or -EINVAL) is returned without updating
 * *lenp or *ppos.
 *
 * Otherwise *lenp is reduced by the unused part of the buffer, *ppos is
 * advanced by *lenp, and 0 or the first error is returned.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
		  int write, void *buffer,
		  size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	int *slot, slots_left, first = 1, err = 0;
	size_t left;
	char *p;

	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	slot = (int *) tbl_data;
	slots_left = table->maxlen / sizeof(*slot);
	left = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	if (write) {
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = buffer;
	}

	while (left && slots_left--) {
		unsigned long lval;
		bool neg;

		if (write) {
			proc_skip_spaces(&p, &left);
			if (!left)
				break;

			err = proc_get_long(&p, &left, &lval, &neg,
					    proc_wspace_sep,
					    sizeof(proc_wspace_sep), NULL);
			if (err)
				break;

			if (conv(&neg, &lval, slot, 1, data)) {
				err = -EINVAL;
				break;
			}
		} else {
			if (conv(&neg, &lval, slot, 0, data)) {
				err = -EINVAL;
				break;
			}

			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, lval, neg);
		}

		/* One element fully handled. */
		slot++;
		first = 0;
	}

	if (write) {
		if (!err && left)
			proc_skip_spaces(&p, &left);
		/* Not a single value was accepted. */
		if (first)
			return err ? err : -EINVAL;
	} else if (!first && left && !err) {
		proc_put_char(&buffer, &left, '\n');
	}

	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
662 
/*
 * Convenience wrapper: run __do_proc_dointvec() on the table's own data
 * pointer (table->data).
 */
static int do_proc_dointvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	void *values = table->data;

	return __do_proc_dointvec(values, table, write, buffer, lenp, ppos,
				  conv, data);
}
672 
/*
 * Write path for a single unsigned int sysctl.
 *
 * Parses one non-negative number from at most PAGE_SIZE - 1 bytes of
 * @buffer and hands it to @conv. Every failure (empty input, parse
 * error, negative number, or any non-zero return from @conv) is reported
 * as -EINVAL. If proc_first_pos_non_zero_ignore() rejects the position,
 * nothing is parsed, *ppos is advanced by *lenp and 0 is returned.
 */
static int do_proc_douintvec_w(unsigned int *tbl_data,
			       struct ctl_table *table,
			       void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	unsigned long lval;
	size_t left = *lenp;
	bool neg;
	char *p = buffer;

	if (proc_first_pos_non_zero_ignore(ppos, table)) {
		/* This is in keeping with old __do_proc_dointvec() */
		*ppos += *lenp;
		return 0;
	}

	if (left > PAGE_SIZE - 1)
		left = PAGE_SIZE - 1;

	proc_skip_spaces(&p, &left);
	if (!left)
		return -EINVAL;

	if (proc_get_long(&p, &left, &lval, &neg,
			  proc_wspace_sep,
			  sizeof(proc_wspace_sep), NULL) || neg)
		return -EINVAL;

	if (conv(&lval, tbl_data, 1, data))
		return -EINVAL;

	if (left)
		proc_skip_spaces(&p, &left);

	return 0;
}
729 
/*
 * Read path for a single unsigned int sysctl.
 *
 * Converts *tbl_data through @conv and writes it to @buffer as decimal
 * text, followed by '\n' if space remains. *lenp is set to the number of
 * bytes produced and *ppos is advanced by that amount, also when @conv
 * fails (in which case nothing is produced and -EINVAL is returned).
 */
static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	unsigned long lval;
	size_t left = *lenp;
	int err = 0;

	if (conv(&lval, tbl_data, 0, data)) {
		err = -EINVAL;
	} else {
		proc_put_long(&buffer, &left, lval, false);
		if (left)
			proc_put_char(&buffer, &left, '\n');
	}

	*lenp -= left;
	*ppos += *lenp;

	return err;
}
760 
/*
 * Core of the unsigned-int handlers. Validates the request and
 * dispatches to do_proc_douintvec_w() or do_proc_douintvec_r(), using
 * do_proc_douintvec_conv() when @conv is NULL.
 *
 * Returns 0 with *lenp = 0 when there is nothing to do (no data, zero
 * maxlen, zero-length request, or a read at a non-zero position), and
 * -EINVAL with *lenp = 0 when table->maxlen does not describe exactly
 * one unsigned int.
 */
static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
			       int write, void *buffer,
			       size_t *lenp, loff_t *ppos,
			       int (*conv)(unsigned long *lvalp,
					   unsigned int *valp,
					   int write, void *data),
			       void *data)
{
	unsigned int *valp, count;

	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	valp = (unsigned int *) tbl_data;
	count = table->maxlen / sizeof(*valp);

	/*
	 * Arrays are not supported, keep this simple. *Do not* add
	 * support for them.
	 */
	if (count != 1) {
		*lenp = 0;
		return -EINVAL;
	}

	if (!conv)
		conv = do_proc_douintvec_conv;

	if (!write)
		return do_proc_douintvec_r(valp, buffer, lenp, ppos,
					   conv, data);

	return do_proc_douintvec_w(valp, table, buffer, lenp, ppos,
				   conv, data);
}
796 
/*
 * Convenience wrapper: run __do_proc_douintvec() on the table's own data
 * pointer (table->data).
 */
static int do_proc_douintvec(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos,
			     int (*conv)(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data),
			     void *data)
{
	void *value = table->data;

	return __do_proc_douintvec(value, table, write, buffer, lenp, ppos,
				   conv, data);
}
807 
808 /**
809  * proc_dointvec - read a vector of integers
810  * @table: the sysctl table
811  * @write: %TRUE if this is a write to the sysctl file
812  * @buffer: the user buffer
813  * @lenp: the size of the user buffer
814  * @ppos: file position
815  *
816  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
817  * values from/to the user buffer, treated as an ASCII string.
818  *
819  * Returns 0 on success.
820  */
proc_dointvec(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)821 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
822 		  size_t *lenp, loff_t *ppos)
823 {
824 	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
825 }
826 
827 #ifdef CONFIG_COMPACTION
/*
 * proc_dointvec_minmax() variant that, when CONFIG_PREEMPT_RT is enabled,
 * emits a one-time warning if a successful write changed the value.
 * Reads, and all accesses without CONFIG_PREEMPT_RT, are passed straight
 * through.
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	int before, ret;

	if (IS_ENABLED(CONFIG_PREEMPT_RT) && write) {
		before = *(int *)table->data;
		ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		if (!ret && before != *(int *)table->data)
			pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
				     table->procname, current->comm,
				     task_pid_nr(current));
		return ret;
	}

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
846 #endif
847 
848 /**
849  * proc_douintvec - read a vector of unsigned integers
850  * @table: the sysctl table
851  * @write: %TRUE if this is a write to the sysctl file
852  * @buffer: the user buffer
853  * @lenp: the size of the user buffer
854  * @ppos: file position
855  *
856  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
857  * values from/to the user buffer, treated as an ASCII string.
858  *
859  * Returns 0 on success.
860  */
proc_douintvec(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)861 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
862 		size_t *lenp, loff_t *ppos)
863 {
864 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
865 				 do_proc_douintvec_conv, NULL);
866 }
867 
868 /*
869  * Taint values can only be increased
870  * This means we can safely use a temporary.
871  */
proc_taint(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)872 static int proc_taint(struct ctl_table *table, int write,
873 			       void *buffer, size_t *lenp, loff_t *ppos)
874 {
875 	struct ctl_table t;
876 	unsigned long tmptaint = get_taint();
877 	int err;
878 
879 	if (write && !capable(CAP_SYS_ADMIN))
880 		return -EPERM;
881 
882 	t = *table;
883 	t.data = &tmptaint;
884 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
885 	if (err < 0)
886 		return err;
887 
888 	if (write) {
889 		int i;
890 
891 		/*
892 		 * If we are relying on panic_on_taint not producing
893 		 * false positives due to userspace input, bail out
894 		 * before setting the requested taint flags.
895 		 */
896 		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
897 			return -EINVAL;
898 
899 		/*
900 		 * Poor man's atomic or. Not worth adding a primitive
901 		 * to everyone's atomic.h for this
902 		 */
903 		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
904 			if ((1UL << i) & tmptaint)
905 				add_taint(i, LOCKDEP_STILL_OK);
906 	}
907 
908 	return err;
909 }
910 
911 #ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() with an extra privilege check: writes fail with
 * -EPERM unless the caller has CAP_SYS_ADMIN. Reads are unrestricted by
 * this handler.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	bool denied = write && !capable(CAP_SYS_ADMIN);

	if (denied)
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
920 #endif
921 
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * Carries the bounds that do_proc_dointvec_minmax_conv() enforces on
 * writes for sysctl parameters using the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Range-checking converter for proc_dointvec_minmax().
 *
 * Reads are delegated unchanged to do_proc_dointvec_conv(). Writes are
 * first converted into a local temporary so that *valp is only touched
 * after the value passed the bounds in @data; an out-of-range value
 * yields -EINVAL.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	int tmp, ret;

	if (!write)
		return do_proc_dointvec_conv(negp, lvalp, valp, write, data);

	ret = do_proc_dointvec_conv(negp, lvalp, &tmp, write, data);
	if (ret)
		return ret;

	if (param->min && *param->min > tmp)
		return -EINVAL;
	if (param->max && *param->max < tmp)
		return -EINVAL;

	WRITE_ONCE(*valp, tmp);
	return 0;
}
961 
962 /**
963  * proc_dointvec_minmax - read a vector of integers with min/max values
964  * @table: the sysctl table
965  * @write: %TRUE if this is a write to the sysctl file
966  * @buffer: the user buffer
967  * @lenp: the size of the user buffer
968  * @ppos: file position
969  *
970  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
971  * values from/to the user buffer, treated as an ASCII string.
972  *
973  * This routine will ensure the values are within the range specified by
974  * table->extra1 (min) and table->extra2 (max).
975  *
976  * Returns 0 on success or -EINVAL on write when the range check fails.
977  */
proc_dointvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)978 int proc_dointvec_minmax(struct ctl_table *table, int write,
979 		  void *buffer, size_t *lenp, loff_t *ppos)
980 {
981 	struct do_proc_dointvec_minmax_conv_param param = {
982 		.min = (int *) table->extra1,
983 		.max = (int *) table->extra2,
984 	};
985 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
986 				do_proc_dointvec_minmax_conv, &param);
987 }
988 
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * Carries the bounds that do_proc_douintvec_minmax_conv() enforces on
 * writes for sysctl parameters using the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Range-checking converter for proc_douintvec_minmax().
 *
 * Reads are delegated unchanged to do_proc_douintvec_conv(). Writes are
 * first converted into a local temporary so that *valp is only touched
 * after the value passed the bounds in @data; an out-of-range value
 * yields -ERANGE from this function.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *param = data;
	unsigned int tmp;
	int ret;

	if (!write)
		return do_proc_douintvec_conv(lvalp, valp, write, data);

	ret = do_proc_douintvec_conv(lvalp, &tmp, write, data);
	if (ret)
		return ret;

	if (param->min && *param->min > tmp)
		return -ERANGE;
	if (param->max && *param->max < tmp)
		return -ERANGE;

	WRITE_ONCE(*valp, tmp);
	return 0;
}
1027 
1028 /**
1029  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
1030  * @table: the sysctl table
1031  * @write: %TRUE if this is a write to the sysctl file
1032  * @buffer: the user buffer
1033  * @lenp: the size of the user buffer
1034  * @ppos: file position
1035  *
1036  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
1037  * values from/to the user buffer, treated as an ASCII string. Negative
1038  * strings are not allowed.
1039  *
1040  * This routine will ensure the values are within the range specified by
1041  * table->extra1 (min) and table->extra2 (max). There is a final sanity
1042  * check for UINT_MAX to avoid having to support wrap around uses from
1043  * userspace.
1044  *
1045  * Returns 0 on success or -ERANGE on write when the range check fails.
1046  */
proc_douintvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1047 int proc_douintvec_minmax(struct ctl_table *table, int write,
1048 			  void *buffer, size_t *lenp, loff_t *ppos)
1049 {
1050 	struct do_proc_douintvec_minmax_conv_param param = {
1051 		.min = (unsigned int *) table->extra1,
1052 		.max = (unsigned int *) table->extra2,
1053 	};
1054 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
1055 				 do_proc_douintvec_minmax_conv, &param);
1056 }
1057 
1058 /**
1059  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
1060  * @table: the sysctl table
1061  * @write: %TRUE if this is a write to the sysctl file
1062  * @buffer: the user buffer
1063  * @lenp: the size of the user buffer
1064  * @ppos: file position
1065  *
1066  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
1067  * values from/to the user buffer, treated as an ASCII string. Negative
1068  * strings are not allowed.
1069  *
1070  * This routine will ensure the values are within the range specified by
1071  * table->extra1 (min) and table->extra2 (max).
1072  *
1073  * Returns 0 on success or an error on write when the range check fails.
1074  */
proc_dou8vec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1075 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1076 			void *buffer, size_t *lenp, loff_t *ppos)
1077 {
1078 	struct ctl_table tmp;
1079 	unsigned int min = 0, max = 255U, val;
1080 	u8 *data = table->data;
1081 	struct do_proc_douintvec_minmax_conv_param param = {
1082 		.min = &min,
1083 		.max = &max,
1084 	};
1085 	int res;
1086 
1087 	/* Do not support arrays yet. */
1088 	if (table->maxlen != sizeof(u8))
1089 		return -EINVAL;
1090 
1091 	if (table->extra1) {
1092 		min = *(unsigned int *) table->extra1;
1093 		if (min > 255U)
1094 			return -EINVAL;
1095 	}
1096 	if (table->extra2) {
1097 		max = *(unsigned int *) table->extra2;
1098 		if (max > 255U)
1099 			return -EINVAL;
1100 	}
1101 
1102 	tmp = *table;
1103 
1104 	tmp.maxlen = sizeof(val);
1105 	tmp.data = &val;
1106 	val = READ_ONCE(*data);
1107 	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1108 				do_proc_douintvec_minmax_conv, &param);
1109 	if (res)
1110 		return res;
1111 	if (write)
1112 		WRITE_ONCE(*data, val);
1113 	return 0;
1114 }
1115 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1116 
/*
 * Convert a single value for proc_dopipe_max_size().
 *
 * On read, *valp is widened into *lvalp. On write, the parsed value is
 * passed through round_pipe_size(); a result of 0 is rejected with -EINVAL,
 * otherwise the rounded size is stored in *valp.
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
					unsigned int *valp,
					int write, void *data)
{
	unsigned int rounded;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	rounded = round_pipe_size(*lvalp);
	if (rounded == 0)
		return -EINVAL;

	*valp = rounded;
	return 0;
}
1136 
/*
 * sysctl handler for a pipe size limit: an unsigned int vector handler that
 * uses do_proc_dopipe_max_size_conv() as its per-value conversion, with no
 * extra conversion data.
 */
static int proc_dopipe_max_size(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	return do_proc_douintvec(table, write, buffer, lenp, ppos,
				 do_proc_dopipe_max_size_conv, NULL);
}
1143 
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern is neither an
 * absolute path (leading '/') nor a pipe handler (leading '|').
 * Compiles to an empty function without CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
1157 
/*
 * proc_dointvec_minmax() wrapper that re-validates the coredump settings
 * after every successful access.
 */
static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (err)
		return err;

	validate_coredump_safety();
	return 0;
}
1166 
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() wrapper that re-validates the coredump settings after
 * every successful access.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = proc_dostring(table, write, buffer, lenp, ppos);

	if (err)
		return err;

	validate_coredump_safety();
	return 0;
}
#endif
1177 
#ifdef CONFIG_MAGIC_SYSRQ
/*
 * sysctl handler for the SysRq mask.
 *
 * The integer handler operates on a local copy seeded from sysrq_mask();
 * after a successful write the new value is applied through
 * sysrq_toggle_support(). Reads and failed writes return the handler's
 * result unchanged.
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int mask = sysrq_mask();
	int err;

	err = __do_proc_dointvec(&mask, table, write, buffer,
				 lenp, ppos, NULL, NULL);
	if (!err && write)
		sysrq_toggle_support(mask);

	return err;
}
#endif
1197 
/*
 * __do_proc_doulongvec_minmax - read or write a vector of unsigned longs
 * @data: array of unsigned long values backing the sysctl
 * @table: the sysctl table; maxlen bounds the number of elements and
 *	   extra1/extra2, when non-NULL, point to the min/max allowed value
 * @write: non-zero to parse values from @buffer into @data, zero to format
 *	   the values in @data into @buffer
 * @buffer: the text buffer
 * @lenp: in: number of bytes available in @buffer; out: number of bytes
 *	  consumed or produced
 * @ppos: file position, advanced by the final *lenp
 * @convmul: multiplier applied to parsed values (divisor on output)
 * @convdiv: divisor applied to parsed values (multiplier on output)
 *
 * Returns 0 on success, -EINVAL when a written value is out of range or no
 * value was stored at all, or the error reported by proc_get_long().
 */
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos,
		unsigned long convmul, unsigned long convdiv)
{
	unsigned long *i, *min, *max;
	int vleft, first = 1, err = 0;
	size_t left;
	char *p;

	/* Nothing to do: no storage, empty buffer, or a read past offset 0. */
	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (unsigned long *) data;
	min = (unsigned long *) table->extra1;
	max = (unsigned long *) table->extra2;
	vleft = table->maxlen / sizeof(unsigned long);
	left = *lenp;

	if (write) {
		/* Skips parsing; *ppos still advances by the full *lenp. */
		if (proc_first_pos_non_zero_ignore(ppos, table))
			goto out;

		/* At most PAGE_SIZE - 1 bytes of input are parsed per call. */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		p = buffer;
	}

	/* 'first' stays set until one element has been fully processed. */
	for (; left && vleft--; i++, first = 0) {
		unsigned long val;

		if (write) {
			bool neg;

			proc_skip_spaces(&p, &left);
			if (!left)
				break;

			err = proc_get_long(&p, &left, &val, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			/*
			 * NOTE(review): a negative input is dropped without an
			 * error, yet the loop increment still moves on to the
			 * next element and clears 'first'.
			 */
			if (neg)
				continue;
			/* NOTE(review): convmul * val is not checked for overflow. */
			val = convmul * val / convdiv;
			if ((min && val < *min) || (max && val > *max)) {
				err = -EINVAL;
				break;
			}
			WRITE_ONCE(*i, val);
		} else {
			val = convdiv * READ_ONCE(*i) / convmul;
			/* Values after the first are separated by a tab. */
			if (!first)
				proc_put_char(&buffer, &left, '\t');
			proc_put_long(&buffer, &left, val, false);
		}
	}

	/* Terminate the output with a newline if anything was printed. */
	if (!write && !first && left && !err)
		proc_put_char(&buffer, &left, '\n');
	/* Swallow trailing whitespace so it counts as consumed input. */
	if (write && !err)
		proc_skip_spaces(&p, &left);
	/*
	 * No element was completed on write: report the parse error, or
	 * -EINVAL if there was none. *lenp and *ppos are left untouched.
	 */
	if (write && first)
		return err ? : -EINVAL;
	*lenp -= left;
out:
	*ppos += *lenp;
	return err;
}
1269 
/*
 * Run __do_proc_doulongvec_minmax() on the table's own data pointer, with
 * the given scaling factors.
 */
static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
		unsigned long convdiv)
{
	return __do_proc_doulongvec_minmax(table->data, table, write,
			buffer, lenp, ppos, convmul, convdiv);
}
1277 
1278 /**
1279  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1280  * @table: the sysctl table
1281  * @write: %TRUE if this is a write to the sysctl file
1282  * @buffer: the user buffer
1283  * @lenp: the size of the user buffer
1284  * @ppos: file position
1285  *
1286  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1287  * values from/to the user buffer, treated as an ASCII string.
1288  *
1289  * This routine will ensure the values are within the range specified by
1290  * table->extra1 (min) and table->extra2 (max).
1291  *
1292  * Returns 0 on success.
1293  */
proc_doulongvec_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1294 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1295 			   void *buffer, size_t *lenp, loff_t *ppos)
1296 {
1297     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1298 }
1299 
1300 /**
1301  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1302  * @table: the sysctl table
1303  * @write: %TRUE if this is a write to the sysctl file
1304  * @buffer: the user buffer
1305  * @lenp: the size of the user buffer
1306  * @ppos: file position
1307  *
1308  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1309  * values from/to the user buffer, treated as an ASCII string. The values
1310  * are treated as milliseconds, and converted to jiffies when they are stored.
1311  *
1312  * This routine will ensure the values are within the range specified by
1313  * table->extra1 (min) and table->extra2 (max).
1314  *
1315  * Returns 0 on success.
1316  */
proc_doulongvec_ms_jiffies_minmax(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1317 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1318 				      void *buffer, size_t *lenp, loff_t *ppos)
1319 {
1320     return do_proc_doulongvec_minmax(table, write, buffer,
1321 				     lenp, ppos, HZ, 1000l);
1322 }
1323 
1324 
do_proc_dointvec_jiffies_conv(bool * negp,unsigned long * lvalp,int * valp,int write,void * data)1325 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1326 					 int *valp,
1327 					 int write, void *data)
1328 {
1329 	if (write) {
1330 		if (*lvalp > INT_MAX / HZ)
1331 			return 1;
1332 		if (*negp)
1333 			WRITE_ONCE(*valp, -*lvalp * HZ);
1334 		else
1335 			WRITE_ONCE(*valp, *lvalp * HZ);
1336 	} else {
1337 		int val = READ_ONCE(*valp);
1338 		unsigned long lval;
1339 		if (val < 0) {
1340 			*negp = true;
1341 			lval = -(unsigned long)val;
1342 		} else {
1343 			*negp = false;
1344 			lval = (unsigned long)val;
1345 		}
1346 		*lvalp = lval / HZ;
1347 	}
1348 	return 0;
1349 }
1350 
do_proc_dointvec_userhz_jiffies_conv(bool * negp,unsigned long * lvalp,int * valp,int write,void * data)1351 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1352 						int *valp,
1353 						int write, void *data)
1354 {
1355 	if (write) {
1356 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1357 			return 1;
1358 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1359 	} else {
1360 		int val = *valp;
1361 		unsigned long lval;
1362 		if (val < 0) {
1363 			*negp = true;
1364 			lval = -(unsigned long)val;
1365 		} else {
1366 			*negp = false;
1367 			lval = (unsigned long)val;
1368 		}
1369 		*lvalp = jiffies_to_clock_t(lval);
1370 	}
1371 	return 0;
1372 }
1373 
/*
 * Convert between milliseconds (user side) and jiffies (stored int).
 *
 * On write, the signed input is converted with msecs_to_jiffies(); results
 * above INT_MAX are refused with a return of 1, otherwise the value is
 * stored in *valp. On read, the stored value is split into sign (*negp) and
 * magnitude, and the magnitude is converted with jiffies_to_msecs() into
 * *lvalp.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int stored;

	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		if (jif > INT_MAX)
			return 1;
		WRITE_ONCE(*valp, (int)jif);
		return 0;
	}

	stored = READ_ONCE(*valp);
	*negp = stored < 0;
	magnitude = *negp ? -(unsigned long)stored : (unsigned long)stored;
	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
1398 
1399 /**
1400  * proc_dointvec_jiffies - read a vector of integers as seconds
1401  * @table: the sysctl table
1402  * @write: %TRUE if this is a write to the sysctl file
1403  * @buffer: the user buffer
1404  * @lenp: the size of the user buffer
1405  * @ppos: file position
1406  *
1407  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1408  * values from/to the user buffer, treated as an ASCII string.
1409  * The values read are assumed to be in seconds, and are converted into
1410  * jiffies.
1411  *
1412  * Returns 0 on success.
1413  */
proc_dointvec_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1414 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1415 			  void *buffer, size_t *lenp, loff_t *ppos)
1416 {
1417     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1418 		    	    do_proc_dointvec_jiffies_conv,NULL);
1419 }
1420 
1421 /**
1422  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1423  * @table: the sysctl table
1424  * @write: %TRUE if this is a write to the sysctl file
1425  * @buffer: the user buffer
1426  * @lenp: the size of the user buffer
1427  * @ppos: pointer to the file position
1428  *
1429  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1430  * values from/to the user buffer, treated as an ASCII string.
1431  * The values read are assumed to be in 1/USER_HZ seconds, and
1432  * are converted into jiffies.
1433  *
1434  * Returns 0 on success.
1435  */
proc_dointvec_userhz_jiffies(struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)1436 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1437 				 void *buffer, size_t *lenp, loff_t *ppos)
1438 {
1439     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1440 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
1441 }
1442 
/**
 * proc_dointvec_ms_jiffies - read a vector of integers as milliseconds
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: the current position in the file
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * The values read are assumed to be in 1/1000 seconds, and
 * are converted into jiffies.
 *
 * Returns 0 on success.
 */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	return do_proc_dointvec(table, write, buffer, lenp, ppos,
				do_proc_dointvec_ms_jiffies_conv, NULL);
}
1465 
/*
 * sysctl handler for cad_pid.
 *
 * The integer handler operates on a local pid number seeded from
 * pid_vnr(cad_pid). After a successful write, the number is resolved with
 * find_get_pid(); if no such pid exists -ESRCH is returned. Otherwise the
 * new struct pid is swapped into cad_pid and the reference to the previous
 * one is dropped.
 */
static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
		size_t *lenp, loff_t *ppos)
{
	struct pid *replacement, *previous;
	pid_t nr = pid_vnr(cad_pid);
	int err;

	err = __do_proc_dointvec(&nr, table, write, buffer,
				 lenp, ppos, NULL, NULL);
	if (err || !write)
		return err;

	replacement = find_get_pid(nr);
	if (!replacement)
		return -ESRCH;

	previous = xchg(&cad_pid, replacement);
	put_pid(previous);
	return 0;
}
1487 
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * The bitmap is stored at table->data and the bitmap length (in bits)
 * in table->maxlen.
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. Writing into
 * the file will clear the bitmap then update it with the given input.
 * A write at a non-zero file position is OR-ed into the existing bitmap
 * instead of replacing it.
 *
 * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be
 * allocated, -EINVAL for a negative or out-of-range bit number or a
 * descending range, or the error reported by proc_get_long().
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	bool first = 1;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	/* table->data holds a pointer to the bitmap, not the bitmap itself. */
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/*
	 * Character sets handed to proc_get_long() for the first and second
	 * number of a range; presumably the terminators it accepts there.
	 */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	/* Nothing to do: no bitmap, empty buffer, or a read past offset 0. */
	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *p = buffer;
		size_t skipped = 0;

		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		/* Parse into a scratch bitmap; the real one changes only on success. */
		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap)
			return -ENOMEM;
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					     sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* A lone number is the one-bit range val_a-val_a. */
			val_b = val_a;
			/* Step over the character that followed the number. */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						     &neg, tr_b, sizeof(tr_b),
						     &c);
				/*
				 * If we consumed all of a truncated buffer,
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				if (left) {
					p++;
					left--;
				}
			}

			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			first = 0;
			proc_skip_char(&p, &left, '\n');
		}
		/* Bytes beyond the parsed window count as unconsumed. */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;

		/* Emit each run of set bits as "a" or "a-b", comma separated. */
		while (left) {
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			/* Last set bit of the run that starts at bit_a. */
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first)
				proc_put_char(&buffer, &left, ',');
			proc_put_long(&buffer, &left, bit_a, false);
			if (bit_a != bit_b) {
				proc_put_char(&buffer, &left, '-');
				proc_put_long(&buffer, &left, bit_b, false);
			}

			first = 0; bit_b++;
		}
		proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/*
			 * A write at a non-zero position adds to the bitmap;
			 * a write at position 0 replaces it.
			 */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		*lenp -= left;
		*ppos += *lenp;
	}

	/* tmp_bitmap is still NULL on the read path. */
	bitmap_free(tmp_bitmap);
	return err;
}
1635 
1636 #else /* CONFIG_PROC_SYSCTL */
1637 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dostring(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1643 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1649 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_douintvec(struct ctl_table *table, int write,
		  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1655 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1661 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_douintvec_minmax(struct ctl_table *table, int write,
			  void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1667 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dou8vec_minmax(struct ctl_table *table, int write,
			void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1673 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1679 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1685 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1691 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1697 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1703 
/* Stub for builds without CONFIG_PROC_SYSCTL: always returns -ENOSYS. */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
1709 
1710 #endif /* CONFIG_PROC_SYSCTL */
1711 
1712 #if defined(CONFIG_SYSCTL)
/*
 * proc_do_static_key - expose a static key as a 0/1 sysctl
 * @table: the sysctl table; table->data points to the struct static_key
 * @write: non-zero if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * The key state is copied into a local int and accessed through
 * proc_dointvec_minmax() with bounds SYSCTL_ZERO and SYSCTL_ONE. After a
 * successful write the key is enabled or disabled to match. Writes need
 * CAP_SYS_ADMIN, and the whole sequence runs under a function-local mutex.
 *
 * Returns -EPERM for a write without CAP_SYS_ADMIN, otherwise the result of
 * proc_dointvec_minmax().
 */
int proc_do_static_key(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(static_key_mutex);
	struct static_key *key = table->data;
	int enabled, err;
	struct ctl_table shadow = {
		.data   = &enabled,
		.maxlen = sizeof(enabled),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&static_key_mutex);
	enabled = static_key_enabled(key);
	err = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
	if (err || !write)
		goto unlock;

	if (enabled)
		static_key_enable(key);
	else
		static_key_disable(key);
unlock:
	mutex_unlock(&static_key_mutex);
	return err;
}
1742 
1743 static struct ctl_table kern_table[] = {
1744 	{
1745 		.procname	= "sched_child_runs_first",
1746 		.data		= &sysctl_sched_child_runs_first,
1747 		.maxlen		= sizeof(unsigned int),
1748 		.mode		= 0644,
1749 		.proc_handler	= proc_dointvec,
1750 	},
1751 #ifdef CONFIG_SCHED_RT_CAS
1752 	{
1753 		.procname	= "sched_enable_rt_cas",
1754 		.data		= &sysctl_sched_enable_rt_cas,
1755 		.maxlen		= sizeof(unsigned int),
1756 		.mode		= 0644,
1757 		.proc_handler	= proc_dointvec,
1758 	},
1759 #endif
1760 #ifdef CONFIG_SCHED_RT_ACTIVE_LB
1761 	{
1762 		.procname	= "sched_enable_rt_active_lb",
1763 		.data		= &sysctl_sched_enable_rt_active_lb,
1764 		.maxlen		= sizeof(unsigned int),
1765 		.mode		= 0644,
1766 		.proc_handler	= proc_dointvec,
1767 	},
1768 #endif
1769 #ifdef CONFIG_SCHED_WALT
1770 	{
1771 		.procname	= "sched_use_walt_cpu_util",
1772 		.data		= &sysctl_sched_use_walt_cpu_util,
1773 		.maxlen		= sizeof(unsigned int),
1774 		.mode		= 0644,
1775 		.proc_handler	= proc_dointvec_minmax,
1776 		.extra1		= SYSCTL_ZERO,
1777 		.extra2		= SYSCTL_ONE,
1778 	},
1779 	{
1780 		.procname	= "sched_use_walt_task_util",
1781 		.data		= &sysctl_sched_use_walt_task_util,
1782 		.maxlen		= sizeof(unsigned int),
1783 		.mode		= 0644,
1784 		.proc_handler	= proc_dointvec_minmax,
1785 		.extra1		= SYSCTL_ZERO,
1786 		.extra2		= SYSCTL_ONE,
1787 	},
1788 	{
1789 		.procname	= "sched_walt_init_task_load_pct",
1790 		.data		= &sysctl_sched_walt_init_task_load_pct,
1791 		.maxlen		= sizeof(unsigned int),
1792 		.mode		= 0644,
1793 		.proc_handler	= sysctl_sched_walt_init_task_load_pct_sysctl_handler,
1794 	},
1795 	{
1796 		.procname	= "sched_cpu_high_irqload",
1797 		.data		= &sysctl_sched_cpu_high_irqload,
1798 		.maxlen		= sizeof(unsigned int),
1799 		.mode		= 0644,
1800 		.proc_handler	= proc_dointvec,
1801 	},
1802 #endif
1803 #ifdef CONFIG_SCHED_DEBUG
1804 	{
1805 		.procname	= "sched_min_granularity_ns",
1806 		.data		= &sysctl_sched_min_granularity,
1807 		.maxlen		= sizeof(unsigned int),
1808 		.mode		= 0644,
1809 		.proc_handler	= sched_proc_update_handler,
1810 		.extra1		= &min_sched_granularity_ns,
1811 		.extra2		= &max_sched_granularity_ns,
1812 	},
1813 	{
1814 		.procname	= "sched_latency_ns",
1815 		.data		= &sysctl_sched_latency,
1816 		.maxlen		= sizeof(unsigned int),
1817 		.mode		= 0644,
1818 		.proc_handler	= sched_proc_update_handler,
1819 		.extra1		= &min_sched_granularity_ns,
1820 		.extra2		= &max_sched_granularity_ns,
1821 	},
1822 	{
1823 		.procname	= "sched_wakeup_granularity_ns",
1824 		.data		= &sysctl_sched_wakeup_granularity,
1825 		.maxlen		= sizeof(unsigned int),
1826 		.mode		= 0644,
1827 		.proc_handler	= sched_proc_update_handler,
1828 		.extra1		= &min_wakeup_granularity_ns,
1829 		.extra2		= &max_wakeup_granularity_ns,
1830 	},
1831 #ifdef CONFIG_SMP
1832 	{
1833 		.procname	= "sched_tunable_scaling",
1834 		.data		= &sysctl_sched_tunable_scaling,
1835 		.maxlen		= sizeof(enum sched_tunable_scaling),
1836 		.mode		= 0644,
1837 		.proc_handler	= sched_proc_update_handler,
1838 		.extra1		= &min_sched_tunable_scaling,
1839 		.extra2		= &max_sched_tunable_scaling,
1840 	},
1841 	{
1842 		.procname	= "sched_migration_cost_ns",
1843 		.data		= &sysctl_sched_migration_cost,
1844 		.maxlen		= sizeof(unsigned int),
1845 		.mode		= 0644,
1846 		.proc_handler	= proc_dointvec,
1847 	},
1848 	{
1849 		.procname	= "sched_nr_migrate",
1850 		.data		= &sysctl_sched_nr_migrate,
1851 		.maxlen		= sizeof(unsigned int),
1852 		.mode		= 0644,
1853 		.proc_handler	= proc_dointvec,
1854 	},
1855 #ifdef CONFIG_SCHEDSTATS
1856 	{
1857 		.procname	= "sched_schedstats",
1858 		.data		= NULL,
1859 		.maxlen		= sizeof(unsigned int),
1860 		.mode		= 0644,
1861 		.proc_handler	= sysctl_schedstats,
1862 		.extra1		= SYSCTL_ZERO,
1863 		.extra2		= SYSCTL_ONE,
1864 	},
1865 #endif /* CONFIG_SCHEDSTATS */
1866 #endif /* CONFIG_SMP */
1867 #ifdef CONFIG_NUMA_BALANCING
1868 	{
1869 		.procname	= "numa_balancing_scan_delay_ms",
1870 		.data		= &sysctl_numa_balancing_scan_delay,
1871 		.maxlen		= sizeof(unsigned int),
1872 		.mode		= 0644,
1873 		.proc_handler	= proc_dointvec,
1874 	},
1875 	{
1876 		.procname	= "numa_balancing_scan_period_min_ms",
1877 		.data		= &sysctl_numa_balancing_scan_period_min,
1878 		.maxlen		= sizeof(unsigned int),
1879 		.mode		= 0644,
1880 		.proc_handler	= proc_dointvec,
1881 	},
1882 	{
1883 		.procname	= "numa_balancing_scan_period_max_ms",
1884 		.data		= &sysctl_numa_balancing_scan_period_max,
1885 		.maxlen		= sizeof(unsigned int),
1886 		.mode		= 0644,
1887 		.proc_handler	= proc_dointvec,
1888 	},
1889 	{
1890 		.procname	= "numa_balancing_scan_size_mb",
1891 		.data		= &sysctl_numa_balancing_scan_size,
1892 		.maxlen		= sizeof(unsigned int),
1893 		.mode		= 0644,
1894 		.proc_handler	= proc_dointvec_minmax,
1895 		.extra1		= SYSCTL_ONE,
1896 	},
1897 	{
1898 		.procname	= "numa_balancing",
1899 		.data		= NULL, /* filled in by handler */
1900 		.maxlen		= sizeof(unsigned int),
1901 		.mode		= 0644,
1902 		.proc_handler	= sysctl_numa_balancing,
1903 		.extra1		= SYSCTL_ZERO,
1904 		.extra2		= SYSCTL_ONE,
1905 	},
1906 #endif /* CONFIG_NUMA_BALANCING */
1907 #endif /* CONFIG_SCHED_DEBUG */
1908 	{
1909 		.procname	= "sched_rt_period_us",
1910 		.data		= &sysctl_sched_rt_period,
1911 		.maxlen		= sizeof(unsigned int),
1912 		.mode		= 0644,
1913 		.proc_handler	= sched_rt_handler,
1914 	},
1915 	{
1916 		.procname	= "sched_rt_runtime_us",
1917 		.data		= &sysctl_sched_rt_runtime,
1918 		.maxlen		= sizeof(int),
1919 		.mode		= 0644,
1920 		.proc_handler	= sched_rt_handler,
1921 	},
1922 	{
1923 		.procname	= "sched_deadline_period_max_us",
1924 		.data		= &sysctl_sched_dl_period_max,
1925 		.maxlen		= sizeof(unsigned int),
1926 		.mode		= 0644,
1927 		.proc_handler	= proc_dointvec,
1928 	},
1929 	{
1930 		.procname	= "sched_deadline_period_min_us",
1931 		.data		= &sysctl_sched_dl_period_min,
1932 		.maxlen		= sizeof(unsigned int),
1933 		.mode		= 0644,
1934 		.proc_handler	= proc_dointvec,
1935 	},
1936 	{
1937 		.procname	= "sched_rr_timeslice_ms",
1938 		.data		= &sysctl_sched_rr_timeslice,
1939 		.maxlen		= sizeof(int),
1940 		.mode		= 0644,
1941 		.proc_handler	= sched_rr_handler,
1942 	},
1943 #ifdef CONFIG_UCLAMP_TASK
1944 	{
1945 		.procname	= "sched_util_clamp_min",
1946 		.data		= &sysctl_sched_uclamp_util_min,
1947 		.maxlen		= sizeof(unsigned int),
1948 		.mode		= 0644,
1949 		.proc_handler	= sysctl_sched_uclamp_handler,
1950 	},
1951 	{
1952 		.procname	= "sched_util_clamp_max",
1953 		.data		= &sysctl_sched_uclamp_util_max,
1954 		.maxlen		= sizeof(unsigned int),
1955 		.mode		= 0644,
1956 		.proc_handler	= sysctl_sched_uclamp_handler,
1957 	},
1958 	{
1959 		.procname	= "sched_util_clamp_min_rt_default",
1960 		.data		= &sysctl_sched_uclamp_util_min_rt_default,
1961 		.maxlen		= sizeof(unsigned int),
1962 		.mode		= 0644,
1963 		.proc_handler	= sysctl_sched_uclamp_handler,
1964 	},
1965 #endif
1966 #ifdef CONFIG_SCHED_AUTOGROUP
1967 	{
1968 		.procname	= "sched_autogroup_enabled",
1969 		.data		= &sysctl_sched_autogroup_enabled,
1970 		.maxlen		= sizeof(unsigned int),
1971 		.mode		= 0644,
1972 		.proc_handler	= proc_dointvec_minmax,
1973 		.extra1		= SYSCTL_ZERO,
1974 		.extra2		= SYSCTL_ONE,
1975 	},
1976 #endif
1977 #ifdef CONFIG_CFS_BANDWIDTH
1978 	{
1979 		.procname	= "sched_cfs_bandwidth_slice_us",
1980 		.data		= &sysctl_sched_cfs_bandwidth_slice,
1981 		.maxlen		= sizeof(unsigned int),
1982 		.mode		= 0644,
1983 		.proc_handler	= proc_dointvec_minmax,
1984 		.extra1		= SYSCTL_ONE,
1985 	},
1986 #endif
1987 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
1988 	{
1989 		.procname	= "sched_energy_aware",
1990 		.data		= &sysctl_sched_energy_aware,
1991 		.maxlen		= sizeof(unsigned int),
1992 		.mode		= 0644,
1993 		.proc_handler	= sched_energy_aware_handler,
1994 		.extra1		= SYSCTL_ZERO,
1995 		.extra2		= SYSCTL_ONE,
1996 	},
1997 #endif
1998 #ifdef CONFIG_PROVE_LOCKING
1999 	{
2000 		.procname	= "prove_locking",
2001 		.data		= &prove_locking,
2002 		.maxlen		= sizeof(int),
2003 		.mode		= 0644,
2004 		.proc_handler	= proc_dointvec,
2005 	},
2006 #endif
2007 #ifdef CONFIG_LOCK_STAT
2008 	{
2009 		.procname	= "lock_stat",
2010 		.data		= &lock_stat,
2011 		.maxlen		= sizeof(int),
2012 		.mode		= 0644,
2013 		.proc_handler	= proc_dointvec,
2014 	},
2015 #endif
2016 	{
2017 		.procname	= "panic",
2018 		.data		= &panic_timeout,
2019 		.maxlen		= sizeof(int),
2020 		.mode		= 0644,
2021 		.proc_handler	= proc_dointvec,
2022 	},
2023 #ifdef CONFIG_COREDUMP
2024 	{
2025 		.procname	= "core_uses_pid",
2026 		.data		= &core_uses_pid,
2027 		.maxlen		= sizeof(int),
2028 		.mode		= 0644,
2029 		.proc_handler	= proc_dointvec,
2030 	},
2031 	{
2032 		.procname	= "core_pattern",
2033 		.data		= core_pattern,
2034 		.maxlen		= CORENAME_MAX_SIZE,
2035 		.mode		= 0644,
2036 		.proc_handler	= proc_dostring_coredump,
2037 	},
2038 	{
2039 		.procname	= "core_pipe_limit",
2040 		.data		= &core_pipe_limit,
2041 		.maxlen		= sizeof(unsigned int),
2042 		.mode		= 0644,
2043 		.proc_handler	= proc_dointvec,
2044 	},
2045 #endif
2046 #ifdef CONFIG_PROC_SYSCTL
2047 	{
2048 		.procname	= "tainted",
2049 		.maxlen 	= sizeof(long),
2050 		.mode		= 0644,
2051 		.proc_handler	= proc_taint,
2052 	},
2053 	{
2054 		.procname	= "sysctl_writes_strict",
2055 		.data		= &sysctl_writes_strict,
2056 		.maxlen		= sizeof(int),
2057 		.mode		= 0644,
2058 		.proc_handler	= proc_dointvec_minmax,
2059 		.extra1		= SYSCTL_NEG_ONE,
2060 		.extra2		= SYSCTL_ONE,
2061 	},
2062 #endif
2063 #ifdef CONFIG_LATENCYTOP
2064 	{
2065 		.procname	= "latencytop",
2066 		.data		= &latencytop_enabled,
2067 		.maxlen		= sizeof(int),
2068 		.mode		= 0644,
2069 		.proc_handler	= sysctl_latencytop,
2070 	},
2071 #endif
2072 #ifdef CONFIG_BLK_DEV_INITRD
2073 	{
2074 		.procname	= "real-root-dev",
2075 		.data		= &real_root_dev,
2076 		.maxlen		= sizeof(int),
2077 		.mode		= 0644,
2078 		.proc_handler	= proc_dointvec,
2079 	},
2080 #endif
2081 	{
2082 		.procname	= "print-fatal-signals",
2083 		.data		= &print_fatal_signals,
2084 		.maxlen		= sizeof(int),
2085 		.mode		= 0644,
2086 		.proc_handler	= proc_dointvec,
2087 	},
2088 #ifdef CONFIG_SPARC
2089 	{
2090 		.procname	= "reboot-cmd",
2091 		.data		= reboot_command,
2092 		.maxlen		= 256,
2093 		.mode		= 0644,
2094 		.proc_handler	= proc_dostring,
2095 	},
2096 	{
2097 		.procname	= "stop-a",
2098 		.data		= &stop_a_enabled,
2099 		.maxlen		= sizeof (int),
2100 		.mode		= 0644,
2101 		.proc_handler	= proc_dointvec,
2102 	},
2103 	{
2104 		.procname	= "scons-poweroff",
2105 		.data		= &scons_pwroff,
2106 		.maxlen		= sizeof (int),
2107 		.mode		= 0644,
2108 		.proc_handler	= proc_dointvec,
2109 	},
2110 #endif
2111 #ifdef CONFIG_SPARC64
2112 	{
2113 		.procname	= "tsb-ratio",
2114 		.data		= &sysctl_tsb_ratio,
2115 		.maxlen		= sizeof (int),
2116 		.mode		= 0644,
2117 		.proc_handler	= proc_dointvec,
2118 	},
2119 #endif
2120 #ifdef CONFIG_PARISC
2121 	{
2122 		.procname	= "soft-power",
2123 		.data		= &pwrsw_enabled,
2124 		.maxlen		= sizeof (int),
2125 		.mode		= 0644,
2126 		.proc_handler	= proc_dointvec,
2127 	},
2128 #endif
2129 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
2130 	{
2131 		.procname	= "unaligned-trap",
2132 		.data		= &unaligned_enabled,
2133 		.maxlen		= sizeof (int),
2134 		.mode		= 0644,
2135 		.proc_handler	= proc_dointvec,
2136 	},
2137 #endif
2138 	{
2139 		.procname	= "ctrl-alt-del",
2140 		.data		= &C_A_D,
2141 		.maxlen		= sizeof(int),
2142 		.mode		= 0644,
2143 		.proc_handler	= proc_dointvec,
2144 	},
2145 #ifdef CONFIG_FUNCTION_TRACER
2146 	{
2147 		.procname	= "ftrace_enabled",
2148 		.data		= &ftrace_enabled,
2149 		.maxlen		= sizeof(int),
2150 		.mode		= 0644,
2151 		.proc_handler	= ftrace_enable_sysctl,
2152 	},
2153 #endif
2154 #ifdef CONFIG_STACK_TRACER
2155 	{
2156 		.procname	= "stack_tracer_enabled",
2157 		.data		= &stack_tracer_enabled,
2158 		.maxlen		= sizeof(int),
2159 		.mode		= 0644,
2160 		.proc_handler	= stack_trace_sysctl,
2161 	},
2162 #endif
2163 #ifdef CONFIG_TRACING
2164 	{
2165 		.procname	= "ftrace_dump_on_oops",
2166 		.data		= &ftrace_dump_on_oops,
2167 		.maxlen		= sizeof(int),
2168 		.mode		= 0644,
2169 		.proc_handler	= proc_dointvec,
2170 	},
2171 	{
2172 		.procname	= "traceoff_on_warning",
2173 		.data		= &__disable_trace_on_warning,
2174 		.maxlen		= sizeof(__disable_trace_on_warning),
2175 		.mode		= 0644,
2176 		.proc_handler	= proc_dointvec,
2177 	},
2178 	{
2179 		.procname	= "tracepoint_printk",
2180 		.data		= &tracepoint_printk,
2181 		.maxlen		= sizeof(tracepoint_printk),
2182 		.mode		= 0644,
2183 		.proc_handler	= tracepoint_printk_sysctl,
2184 	},
2185 #endif
2186 #ifdef CONFIG_KEXEC_CORE
2187 	{
2188 		.procname	= "kexec_load_disabled",
2189 		.data		= &kexec_load_disabled,
2190 		.maxlen		= sizeof(int),
2191 		.mode		= 0644,
2192 		/* only handle a transition from default "0" to "1" */
2193 		.proc_handler	= proc_dointvec_minmax,
2194 		.extra1		= SYSCTL_ONE,
2195 		.extra2		= SYSCTL_ONE,
2196 	},
2197 #endif
2198 #ifdef CONFIG_MODULES
2199 	{
2200 		.procname	= "modprobe",
2201 		.data		= &modprobe_path,
2202 		.maxlen		= KMOD_PATH_LEN,
2203 		.mode		= 0644,
2204 		.proc_handler	= proc_dostring,
2205 	},
2206 	{
2207 		.procname	= "modules_disabled",
2208 		.data		= &modules_disabled,
2209 		.maxlen		= sizeof(int),
2210 		.mode		= 0644,
2211 		/* only handle a transition from default "0" to "1" */
2212 		.proc_handler	= proc_dointvec_minmax,
2213 		.extra1		= SYSCTL_ONE,
2214 		.extra2		= SYSCTL_ONE,
2215 	},
2216 #endif
2217 #ifdef CONFIG_UEVENT_HELPER
2218 	{
2219 		.procname	= "hotplug",
2220 		.data		= &uevent_helper,
2221 		.maxlen		= UEVENT_HELPER_PATH_LEN,
2222 		.mode		= 0644,
2223 		.proc_handler	= proc_dostring,
2224 	},
2225 #endif
2226 #ifdef CONFIG_CHR_DEV_SG
2227 	{
2228 		.procname	= "sg-big-buff",
2229 		.data		= &sg_big_buff,
2230 		.maxlen		= sizeof (int),
2231 		.mode		= 0444,
2232 		.proc_handler	= proc_dointvec,
2233 	},
2234 #endif
2235 #ifdef CONFIG_BSD_PROCESS_ACCT
2236 	{
2237 		.procname	= "acct",
2238 		.data		= &acct_parm,
2239 		.maxlen		= 3*sizeof(int),
2240 		.mode		= 0644,
2241 		.proc_handler	= proc_dointvec,
2242 	},
2243 #endif
2244 #ifdef CONFIG_MAGIC_SYSRQ
2245 	{
2246 		.procname	= "sysrq",
2247 		.data		= NULL,
2248 		.maxlen		= sizeof (int),
2249 		.mode		= 0644,
2250 		.proc_handler	= sysrq_sysctl_handler,
2251 	},
2252 #endif
2253 #ifdef CONFIG_PROC_SYSCTL
2254 	{
2255 		.procname	= "cad_pid",
2256 		.data		= NULL,
2257 		.maxlen		= sizeof (int),
2258 		.mode		= 0600,
2259 		.proc_handler	= proc_do_cad_pid,
2260 	},
2261 #endif
2262 	{
2263 		.procname	= "threads-max",
2264 		.data		= NULL,
2265 		.maxlen		= sizeof(int),
2266 		.mode		= 0644,
2267 		.proc_handler	= sysctl_max_threads,
2268 	},
2269 	{
2270 		.procname	= "random",
2271 		.mode		= 0555,
2272 		.child		= random_table,
2273 	},
2274 	{
2275 		.procname	= "usermodehelper",
2276 		.mode		= 0555,
2277 		.child		= usermodehelper_table,
2278 	},
2279 #ifdef CONFIG_FW_LOADER_USER_HELPER
2280 	{
2281 		.procname	= "firmware_config",
2282 		.mode		= 0555,
2283 		.child		= firmware_config_table,
2284 	},
2285 #endif
2286 	{
2287 		.procname	= "overflowuid",
2288 		.data		= &overflowuid,
2289 		.maxlen		= sizeof(int),
2290 		.mode		= 0644,
2291 		.proc_handler	= proc_dointvec_minmax,
2292 		.extra1		= &minolduid,
2293 		.extra2		= &maxolduid,
2294 	},
2295 	{
2296 		.procname	= "overflowgid",
2297 		.data		= &overflowgid,
2298 		.maxlen		= sizeof(int),
2299 		.mode		= 0644,
2300 		.proc_handler	= proc_dointvec_minmax,
2301 		.extra1		= &minolduid,
2302 		.extra2		= &maxolduid,
2303 	},
2304 #ifdef CONFIG_S390
2305 	{
2306 		.procname	= "userprocess_debug",
2307 		.data		= &show_unhandled_signals,
2308 		.maxlen		= sizeof(int),
2309 		.mode		= 0644,
2310 		.proc_handler	= proc_dointvec,
2311 	},
2312 #endif
2313 	{
2314 		.procname	= "pid_max",
2315 		.data		= &pid_max,
2316 		.maxlen		= sizeof (int),
2317 		.mode		= 0644,
2318 		.proc_handler	= proc_dointvec_minmax,
2319 		.extra1		= &pid_max_min,
2320 		.extra2		= &pid_max_max,
2321 	},
2322 	{
2323 		.procname	= "panic_on_oops",
2324 		.data		= &panic_on_oops,
2325 		.maxlen		= sizeof(int),
2326 		.mode		= 0644,
2327 		.proc_handler	= proc_dointvec,
2328 	},
2329 	{
2330 		.procname	= "panic_print",
2331 		.data		= &panic_print,
2332 		.maxlen		= sizeof(unsigned long),
2333 		.mode		= 0644,
2334 		.proc_handler	= proc_doulongvec_minmax,
2335 	},
2336 #if defined CONFIG_PRINTK
2337 	{
2338 		.procname	= "printk",
2339 		.data		= &console_loglevel,
2340 		.maxlen		= 4*sizeof(int),
2341 		.mode		= 0644,
2342 		.proc_handler	= proc_dointvec,
2343 	},
2344 	{
2345 		.procname	= "printk_ratelimit",
2346 		.data		= &printk_ratelimit_state.interval,
2347 		.maxlen		= sizeof(int),
2348 		.mode		= 0644,
2349 		.proc_handler	= proc_dointvec_jiffies,
2350 	},
2351 	{
2352 		.procname	= "printk_ratelimit_burst",
2353 		.data		= &printk_ratelimit_state.burst,
2354 		.maxlen		= sizeof(int),
2355 		.mode		= 0644,
2356 		.proc_handler	= proc_dointvec,
2357 	},
2358 	{
2359 		.procname	= "printk_delay",
2360 		.data		= &printk_delay_msec,
2361 		.maxlen		= sizeof(int),
2362 		.mode		= 0644,
2363 		.proc_handler	= proc_dointvec_minmax,
2364 		.extra1		= SYSCTL_ZERO,
2365 		.extra2		= &ten_thousand,
2366 	},
2367 	{
2368 		.procname	= "printk_devkmsg",
2369 		.data		= devkmsg_log_str,
2370 		.maxlen		= DEVKMSG_STR_MAX_SIZE,
2371 		.mode		= 0644,
2372 		.proc_handler	= devkmsg_sysctl_set_loglvl,
2373 	},
2374 	{
2375 		.procname	= "dmesg_restrict",
2376 		.data		= &dmesg_restrict,
2377 		.maxlen		= sizeof(int),
2378 		.mode		= 0644,
2379 		.proc_handler	= proc_dointvec_minmax_sysadmin,
2380 		.extra1		= SYSCTL_ZERO,
2381 		.extra2		= SYSCTL_ONE,
2382 	},
2383 	{
2384 		.procname	= "kptr_restrict",
2385 		.data		= &kptr_restrict,
2386 		.maxlen		= sizeof(int),
2387 		.mode		= 0644,
2388 		.proc_handler	= proc_dointvec_minmax_sysadmin,
2389 		.extra1		= SYSCTL_ZERO,
2390 		.extra2		= SYSCTL_TWO,
2391 	},
2392 #endif
2393 	{
2394 		.procname	= "ngroups_max",
2395 		.data		= &ngroups_max,
2396 		.maxlen		= sizeof (int),
2397 		.mode		= 0444,
2398 		.proc_handler	= proc_dointvec,
2399 	},
2400 	{
2401 		.procname	= "cap_last_cap",
2402 		.data		= (void *)&cap_last_cap,
2403 		.maxlen		= sizeof(int),
2404 		.mode		= 0444,
2405 		.proc_handler	= proc_dointvec,
2406 	},
2407 #if defined(CONFIG_LOCKUP_DETECTOR)
2408 	{
2409 		.procname       = "watchdog",
2410 		.data		= &watchdog_user_enabled,
2411 		.maxlen		= sizeof(int),
2412 		.mode		= 0644,
2413 		.proc_handler   = proc_watchdog,
2414 		.extra1		= SYSCTL_ZERO,
2415 		.extra2		= SYSCTL_ONE,
2416 	},
2417 	{
2418 		.procname	= "watchdog_thresh",
2419 		.data		= &watchdog_thresh,
2420 		.maxlen		= sizeof(int),
2421 		.mode		= 0644,
2422 		.proc_handler	= proc_watchdog_thresh,
2423 		.extra1		= SYSCTL_ZERO,
2424 		.extra2		= &sixty,
2425 	},
2426 	{
2427 		.procname       = "nmi_watchdog",
2428 		.data		= &nmi_watchdog_user_enabled,
2429 		.maxlen		= sizeof(int),
2430 		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
2431 		.proc_handler   = proc_nmi_watchdog,
2432 		.extra1		= SYSCTL_ZERO,
2433 		.extra2		= SYSCTL_ONE,
2434 	},
2435 	{
2436 		.procname	= "watchdog_cpumask",
2437 		.data		= &watchdog_cpumask_bits,
2438 		.maxlen		= NR_CPUS,
2439 		.mode		= 0644,
2440 		.proc_handler	= proc_watchdog_cpumask,
2441 	},
2442 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
2443 	{
2444 		.procname       = "soft_watchdog",
2445 		.data		= &soft_watchdog_user_enabled,
2446 		.maxlen		= sizeof(int),
2447 		.mode		= 0644,
2448 		.proc_handler   = proc_soft_watchdog,
2449 		.extra1		= SYSCTL_ZERO,
2450 		.extra2		= SYSCTL_ONE,
2451 	},
2452 	{
2453 		.procname	= "softlockup_panic",
2454 		.data		= &softlockup_panic,
2455 		.maxlen		= sizeof(int),
2456 		.mode		= 0644,
2457 		.proc_handler	= proc_dointvec_minmax,
2458 		.extra1		= SYSCTL_ZERO,
2459 		.extra2		= SYSCTL_ONE,
2460 	},
2461 #ifdef CONFIG_SMP
2462 	{
2463 		.procname	= "softlockup_all_cpu_backtrace",
2464 		.data		= &sysctl_softlockup_all_cpu_backtrace,
2465 		.maxlen		= sizeof(int),
2466 		.mode		= 0644,
2467 		.proc_handler	= proc_dointvec_minmax,
2468 		.extra1		= SYSCTL_ZERO,
2469 		.extra2		= SYSCTL_ONE,
2470 	},
2471 #endif /* CONFIG_SMP */
2472 #endif
2473 #ifdef CONFIG_HARDLOCKUP_DETECTOR
2474 	{
2475 		.procname	= "hardlockup_panic",
2476 		.data		= &hardlockup_panic,
2477 		.maxlen		= sizeof(int),
2478 		.mode		= 0644,
2479 		.proc_handler	= proc_dointvec_minmax,
2480 		.extra1		= SYSCTL_ZERO,
2481 		.extra2		= SYSCTL_ONE,
2482 	},
2483 #ifdef CONFIG_SMP
2484 	{
2485 		.procname	= "hardlockup_all_cpu_backtrace",
2486 		.data		= &sysctl_hardlockup_all_cpu_backtrace,
2487 		.maxlen		= sizeof(int),
2488 		.mode		= 0644,
2489 		.proc_handler	= proc_dointvec_minmax,
2490 		.extra1		= SYSCTL_ZERO,
2491 		.extra2		= SYSCTL_ONE,
2492 	},
2493 #endif /* CONFIG_SMP */
2494 #endif
2495 #endif
2496 
2497 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
2498 	{
2499 		.procname       = "unknown_nmi_panic",
2500 		.data           = &unknown_nmi_panic,
2501 		.maxlen         = sizeof (int),
2502 		.mode           = 0644,
2503 		.proc_handler   = proc_dointvec,
2504 	},
2505 #endif
2506 
2507 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
2508 	defined(CONFIG_DEBUG_STACKOVERFLOW)
2509 	{
2510 		.procname	= "panic_on_stackoverflow",
2511 		.data		= &sysctl_panic_on_stackoverflow,
2512 		.maxlen		= sizeof(int),
2513 		.mode		= 0644,
2514 		.proc_handler	= proc_dointvec,
2515 	},
2516 #endif
2517 #if defined(CONFIG_X86)
2518 	{
2519 		.procname	= "panic_on_unrecovered_nmi",
2520 		.data		= &panic_on_unrecovered_nmi,
2521 		.maxlen		= sizeof(int),
2522 		.mode		= 0644,
2523 		.proc_handler	= proc_dointvec,
2524 	},
2525 	{
2526 		.procname	= "panic_on_io_nmi",
2527 		.data		= &panic_on_io_nmi,
2528 		.maxlen		= sizeof(int),
2529 		.mode		= 0644,
2530 		.proc_handler	= proc_dointvec,
2531 	},
2532 	{
2533 		.procname	= "bootloader_type",
2534 		.data		= &bootloader_type,
2535 		.maxlen		= sizeof (int),
2536 		.mode		= 0444,
2537 		.proc_handler	= proc_dointvec,
2538 	},
2539 	{
2540 		.procname	= "bootloader_version",
2541 		.data		= &bootloader_version,
2542 		.maxlen		= sizeof (int),
2543 		.mode		= 0444,
2544 		.proc_handler	= proc_dointvec,
2545 	},
2546 	{
2547 		.procname	= "io_delay_type",
2548 		.data		= &io_delay_type,
2549 		.maxlen		= sizeof(int),
2550 		.mode		= 0644,
2551 		.proc_handler	= proc_dointvec,
2552 	},
2553 #endif
2554 #if defined(CONFIG_MMU)
2555 	{
2556 		.procname	= "randomize_va_space",
2557 		.data		= &randomize_va_space,
2558 		.maxlen		= sizeof(int),
2559 		.mode		= 0644,
2560 		.proc_handler	= proc_dointvec,
2561 	},
2562 #endif
2563 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
2564 	{
2565 		.procname	= "spin_retry",
2566 		.data		= &spin_retry,
2567 		.maxlen		= sizeof (int),
2568 		.mode		= 0644,
2569 		.proc_handler	= proc_dointvec,
2570 	},
2571 #endif
2572 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
2573 	{
2574 		.procname	= "acpi_video_flags",
2575 		.data		= &acpi_realmode_flags,
2576 		.maxlen		= sizeof (unsigned long),
2577 		.mode		= 0644,
2578 		.proc_handler	= proc_doulongvec_minmax,
2579 	},
2580 #endif
2581 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
2582 	{
2583 		.procname	= "ignore-unaligned-usertrap",
2584 		.data		= &no_unaligned_warning,
2585 		.maxlen		= sizeof (int),
2586 		.mode		= 0644,
2587 		.proc_handler	= proc_dointvec,
2588 	},
2589 #endif
2590 #ifdef CONFIG_IA64
2591 	{
2592 		.procname	= "unaligned-dump-stack",
2593 		.data		= &unaligned_dump_stack,
2594 		.maxlen		= sizeof (int),
2595 		.mode		= 0644,
2596 		.proc_handler	= proc_dointvec,
2597 	},
2598 #endif
2599 #ifdef CONFIG_DETECT_HUNG_TASK
2600 #ifdef CONFIG_SMP
2601 	{
2602 		.procname	= "hung_task_all_cpu_backtrace",
2603 		.data		= &sysctl_hung_task_all_cpu_backtrace,
2604 		.maxlen		= sizeof(int),
2605 		.mode		= 0644,
2606 		.proc_handler	= proc_dointvec_minmax,
2607 		.extra1		= SYSCTL_ZERO,
2608 		.extra2		= SYSCTL_ONE,
2609 	},
2610 #endif /* CONFIG_SMP */
2611 	{
2612 		.procname	= "hung_task_panic",
2613 		.data		= &sysctl_hung_task_panic,
2614 		.maxlen		= sizeof(int),
2615 		.mode		= 0644,
2616 		.proc_handler	= proc_dointvec_minmax,
2617 		.extra1		= SYSCTL_ZERO,
2618 		.extra2		= SYSCTL_ONE,
2619 	},
2620 	{
2621 		.procname	= "hung_task_check_count",
2622 		.data		= &sysctl_hung_task_check_count,
2623 		.maxlen		= sizeof(int),
2624 		.mode		= 0644,
2625 		.proc_handler	= proc_dointvec_minmax,
2626 		.extra1		= SYSCTL_ZERO,
2627 	},
2628 	{
2629 		.procname	= "hung_task_timeout_secs",
2630 		.data		= &sysctl_hung_task_timeout_secs,
2631 		.maxlen		= sizeof(unsigned long),
2632 		.mode		= 0644,
2633 		.proc_handler	= proc_dohung_task_timeout_secs,
2634 		.extra2		= &hung_task_timeout_max,
2635 	},
2636 	{
2637 		.procname	= "hung_task_check_interval_secs",
2638 		.data		= &sysctl_hung_task_check_interval_secs,
2639 		.maxlen		= sizeof(unsigned long),
2640 		.mode		= 0644,
2641 		.proc_handler	= proc_dohung_task_timeout_secs,
2642 		.extra2		= &hung_task_timeout_max,
2643 	},
2644 	{
2645 		.procname	= "hung_task_warnings",
2646 		.data		= &sysctl_hung_task_warnings,
2647 		.maxlen		= sizeof(int),
2648 		.mode		= 0644,
2649 		.proc_handler	= proc_dointvec_minmax,
2650 		.extra1		= SYSCTL_NEG_ONE,
2651 	},
2652 #endif
2653 #ifdef CONFIG_RT_MUTEXES
2654 	{
2655 		.procname	= "max_lock_depth",
2656 		.data		= &max_lock_depth,
2657 		.maxlen		= sizeof(int),
2658 		.mode		= 0644,
2659 		.proc_handler	= proc_dointvec,
2660 	},
2661 #endif
2662 	{
2663 		.procname	= "poweroff_cmd",
2664 		.data		= &poweroff_cmd,
2665 		.maxlen		= POWEROFF_CMD_PATH_LEN,
2666 		.mode		= 0644,
2667 		.proc_handler	= proc_dostring,
2668 	},
2669 #ifdef CONFIG_KEYS
2670 	{
2671 		.procname	= "keys",
2672 		.mode		= 0555,
2673 		.child		= key_sysctls,
2674 	},
2675 #endif
2676 #ifdef CONFIG_PERF_EVENTS
2677 	/*
2678 	 * User-space scripts rely on the existence of this file
2679 	 * as a feature check for perf_events being enabled.
2680 	 *
2681 	 * So it's an ABI, do not remove!
2682 	 */
2683 	{
2684 		.procname	= "perf_event_paranoid",
2685 		.data		= &sysctl_perf_event_paranoid,
2686 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
2687 		.mode		= 0644,
2688 		.proc_handler	= proc_dointvec,
2689 	},
2690 	{
2691 		.procname	= "perf_event_mlock_kb",
2692 		.data		= &sysctl_perf_event_mlock,
2693 		.maxlen		= sizeof(sysctl_perf_event_mlock),
2694 		.mode		= 0644,
2695 		.proc_handler	= proc_dointvec,
2696 	},
2697 	{
2698 		.procname	= "perf_event_max_sample_rate",
2699 		.data		= &sysctl_perf_event_sample_rate,
2700 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
2701 		.mode		= 0644,
2702 		.proc_handler	= perf_proc_update_handler,
2703 		.extra1		= SYSCTL_ONE,
2704 	},
2705 	{
2706 		.procname	= "perf_cpu_time_max_percent",
2707 		.data		= &sysctl_perf_cpu_time_max_percent,
2708 		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
2709 		.mode		= 0644,
2710 		.proc_handler	= perf_cpu_time_max_percent_handler,
2711 		.extra1		= SYSCTL_ZERO,
2712 		.extra2		= SYSCTL_ONE_HUNDRED,
2713 	},
2714 	{
2715 		.procname	= "perf_event_max_stack",
2716 		.data		= &sysctl_perf_event_max_stack,
2717 		.maxlen		= sizeof(sysctl_perf_event_max_stack),
2718 		.mode		= 0644,
2719 		.proc_handler	= perf_event_max_stack_handler,
2720 		.extra1		= SYSCTL_ZERO,
2721 		.extra2		= &six_hundred_forty_kb,
2722 	},
2723 	{
2724 		.procname	= "perf_event_max_contexts_per_stack",
2725 		.data		= &sysctl_perf_event_max_contexts_per_stack,
2726 		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
2727 		.mode		= 0644,
2728 		.proc_handler	= perf_event_max_stack_handler,
2729 		.extra1		= SYSCTL_ZERO,
2730 		.extra2		= SYSCTL_ONE_THOUSAND,
2731 	},
2732 #endif
2733 	{
2734 		.procname	= "panic_on_warn",
2735 		.data		= &panic_on_warn,
2736 		.maxlen		= sizeof(int),
2737 		.mode		= 0644,
2738 		.proc_handler	= proc_dointvec_minmax,
2739 		.extra1		= SYSCTL_ZERO,
2740 		.extra2		= SYSCTL_ONE,
2741 	},
2742 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
2743 	{
2744 		.procname	= "timer_migration",
2745 		.data		= &sysctl_timer_migration,
2746 		.maxlen		= sizeof(unsigned int),
2747 		.mode		= 0644,
2748 		.proc_handler	= timer_migration_handler,
2749 		.extra1		= SYSCTL_ZERO,
2750 		.extra2		= SYSCTL_ONE,
2751 	},
2752 #endif
2753 #ifdef CONFIG_BPF_SYSCALL
2754 	{
2755 		.procname	= "unprivileged_bpf_disabled",
2756 		.data		= &sysctl_unprivileged_bpf_disabled,
2757 		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
2758 		.mode		= 0644,
2759 		.proc_handler	= bpf_unpriv_handler,
2760 		.extra1		= SYSCTL_ZERO,
2761 		.extra2		= SYSCTL_TWO,
2762 	},
2763 	{
2764 		.procname	= "bpf_stats_enabled",
2765 		.data		= &bpf_stats_enabled_key.key,
2766 		.maxlen		= sizeof(bpf_stats_enabled_key),
2767 		.mode		= 0644,
2768 		.proc_handler	= bpf_stats_handler,
2769 	},
2770 #endif
2771 #if defined(CONFIG_TREE_RCU)
2772 	{
2773 		.procname	= "panic_on_rcu_stall",
2774 		.data		= &sysctl_panic_on_rcu_stall,
2775 		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
2776 		.mode		= 0644,
2777 		.proc_handler	= proc_dointvec_minmax,
2778 		.extra1		= SYSCTL_ZERO,
2779 		.extra2		= SYSCTL_ONE,
2780 	},
2781 #endif
2782 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
2783 	{
2784 		.procname	= "stack_erasing",
2785 		.data		= NULL,
2786 		.maxlen		= sizeof(int),
2787 		.mode		= 0600,
2788 		.proc_handler	= stack_erasing_sysctl,
2789 		.extra1		= SYSCTL_ZERO,
2790 		.extra2		= SYSCTL_ONE,
2791 	},
2792 #endif
2793 	{ }
2794 };
2795 
2796 static struct ctl_table vm_table[] = {
2797 	{
2798 		.procname	= "overcommit_memory",
2799 		.data		= &sysctl_overcommit_memory,
2800 		.maxlen		= sizeof(sysctl_overcommit_memory),
2801 		.mode		= 0644,
2802 		.proc_handler	= overcommit_policy_handler,
2803 		.extra1		= SYSCTL_ZERO,
2804 		.extra2		= SYSCTL_TWO,
2805 	},
2806 	{
2807 		.procname	= "panic_on_oom",
2808 		.data		= &sysctl_panic_on_oom,
2809 		.maxlen		= sizeof(sysctl_panic_on_oom),
2810 		.mode		= 0644,
2811 		.proc_handler	= proc_dointvec_minmax,
2812 		.extra1		= SYSCTL_ZERO,
2813 		.extra2		= SYSCTL_TWO,
2814 	},
2815 	{
2816 		.procname	= "oom_kill_allocating_task",
2817 		.data		= &sysctl_oom_kill_allocating_task,
2818 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
2819 		.mode		= 0644,
2820 		.proc_handler	= proc_dointvec,
2821 	},
2822 	{
2823 		.procname	= "oom_dump_tasks",
2824 		.data		= &sysctl_oom_dump_tasks,
2825 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
2826 		.mode		= 0644,
2827 		.proc_handler	= proc_dointvec,
2828 	},
2829 	{
2830 		.procname	= "overcommit_ratio",
2831 		.data		= &sysctl_overcommit_ratio,
2832 		.maxlen		= sizeof(sysctl_overcommit_ratio),
2833 		.mode		= 0644,
2834 		.proc_handler	= overcommit_ratio_handler,
2835 	},
2836 	{
2837 		.procname	= "overcommit_kbytes",
2838 		.data		= &sysctl_overcommit_kbytes,
2839 		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2840 		.mode		= 0644,
2841 		.proc_handler	= overcommit_kbytes_handler,
2842 	},
2843 	{
2844 		.procname	= "page-cluster",
2845 		.data		= &page_cluster,
2846 		.maxlen		= sizeof(int),
2847 		.mode		= 0644,
2848 		.proc_handler	= proc_dointvec_minmax,
2849 		.extra1		= SYSCTL_ZERO,
2850 	},
2851 	{
2852 		.procname	= "dirty_background_ratio",
2853 		.data		= &dirty_background_ratio,
2854 		.maxlen		= sizeof(dirty_background_ratio),
2855 		.mode		= 0644,
2856 		.proc_handler	= dirty_background_ratio_handler,
2857 		.extra1		= SYSCTL_ZERO,
2858 		.extra2		= SYSCTL_ONE_HUNDRED,
2859 	},
2860 	{
2861 		.procname	= "dirty_background_bytes",
2862 		.data		= &dirty_background_bytes,
2863 		.maxlen		= sizeof(dirty_background_bytes),
2864 		.mode		= 0644,
2865 		.proc_handler	= dirty_background_bytes_handler,
2866 		.extra1		= &one_ul,
2867 	},
2868 	{
2869 		.procname	= "dirty_ratio",
2870 		.data		= &vm_dirty_ratio,
2871 		.maxlen		= sizeof(vm_dirty_ratio),
2872 		.mode		= 0644,
2873 		.proc_handler	= dirty_ratio_handler,
2874 		.extra1		= SYSCTL_ZERO,
2875 		.extra2		= SYSCTL_ONE_HUNDRED,
2876 	},
2877 	{
2878 		.procname	= "dirty_bytes",
2879 		.data		= &vm_dirty_bytes,
2880 		.maxlen		= sizeof(vm_dirty_bytes),
2881 		.mode		= 0644,
2882 		.proc_handler	= dirty_bytes_handler,
2883 		.extra1		= &dirty_bytes_min,
2884 	},
2885 	{
2886 		.procname	= "dirty_writeback_centisecs",
2887 		.data		= &dirty_writeback_interval,
2888 		.maxlen		= sizeof(dirty_writeback_interval),
2889 		.mode		= 0644,
2890 		.proc_handler	= dirty_writeback_centisecs_handler,
2891 	},
2892 	{
2893 		.procname	= "dirty_expire_centisecs",
2894 		.data		= &dirty_expire_interval,
2895 		.maxlen		= sizeof(dirty_expire_interval),
2896 		.mode		= 0644,
2897 		.proc_handler	= proc_dointvec_minmax,
2898 		.extra1		= SYSCTL_ZERO,
2899 	},
2900 	{
2901 		.procname	= "dirtytime_expire_seconds",
2902 		.data		= &dirtytime_expire_interval,
2903 		.maxlen		= sizeof(dirtytime_expire_interval),
2904 		.mode		= 0644,
2905 		.proc_handler	= dirtytime_interval_handler,
2906 		.extra1		= SYSCTL_ZERO,
2907 	},
2908 	{
2909 		.procname	= "swappiness",
2910 		.data		= &vm_swappiness,
2911 		.maxlen		= sizeof(vm_swappiness),
2912 		.mode		= 0644,
2913 		.proc_handler	= proc_dointvec_minmax,
2914 		.extra1		= SYSCTL_ZERO,
2915 		.extra2		= SYSCTL_TWO_HUNDRED,
2916 	},
2917 #ifdef CONFIG_NUMA
2918 	{
2919 		.procname	= "numa_stat",
2920 		.data		= &sysctl_vm_numa_stat,
2921 		.maxlen		= sizeof(int),
2922 		.mode		= 0644,
2923 		.proc_handler	= sysctl_vm_numa_stat_handler,
2924 		.extra1		= SYSCTL_ZERO,
2925 		.extra2		= SYSCTL_ONE,
2926 	},
2927 #endif
2928 #ifdef CONFIG_HUGETLB_PAGE
2929 	{
2930 		.procname	= "nr_hugepages",
2931 		.data		= NULL,
2932 		.maxlen		= sizeof(unsigned long),
2933 		.mode		= 0644,
2934 		.proc_handler	= hugetlb_sysctl_handler,
2935 	},
2936 #ifdef CONFIG_NUMA
2937 	{
2938 		.procname       = "nr_hugepages_mempolicy",
2939 		.data           = NULL,
2940 		.maxlen         = sizeof(unsigned long),
2941 		.mode           = 0644,
2942 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2943 	},
2944 #endif
2945 	 {
2946 		.procname	= "hugetlb_shm_group",
2947 		.data		= &sysctl_hugetlb_shm_group,
2948 		.maxlen		= sizeof(gid_t),
2949 		.mode		= 0644,
2950 		.proc_handler	= proc_dointvec,
2951 	 },
2952 	{
2953 		.procname	= "nr_overcommit_hugepages",
2954 		.data		= NULL,
2955 		.maxlen		= sizeof(unsigned long),
2956 		.mode		= 0644,
2957 		.proc_handler	= hugetlb_overcommit_handler,
2958 	},
2959 #endif
2960 	{
2961 		.procname	= "lowmem_reserve_ratio",
2962 		.data		= &sysctl_lowmem_reserve_ratio,
2963 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
2964 		.mode		= 0644,
2965 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
2966 	},
2967 	{
2968 		.procname	= "drop_caches",
2969 		.data		= &sysctl_drop_caches,
2970 		.maxlen		= sizeof(int),
2971 		.mode		= 0200,
2972 		.proc_handler	= drop_caches_sysctl_handler,
2973 		.extra1		= SYSCTL_ONE,
2974 		.extra2		= SYSCTL_FOUR,
2975 	},
2976 #ifdef CONFIG_COMPACTION
2977 	{
2978 		.procname	= "compact_memory",
2979 		.data		= &sysctl_compact_memory,
2980 		.maxlen		= sizeof(int),
2981 		.mode		= 0200,
2982 		.proc_handler	= sysctl_compaction_handler,
2983 	},
2984 	{
2985 		.procname	= "compaction_proactiveness",
2986 		.data		= &sysctl_compaction_proactiveness,
2987 		.maxlen		= sizeof(sysctl_compaction_proactiveness),
2988 		.mode		= 0644,
2989 		.proc_handler	= proc_dointvec_minmax,
2990 		.extra1		= SYSCTL_ZERO,
2991 		.extra2		= SYSCTL_ONE_HUNDRED,
2992 	},
2993 	{
2994 		.procname	= "extfrag_threshold",
2995 		.data		= &sysctl_extfrag_threshold,
2996 		.maxlen		= sizeof(int),
2997 		.mode		= 0644,
2998 		.proc_handler	= proc_dointvec_minmax,
2999 		.extra1		= &min_extfrag_threshold,
3000 		.extra2		= &max_extfrag_threshold,
3001 	},
3002 	{
3003 		.procname	= "compact_unevictable_allowed",
3004 		.data		= &sysctl_compact_unevictable_allowed,
3005 		.maxlen		= sizeof(int),
3006 		.mode		= 0644,
3007 		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
3008 		.extra1		= SYSCTL_ZERO,
3009 		.extra2		= SYSCTL_ONE,
3010 	},
3011 
3012 #endif /* CONFIG_COMPACTION */
3013 	{
3014 		.procname	= "min_free_kbytes",
3015 		.data		= &min_free_kbytes,
3016 		.maxlen		= sizeof(min_free_kbytes),
3017 		.mode		= 0644,
3018 		.proc_handler	= min_free_kbytes_sysctl_handler,
3019 		.extra1		= SYSCTL_ZERO,
3020 	},
3021 	{
3022 		.procname	= "watermark_boost_factor",
3023 		.data		= &watermark_boost_factor,
3024 		.maxlen		= sizeof(watermark_boost_factor),
3025 		.mode		= 0644,
3026 		.proc_handler	= proc_dointvec_minmax,
3027 		.extra1		= SYSCTL_ZERO,
3028 	},
3029 	{
3030 		.procname	= "watermark_scale_factor",
3031 		.data		= &watermark_scale_factor,
3032 		.maxlen		= sizeof(watermark_scale_factor),
3033 		.mode		= 0644,
3034 		.proc_handler	= watermark_scale_factor_sysctl_handler,
3035 		.extra1		= SYSCTL_ONE,
3036 		.extra2		= SYSCTL_THREE_THOUSAND,
3037 	},
3038 	{
3039 		.procname	= "percpu_pagelist_fraction",
3040 		.data		= &percpu_pagelist_fraction,
3041 		.maxlen		= sizeof(percpu_pagelist_fraction),
3042 		.mode		= 0644,
3043 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
3044 		.extra1		= SYSCTL_ZERO,
3045 	},
3046 	{
3047 		.procname	= "page_lock_unfairness",
3048 		.data		= &sysctl_page_lock_unfairness,
3049 		.maxlen		= sizeof(sysctl_page_lock_unfairness),
3050 		.mode		= 0644,
3051 		.proc_handler	= proc_dointvec_minmax,
3052 		.extra1		= SYSCTL_ZERO,
3053 	},
3054 #ifdef CONFIG_MMU
3055 	{
3056 		.procname	= "max_map_count",
3057 		.data		= &sysctl_max_map_count,
3058 		.maxlen		= sizeof(sysctl_max_map_count),
3059 		.mode		= 0644,
3060 		.proc_handler	= proc_dointvec_minmax,
3061 		.extra1		= SYSCTL_ZERO,
3062 	},
3063 #else
3064 	{
3065 		.procname	= "nr_trim_pages",
3066 		.data		= &sysctl_nr_trim_pages,
3067 		.maxlen		= sizeof(sysctl_nr_trim_pages),
3068 		.mode		= 0644,
3069 		.proc_handler	= proc_dointvec_minmax,
3070 		.extra1		= SYSCTL_ZERO,
3071 	},
3072 #endif
3073 	{
3074 		.procname	= "laptop_mode",
3075 		.data		= &laptop_mode,
3076 		.maxlen		= sizeof(laptop_mode),
3077 		.mode		= 0644,
3078 		.proc_handler	= proc_dointvec_jiffies,
3079 	},
3080 	{
3081 		.procname	= "vfs_cache_pressure",
3082 		.data		= &sysctl_vfs_cache_pressure,
3083 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
3084 		.mode		= 0644,
3085 		.proc_handler	= proc_dointvec_minmax,
3086 		.extra1		= SYSCTL_ZERO,
3087 	},
3088 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
3089     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
3090 	{
3091 		.procname	= "legacy_va_layout",
3092 		.data		= &sysctl_legacy_va_layout,
3093 		.maxlen		= sizeof(sysctl_legacy_va_layout),
3094 		.mode		= 0644,
3095 		.proc_handler	= proc_dointvec_minmax,
3096 		.extra1		= SYSCTL_ZERO,
3097 	},
3098 #endif
3099 #ifdef CONFIG_NUMA
3100 	{
3101 		.procname	= "zone_reclaim_mode",
3102 		.data		= &node_reclaim_mode,
3103 		.maxlen		= sizeof(node_reclaim_mode),
3104 		.mode		= 0644,
3105 		.proc_handler	= proc_dointvec_minmax,
3106 		.extra1		= SYSCTL_ZERO,
3107 	},
3108 	{
3109 		.procname	= "min_unmapped_ratio",
3110 		.data		= &sysctl_min_unmapped_ratio,
3111 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
3112 		.mode		= 0644,
3113 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
3114 		.extra1		= SYSCTL_ZERO,
3115 		.extra2		= SYSCTL_ONE_HUNDRED,
3116 	},
3117 	{
3118 		.procname	= "min_slab_ratio",
3119 		.data		= &sysctl_min_slab_ratio,
3120 		.maxlen		= sizeof(sysctl_min_slab_ratio),
3121 		.mode		= 0644,
3122 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
3123 		.extra1		= SYSCTL_ZERO,
3124 		.extra2		= SYSCTL_ONE_HUNDRED,
3125 	},
3126 #endif
3127 #ifdef CONFIG_SMP
3128 	{
3129 		.procname	= "stat_interval",
3130 		.data		= &sysctl_stat_interval,
3131 		.maxlen		= sizeof(sysctl_stat_interval),
3132 		.mode		= 0644,
3133 		.proc_handler	= proc_dointvec_jiffies,
3134 	},
3135 	{
3136 		.procname	= "stat_refresh",
3137 		.data		= NULL,
3138 		.maxlen		= 0,
3139 		.mode		= 0600,
3140 		.proc_handler	= vmstat_refresh,
3141 	},
3142 #endif
3143 #ifdef CONFIG_MMU
3144 	{
3145 		.procname	= "mmap_min_addr",
3146 		.data		= &dac_mmap_min_addr,
3147 		.maxlen		= sizeof(unsigned long),
3148 		.mode		= 0644,
3149 		.proc_handler	= mmap_min_addr_handler,
3150 	},
3151 #endif
3152 #ifdef CONFIG_NUMA
3153 	{
3154 		.procname	= "numa_zonelist_order",
3155 		.data		= &numa_zonelist_order,
3156 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
3157 		.mode		= 0644,
3158 		.proc_handler	= numa_zonelist_order_handler,
3159 	},
3160 #endif
3161 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
3162    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
3163 	{
3164 		.procname	= "vdso_enabled",
3165 #ifdef CONFIG_X86_32
3166 		.data		= &vdso32_enabled,
3167 		.maxlen		= sizeof(vdso32_enabled),
3168 #else
3169 		.data		= &vdso_enabled,
3170 		.maxlen		= sizeof(vdso_enabled),
3171 #endif
3172 		.mode		= 0644,
3173 		.proc_handler	= proc_dointvec,
3174 		.extra1		= SYSCTL_ZERO,
3175 	},
3176 #endif
3177 #ifdef CONFIG_HIGHMEM
3178 	{
3179 		.procname	= "highmem_is_dirtyable",
3180 		.data		= &vm_highmem_is_dirtyable,
3181 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
3182 		.mode		= 0644,
3183 		.proc_handler	= proc_dointvec_minmax,
3184 		.extra1		= SYSCTL_ZERO,
3185 		.extra2		= SYSCTL_ONE,
3186 	},
3187 #endif
3188 #ifdef CONFIG_MEMORY_FAILURE
3189 	{
3190 		.procname	= "memory_failure_early_kill",
3191 		.data		= &sysctl_memory_failure_early_kill,
3192 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
3193 		.mode		= 0644,
3194 		.proc_handler	= proc_dointvec_minmax,
3195 		.extra1		= SYSCTL_ZERO,
3196 		.extra2		= SYSCTL_ONE,
3197 	},
3198 	{
3199 		.procname	= "memory_failure_recovery",
3200 		.data		= &sysctl_memory_failure_recovery,
3201 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
3202 		.mode		= 0644,
3203 		.proc_handler	= proc_dointvec_minmax,
3204 		.extra1		= SYSCTL_ZERO,
3205 		.extra2		= SYSCTL_ONE,
3206 	},
3207 #endif
3208 	{
3209 		.procname	= "user_reserve_kbytes",
3210 		.data		= &sysctl_user_reserve_kbytes,
3211 		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
3212 		.mode		= 0644,
3213 		.proc_handler	= proc_doulongvec_minmax,
3214 	},
3215 	{
3216 		.procname	= "admin_reserve_kbytes",
3217 		.data		= &sysctl_admin_reserve_kbytes,
3218 		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
3219 		.mode		= 0644,
3220 		.proc_handler	= proc_doulongvec_minmax,
3221 	},
3222 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
3223 	{
3224 		.procname	= "mmap_rnd_bits",
3225 		.data		= &mmap_rnd_bits,
3226 		.maxlen		= sizeof(mmap_rnd_bits),
3227 		.mode		= 0600,
3228 		.proc_handler	= proc_dointvec_minmax,
3229 		.extra1		= (void *)&mmap_rnd_bits_min,
3230 		.extra2		= (void *)&mmap_rnd_bits_max,
3231 	},
3232 #endif
3233 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
3234 	{
3235 		.procname	= "mmap_rnd_compat_bits",
3236 		.data		= &mmap_rnd_compat_bits,
3237 		.maxlen		= sizeof(mmap_rnd_compat_bits),
3238 		.mode		= 0600,
3239 		.proc_handler	= proc_dointvec_minmax,
3240 		.extra1		= (void *)&mmap_rnd_compat_bits_min,
3241 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
3242 	},
3243 #endif
3244 #ifdef CONFIG_USERFAULTFD
3245 	{
3246 		.procname	= "unprivileged_userfaultfd",
3247 		.data		= &sysctl_unprivileged_userfaultfd,
3248 		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
3249 		.mode		= 0644,
3250 		.proc_handler	= proc_dointvec_minmax,
3251 		.extra1		= SYSCTL_ZERO,
3252 		.extra2		= SYSCTL_ONE,
3253 	},
3254 #endif
3255 	{ }
3256 };
3257 
3258 static struct ctl_table fs_table[] = {
3259 	{
3260 		.procname	= "inode-nr",
3261 		.data		= &inodes_stat,
3262 		.maxlen		= 2*sizeof(long),
3263 		.mode		= 0444,
3264 		.proc_handler	= proc_nr_inodes,
3265 	},
3266 	{
3267 		.procname	= "inode-state",
3268 		.data		= &inodes_stat,
3269 		.maxlen		= 7*sizeof(long),
3270 		.mode		= 0444,
3271 		.proc_handler	= proc_nr_inodes,
3272 	},
3273 	{
3274 		.procname	= "file-nr",
3275 		.data		= &files_stat,
3276 		.maxlen		= sizeof(files_stat),
3277 		.mode		= 0444,
3278 		.proc_handler	= proc_nr_files,
3279 	},
3280 	{
3281 		.procname	= "file-max",
3282 		.data		= &files_stat.max_files,
3283 		.maxlen		= sizeof(files_stat.max_files),
3284 		.mode		= 0644,
3285 		.proc_handler	= proc_doulongvec_minmax,
3286 		.extra1		= &zero_ul,
3287 		.extra2		= &long_max,
3288 	},
3289 	{
3290 		.procname	= "nr_open",
3291 		.data		= &sysctl_nr_open,
3292 		.maxlen		= sizeof(unsigned int),
3293 		.mode		= 0644,
3294 		.proc_handler	= proc_dointvec_minmax,
3295 		.extra1		= &sysctl_nr_open_min,
3296 		.extra2		= &sysctl_nr_open_max,
3297 	},
3298 	{
3299 		.procname	= "dentry-state",
3300 		.data		= &dentry_stat,
3301 		.maxlen		= 6*sizeof(long),
3302 		.mode		= 0444,
3303 		.proc_handler	= proc_nr_dentry,
3304 	},
3305 	{
3306 		.procname	= "overflowuid",
3307 		.data		= &fs_overflowuid,
3308 		.maxlen		= sizeof(int),
3309 		.mode		= 0644,
3310 		.proc_handler	= proc_dointvec_minmax,
3311 		.extra1		= &minolduid,
3312 		.extra2		= &maxolduid,
3313 	},
3314 	{
3315 		.procname	= "overflowgid",
3316 		.data		= &fs_overflowgid,
3317 		.maxlen		= sizeof(int),
3318 		.mode		= 0644,
3319 		.proc_handler	= proc_dointvec_minmax,
3320 		.extra1		= &minolduid,
3321 		.extra2		= &maxolduid,
3322 	},
3323 #ifdef CONFIG_FILE_LOCKING
3324 	{
3325 		.procname	= "leases-enable",
3326 		.data		= &leases_enable,
3327 		.maxlen		= sizeof(int),
3328 		.mode		= 0644,
3329 		.proc_handler	= proc_dointvec,
3330 	},
3331 #endif
3332 #ifdef CONFIG_DNOTIFY
3333 	{
3334 		.procname	= "dir-notify-enable",
3335 		.data		= &dir_notify_enable,
3336 		.maxlen		= sizeof(int),
3337 		.mode		= 0644,
3338 		.proc_handler	= proc_dointvec,
3339 	},
3340 #endif
3341 #ifdef CONFIG_MMU
3342 #ifdef CONFIG_FILE_LOCKING
3343 	{
3344 		.procname	= "lease-break-time",
3345 		.data		= &lease_break_time,
3346 		.maxlen		= sizeof(int),
3347 		.mode		= 0644,
3348 		.proc_handler	= proc_dointvec,
3349 	},
3350 #endif
3351 #ifdef CONFIG_AIO
3352 	{
3353 		.procname	= "aio-nr",
3354 		.data		= &aio_nr,
3355 		.maxlen		= sizeof(aio_nr),
3356 		.mode		= 0444,
3357 		.proc_handler	= proc_doulongvec_minmax,
3358 	},
3359 	{
3360 		.procname	= "aio-max-nr",
3361 		.data		= &aio_max_nr,
3362 		.maxlen		= sizeof(aio_max_nr),
3363 		.mode		= 0644,
3364 		.proc_handler	= proc_doulongvec_minmax,
3365 	},
3366 #endif /* CONFIG_AIO */
3367 #ifdef CONFIG_INOTIFY_USER
3368 	{
3369 		.procname	= "inotify",
3370 		.mode		= 0555,
3371 		.child		= inotify_table,
3372 	},
3373 #endif
3374 #ifdef CONFIG_EPOLL
3375 	{
3376 		.procname	= "epoll",
3377 		.mode		= 0555,
3378 		.child		= epoll_table,
3379 	},
3380 #endif
3381 #endif
3382 	{
3383 		.procname	= "protected_symlinks",
3384 		.data		= &sysctl_protected_symlinks,
3385 		.maxlen		= sizeof(int),
3386 		.mode		= 0600,
3387 		.proc_handler	= proc_dointvec_minmax,
3388 		.extra1		= SYSCTL_ZERO,
3389 		.extra2		= SYSCTL_ONE,
3390 	},
3391 	{
3392 		.procname	= "protected_hardlinks",
3393 		.data		= &sysctl_protected_hardlinks,
3394 		.maxlen		= sizeof(int),
3395 		.mode		= 0600,
3396 		.proc_handler	= proc_dointvec_minmax,
3397 		.extra1		= SYSCTL_ZERO,
3398 		.extra2		= SYSCTL_ONE,
3399 	},
3400 	{
3401 		.procname	= "protected_fifos",
3402 		.data		= &sysctl_protected_fifos,
3403 		.maxlen		= sizeof(int),
3404 		.mode		= 0600,
3405 		.proc_handler	= proc_dointvec_minmax,
3406 		.extra1		= SYSCTL_ZERO,
3407 		.extra2		= SYSCTL_TWO,
3408 	},
3409 	{
3410 		.procname	= "protected_regular",
3411 		.data		= &sysctl_protected_regular,
3412 		.maxlen		= sizeof(int),
3413 		.mode		= 0600,
3414 		.proc_handler	= proc_dointvec_minmax,
3415 		.extra1		= SYSCTL_ZERO,
3416 		.extra2		= SYSCTL_TWO,
3417 	},
3418 	{
3419 		.procname	= "suid_dumpable",
3420 		.data		= &suid_dumpable,
3421 		.maxlen		= sizeof(int),
3422 		.mode		= 0644,
3423 		.proc_handler	= proc_dointvec_minmax_coredump,
3424 		.extra1		= SYSCTL_ZERO,
3425 		.extra2		= SYSCTL_TWO,
3426 	},
3427 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
3428 	{
3429 		.procname	= "binfmt_misc",
3430 		.mode		= 0555,
3431 		.child		= sysctl_mount_point,
3432 	},
3433 #endif
3434 	{
3435 		.procname	= "pipe-max-size",
3436 		.data		= &pipe_max_size,
3437 		.maxlen		= sizeof(pipe_max_size),
3438 		.mode		= 0644,
3439 		.proc_handler	= proc_dopipe_max_size,
3440 	},
3441 	{
3442 		.procname	= "pipe-user-pages-hard",
3443 		.data		= &pipe_user_pages_hard,
3444 		.maxlen		= sizeof(pipe_user_pages_hard),
3445 		.mode		= 0644,
3446 		.proc_handler	= proc_doulongvec_minmax,
3447 	},
3448 	{
3449 		.procname	= "pipe-user-pages-soft",
3450 		.data		= &pipe_user_pages_soft,
3451 		.maxlen		= sizeof(pipe_user_pages_soft),
3452 		.mode		= 0644,
3453 		.proc_handler	= proc_doulongvec_minmax,
3454 	},
3455 	{
3456 		.procname	= "mount-max",
3457 		.data		= &sysctl_mount_max,
3458 		.maxlen		= sizeof(unsigned int),
3459 		.mode		= 0644,
3460 		.proc_handler	= proc_dointvec_minmax,
3461 		.extra1		= SYSCTL_ONE,
3462 	},
3463 	{ }
3464 };
3465 
3466 static struct ctl_table debug_table[] = {
3467 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
3468 	{
3469 		.procname	= "exception-trace",
3470 		.data		= &show_unhandled_signals,
3471 		.maxlen		= sizeof(int),
3472 		.mode		= 0644,
3473 		.proc_handler	= proc_dointvec
3474 	},
3475 #endif
3476 #if defined(CONFIG_OPTPROBES)
3477 	{
3478 		.procname	= "kprobes-optimization",
3479 		.data		= &sysctl_kprobes_optimization,
3480 		.maxlen		= sizeof(int),
3481 		.mode		= 0644,
3482 		.proc_handler	= proc_kprobes_optimization_handler,
3483 		.extra1		= SYSCTL_ZERO,
3484 		.extra2		= SYSCTL_ONE,
3485 	},
3486 #endif
3487 	{ }
3488 };
3489 
3490 static struct ctl_table dev_table[] = {
3491 	{ }
3492 };
3493 
3494 static struct ctl_table sysctl_base_table[] = {
3495 	{
3496 		.procname	= "kernel",
3497 		.mode		= 0555,
3498 		.child		= kern_table,
3499 	},
3500 	{
3501 		.procname	= "vm",
3502 		.mode		= 0555,
3503 		.child		= vm_table,
3504 	},
3505 	{
3506 		.procname	= "fs",
3507 		.mode		= 0555,
3508 		.child		= fs_table,
3509 	},
3510 	{
3511 		.procname	= "debug",
3512 		.mode		= 0555,
3513 		.child		= debug_table,
3514 	},
3515 	{
3516 		.procname	= "dev",
3517 		.mode		= 0555,
3518 		.child		= dev_table,
3519 	},
3520 	{ }
3521 };
3522 
sysctl_init(void)3523 int __init sysctl_init(void)
3524 {
3525 	struct ctl_table_header *hdr;
3526 
3527 	hdr = register_sysctl_table(sysctl_base_table);
3528 	kmemleak_not_leak(hdr);
3529 	return 0;
3530 }
3531 #endif /* CONFIG_SYSCTL */
/*
 * The handler exports are collected here, after the CONFIG_SYSCTL
 * section has been closed, instead of following each definition: the
 * handlers are defined twice in this file, so a per-definition export
 * would have to be written twice as well.  Exception to the usual
 * "export next to the definition" habit granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);	/* the only GPL-only export in this list */
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);
3547