/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_to_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_to_user)
EXPORT_SYMBOL(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_from_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
EXPORT_SYMBOL(_copy_from_user)


	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
ENDPROC(bad_from_user)
	.previous

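/*
 * Roughly equivalent C for the two checked entry points above.  This
 * is an illustrative sketch only, not the kernel's implementation:
 * range_ok() is a made-up helper standing in for the addq/jc/cmpq/ja
 * sequence against current->addr_limit.
 *
 *	// Reject a user range that wraps around the address space or
 *	// ends above the task's address limit.
 *	static inline int range_ok(unsigned long ptr, unsigned long len,
 *				   unsigned long addr_limit)
 *	{
 *		unsigned long end = ptr + len;
 *
 *		if (end < ptr)			// addq set CF: wrapped
 *			return 0;
 *		return end <= addr_limit;	// otherwise "ja bad_*"
 *	}
 *
 * When the check fails, bad_from_user zeroes the whole destination
 * buffer (rep stosb with the byte count in %ecx) before returning the
 * count in %eax; bad_to_user simply returns the count.
 */
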
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

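/*
 * How the 30/40/50 fixups above rebuild an upper bound on the bytes
 * not yet copied before tail-calling copy_user_handle_tail, in
 * illustrative C (a sketch; the function names are made up, only the
 * arithmetic matches the assembly):
 *
 *	// fault in the 64-byte unrolled loop:
 *	// %ecx = 64-byte iterations left, %edx = tail remainder
 *	unsigned int left_after_64b(unsigned int chunks, unsigned int tail)
 *	{
 *		return (chunks << 6) + tail;		// label 30
 *	}
 *
 *	// fault in the 8-byte loop: %ecx = quadwords left
 *	unsigned int left_after_8b(unsigned int qwords, unsigned int tail)
 *	{
 *		return tail + qwords * 8;		// label 40
 *	}
 *
 *	// fault in the byte loop: %ecx = bytes left
 *	unsigned int left_after_1b(unsigned int bytes)
 *	{
 *		return bytes;				// label 50
 *	}
 *
 * copy_user_handle_tail then retries the remainder a byte at a time
 * and returns the number of bytes that could not be copied in %eax.
 */
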
/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page-sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

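/*
 * What the string variant does, as an illustrative C fragment (sketch
 * only; string_copy() is a made-up name, the real work above is the
 * two rep-prefixed instructions):
 *
 *	#include <string.h>
 *
 *	static void string_copy(char *dst, const char *src, unsigned int len)
 *	{
 *		unsigned int qwords = len >> 3;	// rep movsq count (%ecx)
 *		unsigned int bytes  = len & 7;	// rep movsb count
 *
 *		while (qwords--) {		// 1: rep movsq
 *			memcpy(dst, src, 8);
 *			dst += 8;
 *			src += 8;
 *		}
 *		while (bytes--)			// 3: rep movsb
 *			*dst++ = *src++;
 *	}
 *
 * On a fault during rep movsq, the 11: fixup rebuilds the outstanding
 * byte count as tail + 8 * remaining-quadwords before jumping to
 * copy_user_handle_tail.
 */
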
/*
 * Some CPUs provide enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB when the CPU
 * supports it (X86_FEATURE_ERMS).
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

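/*
 * The size cut-off above, in illustrative C (a sketch; the function
 * names are made up and 64 is simply the threshold this routine uses):
 *
 *	static void short_copy(char *dst, const char *src, unsigned int len)
 *	{
 *		// stands in for the shared .L_copy_short_string tail
 *		while (len--)
 *			*dst++ = *src++;
 *	}
 *
 *	static void erms_copy(char *dst, const char *src, unsigned int len)
 *	{
 *		if (len < 64) {
 *			// rep startup latency dominates short copies
 *			short_copy(dst, src, len);
 *			return;
 *		}
 *		while (len--)			// 1: rep movsb
 *			*dst++ = *src++;
 *	}
 */
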
/*
 * copy_user_nocache - Uncached memory copy with exception handling.
 * This keeps the destination out of the cache, which can improve
 * performance for large copies.
 *
 * Note: a cached memory copy is used when the destination or the size
 * is not naturally aligned. That is, the non-temporal path:
 *  - requires 8-byte alignment when the size is 8 bytes or larger.
 *  - requires 4-byte alignment when the size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes are left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)

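/*
 * A userspace sketch of the non-temporal store pattern used above
 * (illustrative only: SSE2 intrinsics stand in for the movnti
 * instructions and the user-space fault handling is omitted):
 *
 *	#include <immintrin.h>
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	// dst must be 8-byte aligned, matching the alignment rule in
 *	// the comment above __copy_user_nocache.
 *	static void nocache_copy_8b(uint64_t *dst, const uint64_t *src,
 *				    size_t qwords)
 *	{
 *		for (size_t i = 0; i < qwords; i++)
 *			_mm_stream_si64((long long *)&dst[i],
 *					(long long)src[i]);
 *		// Order the weakly-ordered non-temporal stores, like
 *		// the sfence before __copy_user_nocache returns.
 *		_mm_sfence();
 *	}
 */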