/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_to_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_to_user)
EXPORT_SYMBOL(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	mov PER_CPU_VAR(current_task), %rax
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TASK_addr_limit(%rax),%rcx
	ja bad_from_user
	ALTERNATIVE_2 "jmp copy_user_generic_unrolled",		\
		      "jmp copy_user_generic_string",		\
		      X86_FEATURE_REP_GOOD,			\
		      "jmp copy_user_enhanced_fast_string",	\
		      X86_FEATURE_ERMS
ENDPROC(_copy_from_user)
EXPORT_SYMBOL(_copy_from_user)

	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
ENDPROC(bad_from_user)
	.previous

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
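 *
 * The copy runs in three stages: a 64-byte (8 x 8-byte) unrolled main
 * loop, then .L_copy_short_string for the remaining quadwords, then a
 * byte loop for the tail.  On a fault, the .fixup code below converts
 * the loop counter back into the number of uncopied bytes in %edx and
 * jumps to copy_user_handle_tail.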
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f	/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
30:	shll $6,%ecx		/* fault in the 64-byte loop: convert blocks left to bytes */
	addl %ecx,%edx
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx	/* fault in the quadword loop */
	jmp 60f
50:	movl %ecx,%edx		/* fault in the byte loop */
60:	jmp copy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * Supporting more would also be dangerous, because both Intel and AMD
 * have errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please consider these errata.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f	/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use them when available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
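 *
 * Copies shorter than 64 bytes are handed to .L_copy_short_string in
 * copy_user_generic_unrolled above, since the fixed startup cost of
 * 'rep movsb' is not worth paying for short copies.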
 */
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * __copy_user_nocache - Uncached memory copy with exception handling
 * This will force the destination out of the cache for better performance.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx		/* fault in the 4x8-byte loop: convert blocks left to bytes */
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx	/* fault in the 8-byte loop */
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx	/* fault in the 4-byte copy */
	jmp .L_fixup_handle_tail
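	/* Fault in the byte loop: %ecx already holds the remaining byte count */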
.L_fixup_1b_copy:
	movl %ecx,%edx
.L_fixup_handle_tail:
	sfence
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)