1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/export.h>
3 #include <linux/bvec.h>
4 #include <linux/uio.h>
5 #include <linux/pagemap.h>
6 #include <linux/slab.h>
7 #include <linux/vmalloc.h>
8 #include <linux/splice.h>
9 #include <net/checksum.h>
10 #include <linux/scatterlist.h>
11 
12 #define PIPE_PARANOIA /* for now */
13 
14 #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
15 	size_t left;					\
16 	size_t wanted = n;				\
17 	__p = i->iov;					\
18 	__v.iov_len = min(n, __p->iov_len - skip);	\
19 	if (likely(__v.iov_len)) {			\
20 		__v.iov_base = __p->iov_base + skip;	\
21 		left = (STEP);				\
22 		__v.iov_len -= left;			\
23 		skip += __v.iov_len;			\
24 		n -= __v.iov_len;			\
25 	} else {					\
26 		left = 0;				\
27 	}						\
28 	while (unlikely(!left && n)) {			\
29 		__p++;					\
30 		__v.iov_len = min(n, __p->iov_len);	\
31 		if (unlikely(!__v.iov_len))		\
32 			continue;			\
33 		__v.iov_base = __p->iov_base;		\
34 		left = (STEP);				\
35 		__v.iov_len -= left;			\
36 		skip = __v.iov_len;			\
37 		n -= __v.iov_len;			\
38 	}						\
39 	n = wanted - n;					\
40 }
41 
42 #define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
43 	size_t wanted = n;				\
44 	__p = i->kvec;					\
45 	__v.iov_len = min(n, __p->iov_len - skip);	\
46 	if (likely(__v.iov_len)) {			\
47 		__v.iov_base = __p->iov_base + skip;	\
48 		(void)(STEP);				\
49 		skip += __v.iov_len;			\
50 		n -= __v.iov_len;			\
51 	}						\
52 	while (unlikely(n)) {				\
53 		__p++;					\
54 		__v.iov_len = min(n, __p->iov_len);	\
55 		if (unlikely(!__v.iov_len))		\
56 			continue;			\
57 		__v.iov_base = __p->iov_base;		\
58 		(void)(STEP);				\
59 		skip = __v.iov_len;			\
60 		n -= __v.iov_len;			\
61 	}						\
62 	n = wanted;					\
63 }
64 
65 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
66 	struct bvec_iter __start;			\
67 	__start.bi_size = n;				\
68 	__start.bi_bvec_done = skip;			\
69 	__start.bi_idx = 0;				\
70 	for_each_bvec(__v, i->bvec, __bi, __start) {	\
71 		if (!__v.bv_len)			\
72 			continue;			\
73 		(void)(STEP);				\
74 	}						\
75 }
76 
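/*
 * Added note (not part of the original file): iterate_all_kinds() below
 * expands exactly one of the STEP expressions I, B or K depending on the
 * iterator flavour -- I for user iovecs, B for bio_vecs, K for kernel
 * kvecs -- and does nothing for ITER_DISCARD.  iterate_and_advance() walks
 * the segments the same way but additionally consumes what it visits: it
 * updates i->count, i->iov_offset, i->nr_segs and the segment pointer, so
 * the iterator ends up positioned just past the processed bytes.
 */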
77 #define iterate_all_kinds(i, n, v, I, B, K) {			\
78 	if (likely(n)) {					\
79 		size_t skip = i->iov_offset;			\
80 		if (unlikely(i->type & ITER_BVEC)) {		\
81 			struct bio_vec v;			\
82 			struct bvec_iter __bi;			\
83 			iterate_bvec(i, n, v, __bi, skip, (B))	\
84 		} else if (unlikely(i->type & ITER_KVEC)) {	\
85 			const struct kvec *kvec;		\
86 			struct kvec v;				\
87 			iterate_kvec(i, n, v, kvec, skip, (K))	\
88 		} else if (unlikely(i->type & ITER_DISCARD)) {	\
89 		} else {					\
90 			const struct iovec *iov;		\
91 			struct iovec v;				\
92 			iterate_iovec(i, n, v, iov, skip, (I))	\
93 		}						\
94 	}							\
95 }
96 
97 #define iterate_and_advance(i, n, v, I, B, K) {			\
98 	if (unlikely(i->count < n))				\
99 		n = i->count;					\
100 	if (i->count) {						\
101 		size_t skip = i->iov_offset;			\
102 		if (unlikely(i->type & ITER_BVEC)) {		\
103 			const struct bio_vec *bvec = i->bvec;	\
104 			struct bio_vec v;			\
105 			struct bvec_iter __bi;			\
106 			iterate_bvec(i, n, v, __bi, skip, (B))	\
107 			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
108 			i->nr_segs -= i->bvec - bvec;		\
109 			skip = __bi.bi_bvec_done;		\
110 		} else if (unlikely(i->type & ITER_KVEC)) {	\
111 			const struct kvec *kvec;		\
112 			struct kvec v;				\
113 			iterate_kvec(i, n, v, kvec, skip, (K))	\
114 			if (skip == kvec->iov_len) {		\
115 				kvec++;				\
116 				skip = 0;			\
117 			}					\
118 			i->nr_segs -= kvec - i->kvec;		\
119 			i->kvec = kvec;				\
120 		} else if (unlikely(i->type & ITER_DISCARD)) {	\
121 			skip += n;				\
122 		} else {					\
123 			const struct iovec *iov;		\
124 			struct iovec v;				\
125 			iterate_iovec(i, n, v, iov, skip, (I))	\
126 			if (skip == iov->iov_len) {		\
127 				iov++;				\
128 				skip = 0;			\
129 			}					\
130 			i->nr_segs -= iov - i->iov;		\
131 			i->iov = iov;				\
132 		}						\
133 		i->count -= n;					\
134 		i->iov_offset = skip;				\
135 	}							\
136 }
137 
138 static int copyout(void __user *to, const void *from, size_t n)
139 {
140 	if (access_ok(to, n)) {
141 		kasan_check_read(from, n);
142 		n = raw_copy_to_user(to, from, n);
143 	}
144 	return n;
145 }
146 
147 static int copyin(void *to, const void __user *from, size_t n)
148 {
149 	if (access_ok(from, n)) {
150 		kasan_check_write(to, n);
151 		n = raw_copy_from_user(to, from, n);
152 	}
153 	return n;
154 }
155 
156 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
157 			 struct iov_iter *i)
158 {
159 	size_t skip, copy, left, wanted;
160 	const struct iovec *iov;
161 	char __user *buf;
162 	void *kaddr, *from;
163 
164 	if (unlikely(bytes > i->count))
165 		bytes = i->count;
166 
167 	if (unlikely(!bytes))
168 		return 0;
169 
170 	might_fault();
171 	wanted = bytes;
172 	iov = i->iov;
173 	skip = i->iov_offset;
174 	buf = iov->iov_base + skip;
175 	copy = min(bytes, iov->iov_len - skip);
176 
177 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
178 		kaddr = kmap_atomic(page);
179 		from = kaddr + offset;
180 
181 		/* first chunk, usually the only one */
182 		left = copyout(buf, from, copy);
183 		copy -= left;
184 		skip += copy;
185 		from += copy;
186 		bytes -= copy;
187 
188 		while (unlikely(!left && bytes)) {
189 			iov++;
190 			buf = iov->iov_base;
191 			copy = min(bytes, iov->iov_len);
192 			left = copyout(buf, from, copy);
193 			copy -= left;
194 			skip = copy;
195 			from += copy;
196 			bytes -= copy;
197 		}
198 		if (likely(!bytes)) {
199 			kunmap_atomic(kaddr);
200 			goto done;
201 		}
202 		offset = from - kaddr;
203 		buf += copy;
204 		kunmap_atomic(kaddr);
205 		copy = min(bytes, iov->iov_len - skip);
206 	}
207 	/* Too bad - revert to non-atomic kmap */
208 
209 	kaddr = kmap(page);
210 	from = kaddr + offset;
211 	left = copyout(buf, from, copy);
212 	copy -= left;
213 	skip += copy;
214 	from += copy;
215 	bytes -= copy;
216 	while (unlikely(!left && bytes)) {
217 		iov++;
218 		buf = iov->iov_base;
219 		copy = min(bytes, iov->iov_len);
220 		left = copyout(buf, from, copy);
221 		copy -= left;
222 		skip = copy;
223 		from += copy;
224 		bytes -= copy;
225 	}
226 	kunmap(page);
227 
228 done:
229 	if (skip == iov->iov_len) {
230 		iov++;
231 		skip = 0;
232 	}
233 	i->count -= wanted - bytes;
234 	i->nr_segs -= iov - i->iov;
235 	i->iov = iov;
236 	i->iov_offset = skip;
237 	return wanted - bytes;
238 }
239 
240 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
241 			 struct iov_iter *i)
242 {
243 	size_t skip, copy, left, wanted;
244 	const struct iovec *iov;
245 	char __user *buf;
246 	void *kaddr, *to;
247 
248 	if (unlikely(bytes > i->count))
249 		bytes = i->count;
250 
251 	if (unlikely(!bytes))
252 		return 0;
253 
254 	might_fault();
255 	wanted = bytes;
256 	iov = i->iov;
257 	skip = i->iov_offset;
258 	buf = iov->iov_base + skip;
259 	copy = min(bytes, iov->iov_len - skip);
260 
261 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
262 		kaddr = kmap_atomic(page);
263 		to = kaddr + offset;
264 
265 		/* first chunk, usually the only one */
266 		left = copyin(to, buf, copy);
267 		copy -= left;
268 		skip += copy;
269 		to += copy;
270 		bytes -= copy;
271 
272 		while (unlikely(!left && bytes)) {
273 			iov++;
274 			buf = iov->iov_base;
275 			copy = min(bytes, iov->iov_len);
276 			left = copyin(to, buf, copy);
277 			copy -= left;
278 			skip = copy;
279 			to += copy;
280 			bytes -= copy;
281 		}
282 		if (likely(!bytes)) {
283 			kunmap_atomic(kaddr);
284 			goto done;
285 		}
286 		offset = to - kaddr;
287 		buf += copy;
288 		kunmap_atomic(kaddr);
289 		copy = min(bytes, iov->iov_len - skip);
290 	}
291 	/* Too bad - revert to non-atomic kmap */
292 
293 	kaddr = kmap(page);
294 	to = kaddr + offset;
295 	left = copyin(to, buf, copy);
296 	copy -= left;
297 	skip += copy;
298 	to += copy;
299 	bytes -= copy;
300 	while (unlikely(!left && bytes)) {
301 		iov++;
302 		buf = iov->iov_base;
303 		copy = min(bytes, iov->iov_len);
304 		left = copyin(to, buf, copy);
305 		copy -= left;
306 		skip = copy;
307 		to += copy;
308 		bytes -= copy;
309 	}
310 	kunmap(page);
311 
312 done:
313 	if (skip == iov->iov_len) {
314 		iov++;
315 		skip = 0;
316 	}
317 	i->count -= wanted - bytes;
318 	i->nr_segs -= iov - i->iov;
319 	i->iov = iov;
320 	i->iov_offset = skip;
321 	return wanted - bytes;
322 }
323 
324 #ifdef PIPE_PARANOIA
325 static bool sanity(const struct iov_iter *i)
326 {
327 	struct pipe_inode_info *pipe = i->pipe;
328 	int idx = i->idx;
329 	int next = pipe->curbuf + pipe->nrbufs;
330 	if (i->iov_offset) {
331 		struct pipe_buffer *p;
332 		if (unlikely(!pipe->nrbufs))
333 			goto Bad;	// pipe must be non-empty
334 		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
335 			goto Bad;	// must be at the last buffer...
336 
337 		p = &pipe->bufs[idx];
338 		if (unlikely(p->offset + p->len != i->iov_offset))
339 			goto Bad;	// ... at the end of segment
340 	} else {
341 		if (idx != (next & (pipe->buffers - 1)))
342 			goto Bad;	// must be right after the last buffer
343 	}
344 	return true;
345 Bad:
346 	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
347 	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
348 			pipe->curbuf, pipe->nrbufs, pipe->buffers);
349 	for (idx = 0; idx < pipe->buffers; idx++)
350 		printk(KERN_ERR "[%p %p %d %d]\n",
351 			pipe->bufs[idx].ops,
352 			pipe->bufs[idx].page,
353 			pipe->bufs[idx].offset,
354 			pipe->bufs[idx].len);
355 	WARN_ON(1);
356 	return false;
357 }
358 #else
359 #define sanity(i) true
360 #endif
361 
362 static inline int next_idx(int idx, struct pipe_inode_info *pipe)
363 {
364 	return (idx + 1) & (pipe->buffers - 1);
365 }
366 
367 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
368 			 struct iov_iter *i)
369 {
370 	struct pipe_inode_info *pipe = i->pipe;
371 	struct pipe_buffer *buf;
372 	size_t off;
373 	int idx;
374 
375 	if (unlikely(bytes > i->count))
376 		bytes = i->count;
377 
378 	if (unlikely(!bytes))
379 		return 0;
380 
381 	if (!sanity(i))
382 		return 0;
383 
384 	off = i->iov_offset;
385 	idx = i->idx;
386 	buf = &pipe->bufs[idx];
387 	if (off) {
388 		if (offset == off && buf->page == page) {
389 			/* merge with the last one */
390 			buf->len += bytes;
391 			i->iov_offset += bytes;
392 			goto out;
393 		}
394 		idx = next_idx(idx, pipe);
395 		buf = &pipe->bufs[idx];
396 	}
397 	if (idx == pipe->curbuf && pipe->nrbufs)
398 		return 0;
399 	pipe->nrbufs++;
400 	buf->ops = &page_cache_pipe_buf_ops;
401 	buf->flags = 0;
402 	get_page(buf->page = page);
403 	buf->offset = offset;
404 	buf->len = bytes;
405 	i->iov_offset = offset + bytes;
406 	i->idx = idx;
407 out:
408 	i->count -= bytes;
409 	return bytes;
410 }
411 
412 /*
413  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
414  * bytes.  For each iovec, fault in each page that constitutes the iovec.
415  *
416  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
417  * because it is an invalid address).
418  */
419 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
420 {
421 	size_t skip = i->iov_offset;
422 	const struct iovec *iov;
423 	int err;
424 	struct iovec v;
425 
426 	if (iter_is_iovec(i)) {
427 		iterate_iovec(i, bytes, v, iov, skip, ({
428 			err = fault_in_pages_readable(v.iov_base, v.iov_len);
429 			if (unlikely(err))
430 			return err;
431 		0;}))
432 	}
433 	return 0;
434 }
435 EXPORT_SYMBOL(iov_iter_fault_in_readable);
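/*
 * Example (illustrative sketch, not part of the original file): the usual
 * caller pattern, as in a generic_perform_write()-style loop, is to fault
 * the user pages in before taking a page lock so that the later atomic copy
 * is unlikely to fault.  "offset", "status", "copied" and "page" below are
 * hypothetical locals of that caller:
 *
 *	size_t chunk = min_t(size_t, iov_iter_count(i), PAGE_SIZE - offset);
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, chunk))) {
 *		status = -EFAULT;
 *		break;
 *	}
 *	... lock/prepare the page, then ...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, chunk);
 */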
436 
437 void iov_iter_init(struct iov_iter *i, unsigned int direction,
438 			const struct iovec *iov, unsigned long nr_segs,
439 			size_t count)
440 {
441 	WARN_ON(direction & ~(READ | WRITE));
442 	direction &= READ | WRITE;
443 
444 	/* It will get better.  Eventually... */
445 	if (uaccess_kernel()) {
446 		i->type = ITER_KVEC | direction;
447 		i->kvec = (struct kvec *)iov;
448 	} else {
449 		i->type = ITER_IOVEC | direction;
450 		i->iov = iov;
451 	}
452 	i->nr_segs = nr_segs;
453 	i->iov_offset = 0;
454 	i->count = count;
455 }
456 EXPORT_SYMBOL(iov_iter_init);
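/*
 * Example (illustrative sketch, not part of the original file): wrapping a
 * single user buffer for a read-style transfer.  All names below are
 * hypothetical locals; READ means the buffer is the *destination*, i.e. it
 * is filled via copy_to_iter()/copy_page_to_iter():
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, READ, &iov, 1, len);
 *	copied = copy_to_iter(kbuf, min(len, klen), &iter);
 */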
457 
458 static void memzero_page(struct page *page, size_t offset, size_t len)
459 {
460 	char *addr = kmap_atomic(page);
461 	memset(addr + offset, 0, len);
462 	kunmap_atomic(addr);
463 }
464 
465 static inline bool allocated(struct pipe_buffer *buf)
466 {
467 	return buf->ops == &default_pipe_buf_ops;
468 }
469 
470 static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
471 {
472 	size_t off = i->iov_offset;
473 	int idx = i->idx;
474 	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
475 		idx = next_idx(idx, i->pipe);
476 		off = 0;
477 	}
478 	*idxp = idx;
479 	*offp = off;
480 }
481 
482 static size_t push_pipe(struct iov_iter *i, size_t size,
483 			int *idxp, size_t *offp)
484 {
485 	struct pipe_inode_info *pipe = i->pipe;
486 	size_t off;
487 	int idx;
488 	ssize_t left;
489 
490 	if (unlikely(size > i->count))
491 		size = i->count;
492 	if (unlikely(!size))
493 		return 0;
494 
495 	left = size;
496 	data_start(i, &idx, &off);
497 	*idxp = idx;
498 	*offp = off;
499 	if (off) {
500 		left -= PAGE_SIZE - off;
501 		if (left <= 0) {
502 			pipe->bufs[idx].len += size;
503 			return size;
504 		}
505 		pipe->bufs[idx].len = PAGE_SIZE;
506 		idx = next_idx(idx, pipe);
507 	}
508 	while (idx != pipe->curbuf || !pipe->nrbufs) {
509 		struct page *page = alloc_page(GFP_USER);
510 		if (!page)
511 			break;
512 		pipe->nrbufs++;
513 		pipe->bufs[idx].ops = &default_pipe_buf_ops;
514 		pipe->bufs[idx].flags = 0;
515 		pipe->bufs[idx].page = page;
516 		pipe->bufs[idx].offset = 0;
517 		if (left <= PAGE_SIZE) {
518 			pipe->bufs[idx].len = left;
519 			return size;
520 		}
521 		pipe->bufs[idx].len = PAGE_SIZE;
522 		left -= PAGE_SIZE;
523 		idx = next_idx(idx, pipe);
524 	}
525 	return size - left;
526 }
527 
528 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
529 				struct iov_iter *i)
530 {
531 	struct pipe_inode_info *pipe = i->pipe;
532 	size_t n, off;
533 	int idx;
534 
535 	if (!sanity(i))
536 		return 0;
537 
538 	bytes = n = push_pipe(i, bytes, &idx, &off);
539 	if (unlikely(!n))
540 		return 0;
541 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
542 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
543 		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
544 		i->idx = idx;
545 		i->iov_offset = off + chunk;
546 		n -= chunk;
547 		addr += chunk;
548 	}
549 	i->count -= bytes;
550 	return bytes;
551 }
552 
553 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
554 			      __wsum sum, size_t off)
555 {
556 	__wsum next = csum_partial_copy_nocheck(from, to, len, 0);
557 	return csum_block_add(sum, next, off);
558 }
559 
560 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
561 					 struct csum_state *csstate,
562 					 struct iov_iter *i)
563 {
564 	struct pipe_inode_info *pipe = i->pipe;
565 	__wsum sum = csstate->csum;
566 	size_t off = csstate->off;
567 	size_t n, r;
568 	int idx;
569 
570 	if (!sanity(i))
571 		return 0;
572 
573 	bytes = n = push_pipe(i, bytes, &idx, &r);
574 	if (unlikely(!n))
575 		return 0;
576 	for ( ; n; idx = next_idx(idx, pipe), r = 0) {
577 		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
578 		char *p = kmap_atomic(pipe->bufs[idx].page);
579 		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
580 		kunmap_atomic(p);
581 		i->idx = idx;
582 		i->iov_offset = r + chunk;
583 		n -= chunk;
584 		off += chunk;
585 		addr += chunk;
586 	}
587 	i->count -= bytes;
588 	csstate->csum = sum;
589 	csstate->off = off;
590 	return bytes;
591 }
592 
593 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
594 {
595 	const char *from = addr;
596 	if (unlikely(iov_iter_is_pipe(i)))
597 		return copy_pipe_to_iter(addr, bytes, i);
598 	if (iter_is_iovec(i))
599 		might_fault();
600 	iterate_and_advance(i, bytes, v,
601 		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
602 		memcpy_to_page(v.bv_page, v.bv_offset,
603 			       (from += v.bv_len) - v.bv_len, v.bv_len),
604 		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
605 	)
606 
607 	return bytes;
608 }
609 EXPORT_SYMBOL(_copy_to_iter);
610 
611 #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
612 static int copyout_mcsafe(void __user *to, const void *from, size_t n)
613 {
614 	if (access_ok(to, n)) {
615 		kasan_check_read(from, n);
616 		n = copy_to_user_mcsafe((__force void *) to, from, n);
617 	}
618 	return n;
619 }
620 
621 static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
622 		const char *from, size_t len)
623 {
624 	unsigned long ret;
625 	char *to;
626 
627 	to = kmap_atomic(page);
628 	ret = memcpy_mcsafe(to + offset, from, len);
629 	kunmap_atomic(to);
630 
631 	return ret;
632 }
633 
634 static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
635 				struct iov_iter *i)
636 {
637 	struct pipe_inode_info *pipe = i->pipe;
638 	size_t n, off, xfer = 0;
639 	int idx;
640 
641 	if (!sanity(i))
642 		return 0;
643 
644 	bytes = n = push_pipe(i, bytes, &idx, &off);
645 	if (unlikely(!n))
646 		return 0;
647 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
648 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
649 		unsigned long rem;
650 
651 		rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
652 				chunk);
653 		i->idx = idx;
654 		i->iov_offset = off + chunk - rem;
655 		xfer += chunk - rem;
656 		if (rem)
657 			break;
658 		n -= chunk;
659 		addr += chunk;
660 	}
661 	i->count -= xfer;
662 	return xfer;
663 }
664 
665 /**
666  * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
667  * @addr: source kernel address
668  * @bytes: total transfer length
669  * @i: destination iterator
670  *
671  * The pmem driver arranges for filesystem-dax to use this facility via
672  * dax_copy_to_iter() for protecting read/write to persistent memory.
673  * Unless / until an architecture can guarantee identical performance
674  * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
675  * performance regression to switch more users to the mcsafe version.
676  *
677  * Otherwise, the main differences between this and the typical _copy_to_iter() are:
678  *
679  * * Typical tail/residue handling after a fault retries the copy
680  *   byte-by-byte until the fault happens again. Re-triggering machine
681  *   checks is potentially fatal so the implementation uses source
682  *   alignment and poison alignment assumptions to avoid re-triggering
683  *   hardware exceptions.
684  *
685  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
686  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
687  *   a short copy.
688  *
689  * See MCSAFE_TEST for self-test.
690  */
691 size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
692 {
693 	const char *from = addr;
694 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
695 
696 	if (unlikely(iov_iter_is_pipe(i)))
697 		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
698 	if (iter_is_iovec(i))
699 		might_fault();
700 	iterate_and_advance(i, bytes, v,
701 		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
702 		({
703 		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
704                                (from += v.bv_len) - v.bv_len, v.bv_len);
705 		if (rem) {
706 			curr_addr = (unsigned long) from;
707 			bytes = curr_addr - s_addr - rem;
708 			return bytes;
709 		}
710 		}),
711 		({
712 		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
713 				v.iov_len);
714 		if (rem) {
715 			curr_addr = (unsigned long) from;
716 			bytes = curr_addr - s_addr - rem;
717 			return bytes;
718 		}
719 		})
720 	)
721 
722 	return bytes;
723 }
724 EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
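/*
 * Example (illustrative sketch, not part of the original file): per the
 * comment above, a short return here is final -- it means good bytes were
 * delivered up to the poisoned source line -- so a caller reports partial
 * progress rather than retrying.  Names and error policy are hypothetical:
 *
 *	size_t copied = _copy_to_iter_mcsafe(kaddr, len, iter);
 *
 *	if (copied != len)
 *		return copied ? copied : -EIO;
 */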
725 #endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
726 
727 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
728 {
729 	char *to = addr;
730 	if (unlikely(iov_iter_is_pipe(i))) {
731 		WARN_ON(1);
732 		return 0;
733 	}
734 	if (iter_is_iovec(i))
735 		might_fault();
736 	iterate_and_advance(i, bytes, v,
737 		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
738 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
739 				 v.bv_offset, v.bv_len),
740 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
741 	)
742 
743 	return bytes;
744 }
745 EXPORT_SYMBOL(_copy_from_iter);
746 
747 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
748 {
749 	char *to = addr;
750 	if (unlikely(iov_iter_is_pipe(i))) {
751 		WARN_ON(1);
752 		return false;
753 	}
754 	if (unlikely(i->count < bytes))
755 		return false;
756 
757 	if (iter_is_iovec(i))
758 		might_fault();
759 	iterate_all_kinds(i, bytes, v, ({
760 		if (copyin((to += v.iov_len) - v.iov_len,
761 				      v.iov_base, v.iov_len))
762 			return false;
763 		0;}),
764 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
765 				 v.bv_offset, v.bv_len),
766 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
767 	)
768 
769 	iov_iter_advance(i, bytes);
770 	return true;
771 }
772 EXPORT_SYMBOL(_copy_from_iter_full);
773 
774 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
775 {
776 	char *to = addr;
777 	if (unlikely(iov_iter_is_pipe(i))) {
778 		WARN_ON(1);
779 		return 0;
780 	}
781 	iterate_and_advance(i, bytes, v,
782 		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
783 					 v.iov_base, v.iov_len),
784 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
785 				 v.bv_offset, v.bv_len),
786 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
787 	)
788 
789 	return bytes;
790 }
791 EXPORT_SYMBOL(_copy_from_iter_nocache);
792 
793 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
794 /**
795  * _copy_from_iter_flushcache - write destination through cpu cache
796  * @addr: destination kernel address
797  * @bytes: total transfer length
799  * @i: source iterator
799  *
800  * The pmem driver arranges for filesystem-dax to use this facility via
801  * dax_copy_from_iter() for ensuring that writes to persistent memory
802  * are flushed through the CPU cache. It is differentiated from
803  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
804  * all iterator types. _copy_from_iter_nocache() only attempts to
805  * bypass the cache for the ITER_IOVEC case, and on some archs may use
806  * instructions that strand dirty-data in the cache.
807  */
808 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
809 {
810 	char *to = addr;
811 	if (unlikely(iov_iter_is_pipe(i))) {
812 		WARN_ON(1);
813 		return 0;
814 	}
815 	iterate_and_advance(i, bytes, v,
816 		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
817 					 v.iov_base, v.iov_len),
818 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
819 				 v.bv_offset, v.bv_len),
820 		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
821 			v.iov_len)
822 	)
823 
824 	return bytes;
825 }
826 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
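/*
 * Example (illustrative sketch, not part of the original file): a pmem-style
 * ->copy_from_iter() hook can simply forward here so that incoming data is
 * pushed past the CPU cache on its way to persistent memory.  The wrapper
 * below is hypothetical and much simpler than the real pmem driver:
 *
 *	static size_t pmem_copy_from_iter(void *pmem_addr, size_t bytes,
 *					  struct iov_iter *i)
 *	{
 *		return _copy_from_iter_flushcache(pmem_addr, bytes, i);
 *	}
 */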
827 #endif
828 
829 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
830 {
831 	char *to = addr;
832 	if (unlikely(iov_iter_is_pipe(i))) {
833 		WARN_ON(1);
834 		return false;
835 	}
836 	if (unlikely(i->count < bytes))
837 		return false;
838 	iterate_all_kinds(i, bytes, v, ({
839 		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
840 					     v.iov_base, v.iov_len))
841 			return false;
842 		0;}),
843 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
844 				 v.bv_offset, v.bv_len),
845 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
846 	)
847 
848 	iov_iter_advance(i, bytes);
849 	return true;
850 }
851 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
852 
853 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
854 {
855 	struct page *head;
856 	size_t v = n + offset;
857 
858 	/*
859 	 * The general case needs to access the page order in order
860 	 * to compute the page size.
861 	 * However, we mostly deal with order-0 pages and thus can
862 	 * avoid a possible cache line miss for requests that fit all
863 	 * page orders.
864 	 */
865 	if (n <= v && v <= PAGE_SIZE)
866 		return true;
867 
868 	head = compound_head(page);
869 	v += (page - head) << PAGE_SHIFT;
870 
871 	if (likely(n <= v && v <= (page_size(head))))
872 		return true;
873 	WARN_ON(1);
874 	return false;
875 }
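/*
 * Added note, worked example for the fast path above: with an order-0 page,
 * offset == 100 and n == PAGE_SIZE - 100 give v == PAGE_SIZE, so
 * "n <= v && v <= PAGE_SIZE" holds and the compound head is never touched.
 * If n + offset overflows, v wraps below n, the "n <= v" half fails, and
 * the request falls through to the slow compound-page check.
 */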
876 
877 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
878 			 struct iov_iter *i)
879 {
880 	if (unlikely(!page_copy_sane(page, offset, bytes)))
881 		return 0;
882 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
883 		void *kaddr = kmap_atomic(page);
884 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
885 		kunmap_atomic(kaddr);
886 		return wanted;
887 	} else if (unlikely(iov_iter_is_discard(i))) {
888 		if (unlikely(i->count < bytes))
889 			bytes = i->count;
890 		i->count -= bytes;
891 		return bytes;
892 	} else if (likely(!iov_iter_is_pipe(i)))
893 		return copy_page_to_iter_iovec(page, offset, bytes, i);
894 	else
895 		return copy_page_to_iter_pipe(page, offset, bytes, i);
896 }
897 EXPORT_SYMBOL(copy_page_to_iter);
898 
899 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
900 			 struct iov_iter *i)
901 {
902 	if (unlikely(!page_copy_sane(page, offset, bytes)))
903 		return 0;
904 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
905 		WARN_ON(1);
906 		return 0;
907 	}
908 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
909 		void *kaddr = kmap_atomic(page);
910 		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
911 		kunmap_atomic(kaddr);
912 		return wanted;
913 	} else
914 		return copy_page_from_iter_iovec(page, offset, bytes, i);
915 }
916 EXPORT_SYMBOL(copy_page_from_iter);
917 
918 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
919 {
920 	struct pipe_inode_info *pipe = i->pipe;
921 	size_t n, off;
922 	int idx;
923 
924 	if (!sanity(i))
925 		return 0;
926 
927 	bytes = n = push_pipe(i, bytes, &idx, &off);
928 	if (unlikely(!n))
929 		return 0;
930 
931 	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
932 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
933 		memzero_page(pipe->bufs[idx].page, off, chunk);
934 		i->idx = idx;
935 		i->iov_offset = off + chunk;
936 		n -= chunk;
937 	}
938 	i->count -= bytes;
939 	return bytes;
940 }
941 
942 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
943 {
944 	if (unlikely(iov_iter_is_pipe(i)))
945 		return pipe_zero(bytes, i);
946 	iterate_and_advance(i, bytes, v,
947 		clear_user(v.iov_base, v.iov_len),
948 		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
949 		memset(v.iov_base, 0, v.iov_len)
950 	)
951 
952 	return bytes;
953 }
954 EXPORT_SYMBOL(iov_iter_zero);
955 
956 size_t iov_iter_copy_from_user_atomic(struct page *page,
957 		struct iov_iter *i, unsigned long offset, size_t bytes)
958 {
959 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
960 	if (unlikely(!page_copy_sane(page, offset, bytes))) {
961 		kunmap_atomic(kaddr);
962 		return 0;
963 	}
964 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
965 		kunmap_atomic(kaddr);
966 		WARN_ON(1);
967 		return 0;
968 	}
969 	iterate_all_kinds(i, bytes, v,
970 		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
971 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
972 				 v.bv_offset, v.bv_len),
973 		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
974 	)
975 	kunmap_atomic(kaddr);
976 	return bytes;
977 }
978 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
979 
980 static inline void pipe_truncate(struct iov_iter *i)
981 {
982 	struct pipe_inode_info *pipe = i->pipe;
983 	if (pipe->nrbufs) {
984 		size_t off = i->iov_offset;
985 		int idx = i->idx;
986 		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
987 		if (off) {
988 			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
989 			idx = next_idx(idx, pipe);
990 			nrbufs++;
991 		}
992 		while (pipe->nrbufs > nrbufs) {
993 			pipe_buf_release(pipe, &pipe->bufs[idx]);
994 			idx = next_idx(idx, pipe);
995 			pipe->nrbufs--;
996 		}
997 	}
998 }
999 
1000 static void pipe_advance(struct iov_iter *i, size_t size)
1001 {
1002 	struct pipe_inode_info *pipe = i->pipe;
1003 	if (unlikely(i->count < size))
1004 		size = i->count;
1005 	if (size) {
1006 		struct pipe_buffer *buf;
1007 		size_t off = i->iov_offset, left = size;
1008 		int idx = i->idx;
1009 		if (off) /* make it relative to the beginning of buffer */
1010 			left += off - pipe->bufs[idx].offset;
1011 		while (1) {
1012 			buf = &pipe->bufs[idx];
1013 			if (left <= buf->len)
1014 				break;
1015 			left -= buf->len;
1016 			idx = next_idx(idx, pipe);
1017 		}
1018 		i->idx = idx;
1019 		i->iov_offset = buf->offset + left;
1020 	}
1021 	i->count -= size;
1022 	/* ... and discard everything past that point */
1023 	pipe_truncate(i);
1024 }
1025 
1026 void iov_iter_advance(struct iov_iter *i, size_t size)
1027 {
1028 	if (unlikely(iov_iter_is_pipe(i))) {
1029 		pipe_advance(i, size);
1030 		return;
1031 	}
1032 	if (unlikely(iov_iter_is_discard(i))) {
1033 		i->count -= size;
1034 		return;
1035 	}
1036 	iterate_and_advance(i, size, v, 0, 0, 0)
1037 }
1038 EXPORT_SYMBOL(iov_iter_advance);
1039 
1040 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1041 {
1042 	if (!unroll)
1043 		return;
1044 	if (WARN_ON(unroll > MAX_RW_COUNT))
1045 		return;
1046 	i->count += unroll;
1047 	if (unlikely(iov_iter_is_pipe(i))) {
1048 		struct pipe_inode_info *pipe = i->pipe;
1049 		int idx = i->idx;
1050 		size_t off = i->iov_offset;
1051 		while (1) {
1052 			size_t n = off - pipe->bufs[idx].offset;
1053 			if (unroll < n) {
1054 				off -= unroll;
1055 				break;
1056 			}
1057 			unroll -= n;
1058 			if (!unroll && idx == i->start_idx) {
1059 				off = 0;
1060 				break;
1061 			}
1062 			if (!idx--)
1063 				idx = pipe->buffers - 1;
1064 			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
1065 		}
1066 		i->iov_offset = off;
1067 		i->idx = idx;
1068 		pipe_truncate(i);
1069 		return;
1070 	}
1071 	if (unlikely(iov_iter_is_discard(i)))
1072 		return;
1073 	if (unroll <= i->iov_offset) {
1074 		i->iov_offset -= unroll;
1075 		return;
1076 	}
1077 	unroll -= i->iov_offset;
1078 	if (iov_iter_is_bvec(i)) {
1079 		const struct bio_vec *bvec = i->bvec;
1080 		while (1) {
1081 			size_t n = (--bvec)->bv_len;
1082 			i->nr_segs++;
1083 			if (unroll <= n) {
1084 				i->bvec = bvec;
1085 				i->iov_offset = n - unroll;
1086 				return;
1087 			}
1088 			unroll -= n;
1089 		}
1090 	} else { /* same logics for iovec and kvec */
1091 		const struct iovec *iov = i->iov;
1092 		while (1) {
1093 			size_t n = (--iov)->iov_len;
1094 			i->nr_segs++;
1095 			if (unroll <= n) {
1096 				i->iov = iov;
1097 				i->iov_offset = n - unroll;
1098 				return;
1099 			}
1100 			unroll -= n;
1101 		}
1102 	}
1103 }
1104 EXPORT_SYMBOL(iov_iter_revert);
1105 
1106 /*
1107  * Return the count of just the current iov_iter segment.
1108  */
1109 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1110 {
1111 	if (unlikely(iov_iter_is_pipe(i)))
1112 		return i->count;	// it is a silly place, anyway
1113 	if (i->nr_segs == 1)
1114 		return i->count;
1115 	if (unlikely(iov_iter_is_discard(i)))
1116 		return i->count;
1117 	else if (iov_iter_is_bvec(i))
1118 		return min(i->count, i->bvec->bv_len - i->iov_offset);
1119 	else
1120 		return min(i->count, i->iov->iov_len - i->iov_offset);
1121 }
1122 EXPORT_SYMBOL(iov_iter_single_seg_count);
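/*
 * Added note, worked example: for an iovec iterator over segments of length
 * {8, 64} with iov_offset == 3 and count == 69, this returns
 * min(69, 8 - 3) == 5 -- only what remains of the first segment, not the
 * total remaining count.
 */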
1123 
1124 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1125 			const struct kvec *kvec, unsigned long nr_segs,
1126 			size_t count)
1127 {
1128 	WARN_ON(direction & ~(READ | WRITE));
1129 	i->type = ITER_KVEC | (direction & (READ | WRITE));
1130 	i->kvec = kvec;
1131 	i->nr_segs = nr_segs;
1132 	i->iov_offset = 0;
1133 	i->count = count;
1134 }
1135 EXPORT_SYMBOL(iov_iter_kvec);
1136 
1137 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1138 			const struct bio_vec *bvec, unsigned long nr_segs,
1139 			size_t count)
1140 {
1141 	WARN_ON(direction & ~(READ | WRITE));
1142 	i->type = ITER_BVEC | (direction & (READ | WRITE));
1143 	i->bvec = bvec;
1144 	i->nr_segs = nr_segs;
1145 	i->iov_offset = 0;
1146 	i->count = count;
1147 }
1148 EXPORT_SYMBOL(iov_iter_bvec);
1149 
1150 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1151 			struct pipe_inode_info *pipe,
1152 			size_t count)
1153 {
1154 	BUG_ON(direction != READ);
1155 	WARN_ON(pipe->nrbufs == pipe->buffers);
1156 	i->type = ITER_PIPE | READ;
1157 	i->pipe = pipe;
1158 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1159 	i->iov_offset = 0;
1160 	i->count = count;
1161 	i->start_idx = i->idx;
1162 }
1163 EXPORT_SYMBOL(iov_iter_pipe);
1164 
1165 /**
1166  * iov_iter_discard - Initialise an I/O iterator that discards data
1167  * @i: The iterator to initialise.
1168  * @direction: The direction of the transfer.
1169  * @count: The size of the I/O buffer in bytes.
1170  *
1171  * Set up an I/O iterator that just discards everything that's written to it.
1172  * It's only available as a READ iterator.
1173  */
1174 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1175 {
1176 	BUG_ON(direction != READ);
1177 	i->type = ITER_DISCARD | READ;
1178 	i->count = count;
1179 	i->iov_offset = 0;
1180 }
1181 EXPORT_SYMBOL(iov_iter_discard);
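/*
 * Example (illustrative sketch, not part of the original file): a discard
 * iterator lets a caller drain bytes through any "copy into an iterator"
 * path while throwing the data away -- e.g. copy_page_to_iter() above only
 * decrements i->count for ITER_DISCARD.  "consume_into_iter", "src" and
 * "len" are hypothetical:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, len);
 *	skipped = consume_into_iter(src, &iter);
 */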
1182 
1183 unsigned long iov_iter_alignment(const struct iov_iter *i)
1184 {
1185 	unsigned long res = 0;
1186 	size_t size = i->count;
1187 
1188 	if (unlikely(iov_iter_is_pipe(i))) {
1189 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
1190 			return size | i->iov_offset;
1191 		return size;
1192 	}
1193 	iterate_all_kinds(i, size, v,
1194 		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
1195 		res |= v.bv_offset | v.bv_len,
1196 		res |= (unsigned long)v.iov_base | v.iov_len
1197 	)
1198 	return res;
1199 }
1200 EXPORT_SYMBOL(iov_iter_alignment);
1201 
1202 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1203 {
1204 	unsigned long res = 0;
1205 	size_t size = i->count;
1206 
1207 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1208 		WARN_ON(1);
1209 		return ~0U;
1210 	}
1211 
1212 	iterate_all_kinds(i, size, v,
1213 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1214 			(size != v.iov_len ? size : 0), 0),
1215 		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1216 			(size != v.bv_len ? size : 0)),
1217 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1218 			(size != v.iov_len ? size : 0))
1219 		);
1220 	return res;
1221 }
1222 EXPORT_SYMBOL(iov_iter_gap_alignment);
1223 
1224 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1225 				size_t maxsize,
1226 				struct page **pages,
1227 				int idx,
1228 				size_t *start)
1229 {
1230 	struct pipe_inode_info *pipe = i->pipe;
1231 	ssize_t n = push_pipe(i, maxsize, &idx, start);
1232 	if (!n)
1233 		return -EFAULT;
1234 
1235 	maxsize = n;
1236 	n += *start;
1237 	while (n > 0) {
1238 		get_page(*pages++ = pipe->bufs[idx].page);
1239 		idx = next_idx(idx, pipe);
1240 		n -= PAGE_SIZE;
1241 	}
1242 
1243 	return maxsize;
1244 }
1245 
1246 static ssize_t pipe_get_pages(struct iov_iter *i,
1247 		   struct page **pages, size_t maxsize, unsigned maxpages,
1248 		   size_t *start)
1249 {
1250 	unsigned npages;
1251 	size_t capacity;
1252 	int idx;
1253 
1254 	if (!maxsize)
1255 		return 0;
1256 
1257 	if (!sanity(i))
1258 		return -EFAULT;
1259 
1260 	data_start(i, &idx, start);
1261 	/* some of this one + all after this one */
1262 	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1263 	capacity = min(npages,maxpages) * PAGE_SIZE - *start;
1264 
1265 	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
1266 }
1267 
1268 ssize_t iov_iter_get_pages(struct iov_iter *i,
1269 		   struct page **pages, size_t maxsize, unsigned maxpages,
1270 		   size_t *start)
1271 {
1272 	if (maxsize > i->count)
1273 		maxsize = i->count;
1274 
1275 	if (unlikely(iov_iter_is_pipe(i)))
1276 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
1277 	if (unlikely(iov_iter_is_discard(i)))
1278 		return -EFAULT;
1279 
1280 	iterate_all_kinds(i, maxsize, v, ({
1281 		unsigned long addr = (unsigned long)v.iov_base;
1282 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1283 		int n;
1284 		int res;
1285 
1286 		if (len > maxpages * PAGE_SIZE)
1287 			len = maxpages * PAGE_SIZE;
1288 		addr &= ~(PAGE_SIZE - 1);
1289 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1290 		res = get_user_pages_fast(addr, n,
1291 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1292 				pages);
1293 		if (unlikely(res <= 0))
1294 			return res;
1295 		return (res == n ? len : res * PAGE_SIZE) - *start;
1296 	0;}),({
1297 		/* can't be more than PAGE_SIZE */
1298 		*start = v.bv_offset;
1299 		get_page(*pages = v.bv_page);
1300 		return v.bv_len;
1301 	}),({
1302 		return -EFAULT;
1303 	})
1304 	)
1305 	return 0;
1306 }
1307 EXPORT_SYMBOL(iov_iter_get_pages);
1308 
1309 static struct page **get_pages_array(size_t n)
1310 {
1311 	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1312 }
1313 
1314 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1315 		   struct page ***pages, size_t maxsize,
1316 		   size_t *start)
1317 {
1318 	struct page **p;
1319 	ssize_t n;
1320 	int idx;
1321 	int npages;
1322 
1323 	if (!maxsize)
1324 		return 0;
1325 
1326 	if (!sanity(i))
1327 		return -EFAULT;
1328 
1329 	data_start(i, &idx, start);
1330 	/* some of this one + all after this one */
1331 	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
1332 	n = npages * PAGE_SIZE - *start;
1333 	if (maxsize > n)
1334 		maxsize = n;
1335 	else
1336 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1337 	p = get_pages_array(npages);
1338 	if (!p)
1339 		return -ENOMEM;
1340 	n = __pipe_get_pages(i, maxsize, p, idx, start);
1341 	if (n > 0)
1342 		*pages = p;
1343 	else
1344 		kvfree(p);
1345 	return n;
1346 }
1347 
1348 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1349 		   struct page ***pages, size_t maxsize,
1350 		   size_t *start)
1351 {
1352 	struct page **p;
1353 
1354 	if (maxsize > i->count)
1355 		maxsize = i->count;
1356 
1357 	if (unlikely(iov_iter_is_pipe(i)))
1358 		return pipe_get_pages_alloc(i, pages, maxsize, start);
1359 	if (unlikely(iov_iter_is_discard(i)))
1360 		return -EFAULT;
1361 
1362 	iterate_all_kinds(i, maxsize, v, ({
1363 		unsigned long addr = (unsigned long)v.iov_base;
1364 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1365 		int n;
1366 		int res;
1367 
1368 		addr &= ~(PAGE_SIZE - 1);
1369 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1370 		p = get_pages_array(n);
1371 		if (!p)
1372 			return -ENOMEM;
1373 		res = get_user_pages_fast(addr, n,
1374 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1375 		if (unlikely(res <= 0)) {
1376 			kvfree(p);
1377 			*pages = NULL;
1378 			return res;
1379 		}
1380 		*pages = p;
1381 		return (res == n ? len : res * PAGE_SIZE) - *start;
1382 	0;}),({
1383 		/* can't be more than PAGE_SIZE */
1384 		*start = v.bv_offset;
1385 		*pages = p = get_pages_array(1);
1386 		if (!p)
1387 			return -ENOMEM;
1388 		get_page(*p = v.bv_page);
1389 		return v.bv_len;
1390 	}),({
1391 		return -EFAULT;
1392 	})
1393 	)
1394 	return 0;
1395 }
1396 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1397 
1398 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1399 			       struct iov_iter *i)
1400 {
1401 	char *to = addr;
1402 	__wsum sum, next;
1403 	size_t off = 0;
1404 	sum = *csum;
1405 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1406 		WARN_ON(1);
1407 		return 0;
1408 	}
1409 	iterate_and_advance(i, bytes, v, ({
1410 		int err = 0;
1411 		next = csum_and_copy_from_user(v.iov_base,
1412 					       (to += v.iov_len) - v.iov_len,
1413 					       v.iov_len, 0, &err);
1414 		if (!err) {
1415 			sum = csum_block_add(sum, next, off);
1416 			off += v.iov_len;
1417 		}
1418 		err ? v.iov_len : 0;
1419 	}), ({
1420 		char *p = kmap_atomic(v.bv_page);
1421 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1422 				      p + v.bv_offset, v.bv_len,
1423 				      sum, off);
1424 		kunmap_atomic(p);
1425 		off += v.bv_len;
1426 	}),({
1427 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1428 				      v.iov_base, v.iov_len,
1429 				      sum, off);
1430 		off += v.iov_len;
1431 	})
1432 	)
1433 	*csum = sum;
1434 	return bytes;
1435 }
1436 EXPORT_SYMBOL(csum_and_copy_from_iter);
1437 
1438 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1439 			       struct iov_iter *i)
1440 {
1441 	char *to = addr;
1442 	__wsum sum, next;
1443 	size_t off = 0;
1444 	sum = *csum;
1445 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1446 		WARN_ON(1);
1447 		return false;
1448 	}
1449 	if (unlikely(i->count < bytes))
1450 		return false;
1451 	iterate_all_kinds(i, bytes, v, ({
1452 		int err = 0;
1453 		next = csum_and_copy_from_user(v.iov_base,
1454 					       (to += v.iov_len) - v.iov_len,
1455 					       v.iov_len, 0, &err);
1456 		if (err)
1457 			return false;
1458 		sum = csum_block_add(sum, next, off);
1459 		off += v.iov_len;
1460 		0;
1461 	}), ({
1462 		char *p = kmap_atomic(v.bv_page);
1463 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1464 				      p + v.bv_offset, v.bv_len,
1465 				      sum, off);
1466 		kunmap_atomic(p);
1467 		off += v.bv_len;
1468 	}),({
1469 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1470 				      v.iov_base, v.iov_len,
1471 				      sum, off);
1472 		off += v.iov_len;
1473 	})
1474 	)
1475 	*csum = sum;
1476 	iov_iter_advance(i, bytes);
1477 	return true;
1478 }
1479 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1480 
1481 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1482 			     struct iov_iter *i)
1483 {
1484 	struct csum_state *csstate = _csstate;
1485 	const char *from = addr;
1486 	__wsum sum, next;
1487 	size_t off;
1488 
1489 	if (unlikely(iov_iter_is_pipe(i)))
1490 		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1491 
1492 	sum = csstate->csum;
1493 	off = csstate->off;
1494 	if (unlikely(iov_iter_is_discard(i))) {
1495 		WARN_ON(1);	/* for now */
1496 		return 0;
1497 	}
1498 	iterate_and_advance(i, bytes, v, ({
1499 		int err = 0;
1500 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1501 					     v.iov_base,
1502 					     v.iov_len, 0, &err);
1503 		if (!err) {
1504 			sum = csum_block_add(sum, next, off);
1505 			off += v.iov_len;
1506 		}
1507 		err ? v.iov_len : 0;
1508 	}), ({
1509 		char *p = kmap_atomic(v.bv_page);
1510 		sum = csum_and_memcpy(p + v.bv_offset,
1511 				      (from += v.bv_len) - v.bv_len,
1512 				      v.bv_len, sum, off);
1513 		kunmap_atomic(p);
1514 		off += v.bv_len;
1515 	}),({
1516 		sum = csum_and_memcpy(v.iov_base,
1517 				     (from += v.iov_len) - v.iov_len,
1518 				     v.iov_len, sum, off);
1519 		off += v.iov_len;
1520 	})
1521 	)
1522 	csstate->csum = sum;
1523 	csstate->off = off;
1524 	return bytes;
1525 }
1526 EXPORT_SYMBOL(csum_and_copy_to_iter);
1527 
1528 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1529 		struct iov_iter *i)
1530 {
1531 #ifdef CONFIG_CRYPTO
1532 	struct ahash_request *hash = hashp;
1533 	struct scatterlist sg;
1534 	size_t copied;
1535 
1536 	copied = copy_to_iter(addr, bytes, i);
1537 	sg_init_one(&sg, addr, copied);
1538 	ahash_request_set_crypt(hash, &sg, NULL, copied);
1539 	crypto_ahash_update(hash);
1540 	return copied;
1541 #else
1542 	return 0;
1543 #endif
1544 }
1545 EXPORT_SYMBOL(hash_and_copy_to_iter);
1546 
1547 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1548 {
1549 	size_t size = i->count;
1550 	int npages = 0;
1551 
1552 	if (!size)
1553 		return 0;
1554 	if (unlikely(iov_iter_is_discard(i)))
1555 		return 0;
1556 
1557 	if (unlikely(iov_iter_is_pipe(i))) {
1558 		struct pipe_inode_info *pipe = i->pipe;
1559 		size_t off;
1560 		int idx;
1561 
1562 		if (!sanity(i))
1563 			return 0;
1564 
1565 		data_start(i, &idx, &off);
1566 		/* some of this one + all after this one */
1567 		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1568 		if (npages >= maxpages)
1569 			return maxpages;
1570 	} else iterate_all_kinds(i, size, v, ({
1571 		unsigned long p = (unsigned long)v.iov_base;
1572 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1573 			- p / PAGE_SIZE;
1574 		if (npages >= maxpages)
1575 			return maxpages;
1576 	0;}),({
1577 		npages++;
1578 		if (npages >= maxpages)
1579 			return maxpages;
1580 	}),({
1581 		unsigned long p = (unsigned long)v.iov_base;
1582 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1583 			- p / PAGE_SIZE;
1584 		if (npages >= maxpages)
1585 			return maxpages;
1586 	})
1587 	)
1588 	return npages;
1589 }
1590 EXPORT_SYMBOL(iov_iter_npages);
1591 
1592 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1593 {
1594 	*new = *old;
1595 	if (unlikely(iov_iter_is_pipe(new))) {
1596 		WARN_ON(1);
1597 		return NULL;
1598 	}
1599 	if (unlikely(iov_iter_is_discard(new)))
1600 		return NULL;
1601 	if (iov_iter_is_bvec(new))
1602 		return new->bvec = kmemdup(new->bvec,
1603 				    new->nr_segs * sizeof(struct bio_vec),
1604 				    flags);
1605 	else
1606 		/* iovec and kvec have identical layout */
1607 		return new->iov = kmemdup(new->iov,
1608 				   new->nr_segs * sizeof(struct iovec),
1609 				   flags);
1610 }
1611 EXPORT_SYMBOL(dup_iter);
1612 
1613 /**
1614  * import_iovec() - Copy an array of &struct iovec from userspace
1615  *     into the kernel, check that it is valid, and initialize a new
1616  *     &struct iov_iter iterator to access it.
1617  *
1618  * @type: One of %READ or %WRITE.
1619  * @uvector: Pointer to the userspace array.
1620  * @nr_segs: Number of elements in userspace array.
1621  * @fast_segs: Number of elements in @iov.
1622  * @iov: (input and output parameter) Pointer to pointer to (usually small
1623  *     on-stack) kernel array.
1624  * @i: Pointer to iterator that will be initialized on success.
1625  *
1626  * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1627  * then this function places %NULL in *@iov on return. Otherwise, a new
1628  * array will be allocated and the result placed in *@iov. This means that
1629  * the caller may call kfree() on *@iov regardless of whether the small
1630  * on-stack array was used or not (and regardless of whether this function
1631  * returns an error or not).
1632  *
1633  * Return: Negative error code on error, bytes imported on success
1634  */
1635 ssize_t import_iovec(int type, const struct iovec __user * uvector,
1636 		 unsigned nr_segs, unsigned fast_segs,
1637 		 struct iovec **iov, struct iov_iter *i)
1638 {
1639 	ssize_t n;
1640 	struct iovec *p;
1641 	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1642 				  *iov, &p);
1643 	if (n < 0) {
1644 		if (p != *iov)
1645 			kfree(p);
1646 		*iov = NULL;
1647 		return n;
1648 	}
1649 	iov_iter_init(i, type, p, nr_segs, n);
1650 	*iov = p == *iov ? NULL : p;
1651 	return n;
1652 }
1653 EXPORT_SYMBOL(import_iovec);
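/*
 * Example (illustrative sketch, not part of the original file): the calling
 * convention described in the comment above, mirroring readv()-style users.
 * Note that kfree(iov) is safe whether or not the on-stack array ended up
 * being used; "uvector" and "nr_segs" stand for the caller's arguments:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvector, nr_segs, UIO_FASTIOV, &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	... use &iter; ret is the total byte count ...
 *	kfree(iov);
 */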
1654 
1655 #ifdef CONFIG_COMPAT
1656 #include <linux/compat.h>
1657 
1658 ssize_t compat_import_iovec(int type,
1659 		const struct compat_iovec __user * uvector,
1660 		unsigned nr_segs, unsigned fast_segs,
1661 		struct iovec **iov, struct iov_iter *i)
1662 {
1663 	ssize_t n;
1664 	struct iovec *p;
1665 	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1666 				  *iov, &p);
1667 	if (n < 0) {
1668 		if (p != *iov)
1669 			kfree(p);
1670 		*iov = NULL;
1671 		return n;
1672 	}
1673 	iov_iter_init(i, type, p, nr_segs, n);
1674 	*iov = p == *iov ? NULL : p;
1675 	return n;
1676 }
1677 #endif
1678 
1679 int import_single_range(int rw, void __user *buf, size_t len,
1680 		 struct iovec *iov, struct iov_iter *i)
1681 {
1682 	if (len > MAX_RW_COUNT)
1683 		len = MAX_RW_COUNT;
1684 	if (unlikely(!access_ok(buf, len)))
1685 		return -EFAULT;
1686 
1687 	iov->iov_base = buf;
1688 	iov->iov_len = len;
1689 	iov_iter_init(i, rw, iov, 1, len);
1690 	return 0;
1691 }
1692 EXPORT_SYMBOL(import_single_range);
1693 
1694 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
1695 			    int (*f)(struct kvec *vec, void *context),
1696 			    void *context)
1697 {
1698 	struct kvec w;
1699 	int err = -EINVAL;
1700 	if (!bytes)
1701 		return 0;
1702 
1703 	iterate_all_kinds(i, bytes, v, -EINVAL, ({
1704 		w.iov_base = kmap(v.bv_page) + v.bv_offset;
1705 		w.iov_len = v.bv_len;
1706 		err = f(&w, context);
1707 		kunmap(v.bv_page);
1708 		err;}), ({
1709 		w = v;
1710 		err = f(&w, context);})
1711 	)
1712 	return err;
1713 }
1714 EXPORT_SYMBOL(iov_iter_for_each_range);
1715