1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/slab.h>
9 #include <linux/vmalloc.h>
10 #include <linux/splice.h>
11 #include <linux/compat.h>
12 #include <net/checksum.h>
13 #include <linux/scatterlist.h>
14 #include <linux/instrumented.h>
15 
16 #define PIPE_PARANOIA /* for now */
17 
18 #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
19 	size_t left;					\
20 	size_t wanted = n;				\
21 	__p = i->iov;					\
22 	__v.iov_len = min(n, __p->iov_len - skip);	\
23 	if (likely(__v.iov_len)) {			\
24 		__v.iov_base = __p->iov_base + skip;	\
25 		left = (STEP);				\
26 		__v.iov_len -= left;			\
27 		skip += __v.iov_len;			\
28 		n -= __v.iov_len;			\
29 	} else {					\
30 		left = 0;				\
31 	}						\
32 	while (unlikely(!left && n)) {			\
33 		__p++;					\
34 		__v.iov_len = min(n, __p->iov_len);	\
35 		if (unlikely(!__v.iov_len))		\
36 			continue;			\
37 		__v.iov_base = __p->iov_base;		\
38 		left = (STEP);				\
39 		__v.iov_len -= left;			\
40 		skip = __v.iov_len;			\
41 		n -= __v.iov_len;			\
42 	}						\
43 	n = wanted - n;					\
44 }
45 
46 #define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
47 	size_t wanted = n;				\
48 	__p = i->kvec;					\
49 	__v.iov_len = min(n, __p->iov_len - skip);	\
50 	if (likely(__v.iov_len)) {			\
51 		__v.iov_base = __p->iov_base + skip;	\
52 		(void)(STEP);				\
53 		skip += __v.iov_len;			\
54 		n -= __v.iov_len;			\
55 	}						\
56 	while (unlikely(n)) {				\
57 		__p++;					\
58 		__v.iov_len = min(n, __p->iov_len);	\
59 		if (unlikely(!__v.iov_len))		\
60 			continue;			\
61 		__v.iov_base = __p->iov_base;		\
62 		(void)(STEP);				\
63 		skip = __v.iov_len;			\
64 		n -= __v.iov_len;			\
65 	}						\
66 	n = wanted;					\
67 }
68 
69 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
70 	struct bvec_iter __start;			\
71 	__start.bi_size = n;				\
72 	__start.bi_bvec_done = skip;			\
73 	__start.bi_idx = 0;				\
74 	for_each_bvec(__v, i->bvec, __bi, __start) {	\
75 		if (!__v.bv_len)			\
76 			continue;			\
77 		(void)(STEP);				\
78 	}						\
79 }
80 
81 #define iterate_all_kinds(i, n, v, I, B, K) {			\
82 	if (likely(n)) {					\
83 		size_t skip = i->iov_offset;			\
84 		if (unlikely(i->type & ITER_BVEC)) {		\
85 			struct bio_vec v;			\
86 			struct bvec_iter __bi;			\
87 			iterate_bvec(i, n, v, __bi, skip, (B))	\
88 		} else if (unlikely(i->type & ITER_KVEC)) {	\
89 			const struct kvec *kvec;		\
90 			struct kvec v;				\
91 			iterate_kvec(i, n, v, kvec, skip, (K))	\
92 		} else if (unlikely(i->type & ITER_DISCARD)) {	\
93 		} else {					\
94 			const struct iovec *iov;		\
95 			struct iovec v;				\
96 			iterate_iovec(i, n, v, iov, skip, (I))	\
97 		}						\
98 	}							\
99 }
100 
101 #define iterate_and_advance(i, n, v, I, B, K) {			\
102 	if (unlikely(i->count < n))				\
103 		n = i->count;					\
104 	if (i->count) {						\
105 		size_t skip = i->iov_offset;			\
106 		if (unlikely(i->type & ITER_BVEC)) {		\
107 			const struct bio_vec *bvec = i->bvec;	\
108 			struct bio_vec v;			\
109 			struct bvec_iter __bi;			\
110 			iterate_bvec(i, n, v, __bi, skip, (B))	\
111 			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
112 			i->nr_segs -= i->bvec - bvec;		\
113 			skip = __bi.bi_bvec_done;		\
114 		} else if (unlikely(i->type & ITER_KVEC)) {	\
115 			const struct kvec *kvec;		\
116 			struct kvec v;				\
117 			iterate_kvec(i, n, v, kvec, skip, (K))	\
118 			if (skip == kvec->iov_len) {		\
119 				kvec++;				\
120 				skip = 0;			\
121 			}					\
122 			i->nr_segs -= kvec - i->kvec;		\
123 			i->kvec = kvec;				\
124 		} else if (unlikely(i->type & ITER_DISCARD)) {	\
125 			skip += n;				\
126 		} else {					\
127 			const struct iovec *iov;		\
128 			struct iovec v;				\
129 			iterate_iovec(i, n, v, iov, skip, (I))	\
130 			if (skip == iov->iov_len) {		\
131 				iov++;				\
132 				skip = 0;			\
133 			}					\
134 			i->nr_segs -= iov - i->iov;		\
135 			i->iov = iov;				\
136 		}						\
137 		i->count -= n;					\
138 		i->iov_offset = skip;				\
139 	}							\
140 }
141 
142 static int copyout(void __user *to, const void *from, size_t n)
143 {
144 	if (should_fail_usercopy())
145 		return n;
146 	if (access_ok(to, n)) {
147 		instrument_copy_to_user(to, from, n);
148 		n = raw_copy_to_user(to, from, n);
149 	}
150 	return n;
151 }
152 
153 static int copyin(void *to, const void __user *from, size_t n)
154 {
155 	if (should_fail_usercopy())
156 		return n;
157 	if (access_ok(from, n)) {
158 		instrument_copy_from_user(to, from, n);
159 		n = raw_copy_from_user(to, from, n);
160 	}
161 	return n;
162 }
163 
164 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
165 			 struct iov_iter *i)
166 {
167 	size_t skip, copy, left, wanted;
168 	const struct iovec *iov;
169 	char __user *buf;
170 	void *kaddr, *from;
171 
172 	if (unlikely(bytes > i->count))
173 		bytes = i->count;
174 
175 	if (unlikely(!bytes))
176 		return 0;
177 
178 	might_fault();
179 	wanted = bytes;
180 	iov = i->iov;
181 	skip = i->iov_offset;
182 	buf = iov->iov_base + skip;
183 	copy = min(bytes, iov->iov_len - skip);
184 
185 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
186 		kaddr = kmap_atomic(page);
187 		from = kaddr + offset;
188 
189 		/* first chunk, usually the only one */
190 		left = copyout(buf, from, copy);
191 		copy -= left;
192 		skip += copy;
193 		from += copy;
194 		bytes -= copy;
195 
196 		while (unlikely(!left && bytes)) {
197 			iov++;
198 			buf = iov->iov_base;
199 			copy = min(bytes, iov->iov_len);
200 			left = copyout(buf, from, copy);
201 			copy -= left;
202 			skip = copy;
203 			from += copy;
204 			bytes -= copy;
205 		}
206 		if (likely(!bytes)) {
207 			kunmap_atomic(kaddr);
208 			goto done;
209 		}
210 		offset = from - kaddr;
211 		buf += copy;
212 		kunmap_atomic(kaddr);
213 		copy = min(bytes, iov->iov_len - skip);
214 	}
215 	/* Too bad - revert to non-atomic kmap */
216 
217 	kaddr = kmap(page);
218 	from = kaddr + offset;
219 	left = copyout(buf, from, copy);
220 	copy -= left;
221 	skip += copy;
222 	from += copy;
223 	bytes -= copy;
224 	while (unlikely(!left && bytes)) {
225 		iov++;
226 		buf = iov->iov_base;
227 		copy = min(bytes, iov->iov_len);
228 		left = copyout(buf, from, copy);
229 		copy -= left;
230 		skip = copy;
231 		from += copy;
232 		bytes -= copy;
233 	}
234 	kunmap(page);
235 
236 done:
237 	if (skip == iov->iov_len) {
238 		iov++;
239 		skip = 0;
240 	}
241 	i->count -= wanted - bytes;
242 	i->nr_segs -= iov - i->iov;
243 	i->iov = iov;
244 	i->iov_offset = skip;
245 	return wanted - bytes;
246 }
247 
248 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
249 			 struct iov_iter *i)
250 {
251 	size_t skip, copy, left, wanted;
252 	const struct iovec *iov;
253 	char __user *buf;
254 	void *kaddr, *to;
255 
256 	if (unlikely(bytes > i->count))
257 		bytes = i->count;
258 
259 	if (unlikely(!bytes))
260 		return 0;
261 
262 	might_fault();
263 	wanted = bytes;
264 	iov = i->iov;
265 	skip = i->iov_offset;
266 	buf = iov->iov_base + skip;
267 	copy = min(bytes, iov->iov_len - skip);
268 
269 	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
270 		kaddr = kmap_atomic(page);
271 		to = kaddr + offset;
272 
273 		/* first chunk, usually the only one */
274 		left = copyin(to, buf, copy);
275 		copy -= left;
276 		skip += copy;
277 		to += copy;
278 		bytes -= copy;
279 
280 		while (unlikely(!left && bytes)) {
281 			iov++;
282 			buf = iov->iov_base;
283 			copy = min(bytes, iov->iov_len);
284 			left = copyin(to, buf, copy);
285 			copy -= left;
286 			skip = copy;
287 			to += copy;
288 			bytes -= copy;
289 		}
290 		if (likely(!bytes)) {
291 			kunmap_atomic(kaddr);
292 			goto done;
293 		}
294 		offset = to - kaddr;
295 		buf += copy;
296 		kunmap_atomic(kaddr);
297 		copy = min(bytes, iov->iov_len - skip);
298 	}
299 	/* Too bad - revert to non-atomic kmap */
300 
301 	kaddr = kmap(page);
302 	to = kaddr + offset;
303 	left = copyin(to, buf, copy);
304 	copy -= left;
305 	skip += copy;
306 	to += copy;
307 	bytes -= copy;
308 	while (unlikely(!left && bytes)) {
309 		iov++;
310 		buf = iov->iov_base;
311 		copy = min(bytes, iov->iov_len);
312 		left = copyin(to, buf, copy);
313 		copy -= left;
314 		skip = copy;
315 		to += copy;
316 		bytes -= copy;
317 	}
318 	kunmap(page);
319 
320 done:
321 	if (skip == iov->iov_len) {
322 		iov++;
323 		skip = 0;
324 	}
325 	i->count -= wanted - bytes;
326 	i->nr_segs -= iov - i->iov;
327 	i->iov = iov;
328 	i->iov_offset = skip;
329 	return wanted - bytes;
330 }
331 
332 #ifdef PIPE_PARANOIA
333 static bool sanity(const struct iov_iter *i)
334 {
335 	struct pipe_inode_info *pipe = i->pipe;
336 	unsigned int p_head = pipe->head;
337 	unsigned int p_tail = pipe->tail;
338 	unsigned int p_mask = pipe->ring_size - 1;
339 	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
340 	unsigned int i_head = i->head;
341 	unsigned int idx;
342 
343 	if (i->iov_offset) {
344 		struct pipe_buffer *p;
345 		if (unlikely(p_occupancy == 0))
346 			goto Bad;	// pipe must be non-empty
347 		if (unlikely(i_head != p_head - 1))
348 			goto Bad;	// must be at the last buffer...
349 
350 		p = &pipe->bufs[i_head & p_mask];
351 		if (unlikely(p->offset + p->len != i->iov_offset))
352 			goto Bad;	// ... at the end of segment
353 	} else {
354 		if (i_head != p_head)
355 			goto Bad;	// must be right after the last buffer
356 	}
357 	return true;
358 Bad:
359 	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
360 	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
361 			p_head, p_tail, pipe->ring_size);
362 	for (idx = 0; idx < pipe->ring_size; idx++)
363 		printk(KERN_ERR "[%p %p %d %d]\n",
364 			pipe->bufs[idx].ops,
365 			pipe->bufs[idx].page,
366 			pipe->bufs[idx].offset,
367 			pipe->bufs[idx].len);
368 	WARN_ON(1);
369 	return false;
370 }
371 #else
372 #define sanity(i) true
373 #endif
374 
375 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
376 			 struct iov_iter *i)
377 {
378 	struct pipe_inode_info *pipe = i->pipe;
379 	struct pipe_buffer *buf;
380 	unsigned int p_tail = pipe->tail;
381 	unsigned int p_mask = pipe->ring_size - 1;
382 	unsigned int i_head = i->head;
383 	size_t off;
384 
385 	if (unlikely(bytes > i->count))
386 		bytes = i->count;
387 
388 	if (unlikely(!bytes))
389 		return 0;
390 
391 	if (!sanity(i))
392 		return 0;
393 
394 	off = i->iov_offset;
395 	buf = &pipe->bufs[i_head & p_mask];
396 	if (off) {
397 		if (offset == off && buf->page == page) {
398 			/* merge with the last one */
399 			buf->len += bytes;
400 			i->iov_offset += bytes;
401 			goto out;
402 		}
403 		i_head++;
404 		buf = &pipe->bufs[i_head & p_mask];
405 	}
406 	if (pipe_full(i_head, p_tail, pipe->max_usage))
407 		return 0;
408 
409 	buf->ops = &page_cache_pipe_buf_ops;
410 	buf->flags = 0;
411 	get_page(page);
412 	buf->page = page;
413 	buf->offset = offset;
414 	buf->len = bytes;
415 
416 	pipe->head = i_head + 1;
417 	i->iov_offset = offset + bytes;
418 	i->head = i_head;
419 out:
420 	i->count -= bytes;
421 	return bytes;
422 }
423 
424 /*
425  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
426  * bytes.  For each iovec, fault in each page that constitutes the iovec.
427  *
428  * Return 0 on success, or non-zero if the memory could not be accessed (e.g.
429  * because it is an invalid address).
430  */
431 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
432 {
433 	size_t skip = i->iov_offset;
434 	const struct iovec *iov;
435 	int err;
436 	struct iovec v;
437 
438 	if (iter_is_iovec(i)) {
439 		iterate_iovec(i, bytes, v, iov, skip, ({
440 			err = fault_in_pages_readable(v.iov_base, v.iov_len);
441 			if (unlikely(err))
442 			return err;
443 		0;}))
444 	}
445 	return 0;
446 }
447 EXPORT_SYMBOL(iov_iter_fault_in_readable);
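
/*
 * Editor's usage sketch (not part of the original file): the classic
 * buffered-write pattern pre-faults the user pages with
 * iov_iter_fault_in_readable() so that the later atomic copy, done while
 * the destination page is locked, is unlikely to fault.  The function name
 * example_write_step() is hypothetical; a short return from the atomic copy
 * means a fault raced us and the caller retries.
 */
static size_t example_write_step(struct page *page, size_t offset,
				 size_t bytes, struct iov_iter *from)
{
	if (iov_iter_fault_in_readable(from, bytes))
		return 0;	/* user memory is not accessible */

	return iov_iter_copy_from_user_atomic(page, from, offset, bytes);
}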
448 
449 void iov_iter_init(struct iov_iter *i, unsigned int direction,
450 			const struct iovec *iov, unsigned long nr_segs,
451 			size_t count)
452 {
453 	WARN_ON(direction & ~(READ | WRITE));
454 	direction &= READ | WRITE;
455 
456 	/* It will get better.  Eventually... */
457 	if (uaccess_kernel()) {
458 		i->type = ITER_KVEC | direction;
459 		i->kvec = (struct kvec *)iov;
460 	} else {
461 		i->type = ITER_IOVEC | direction;
462 		i->iov = iov;
463 	}
464 	i->nr_segs = nr_segs;
465 	i->iov_offset = 0;
466 	i->count = count;
467 }
468 EXPORT_SYMBOL(iov_iter_init);
469 
470 static void memzero_page(struct page *page, size_t offset, size_t len)
471 {
472 	char *addr = kmap_atomic(page);
473 	memset(addr + offset, 0, len);
474 	kunmap_atomic(addr);
475 }
476 
477 static inline bool allocated(struct pipe_buffer *buf)
478 {
479 	return buf->ops == &default_pipe_buf_ops;
480 }
481 
482 static inline void data_start(const struct iov_iter *i,
483 			      unsigned int *iter_headp, size_t *offp)
484 {
485 	unsigned int p_mask = i->pipe->ring_size - 1;
486 	unsigned int iter_head = i->head;
487 	size_t off = i->iov_offset;
488 
489 	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
490 		    off == PAGE_SIZE)) {
491 		iter_head++;
492 		off = 0;
493 	}
494 	*iter_headp = iter_head;
495 	*offp = off;
496 }
497 
498 static size_t push_pipe(struct iov_iter *i, size_t size,
499 			int *iter_headp, size_t *offp)
500 {
501 	struct pipe_inode_info *pipe = i->pipe;
502 	unsigned int p_tail = pipe->tail;
503 	unsigned int p_mask = pipe->ring_size - 1;
504 	unsigned int iter_head;
505 	size_t off;
506 	ssize_t left;
507 
508 	if (unlikely(size > i->count))
509 		size = i->count;
510 	if (unlikely(!size))
511 		return 0;
512 
513 	left = size;
514 	data_start(i, &iter_head, &off);
515 	*iter_headp = iter_head;
516 	*offp = off;
517 	if (off) {
518 		left -= PAGE_SIZE - off;
519 		if (left <= 0) {
520 			pipe->bufs[iter_head & p_mask].len += size;
521 			return size;
522 		}
523 		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
524 		iter_head++;
525 	}
526 	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
527 		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
528 		struct page *page = alloc_page(GFP_USER);
529 		if (!page)
530 			break;
531 
532 		buf->ops = &default_pipe_buf_ops;
533 		buf->flags = 0;
534 		buf->page = page;
535 		buf->offset = 0;
536 		buf->len = min_t(ssize_t, left, PAGE_SIZE);
537 		left -= buf->len;
538 		iter_head++;
539 		pipe->head = iter_head;
540 
541 		if (left == 0)
542 			return size;
543 	}
544 	return size - left;
545 }
546 
547 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
548 				struct iov_iter *i)
549 {
550 	struct pipe_inode_info *pipe = i->pipe;
551 	unsigned int p_mask = pipe->ring_size - 1;
552 	unsigned int i_head;
553 	size_t n, off;
554 
555 	if (!sanity(i))
556 		return 0;
557 
558 	bytes = n = push_pipe(i, bytes, &i_head, &off);
559 	if (unlikely(!n))
560 		return 0;
561 	do {
562 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
563 		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
564 		i->head = i_head;
565 		i->iov_offset = off + chunk;
566 		n -= chunk;
567 		addr += chunk;
568 		off = 0;
569 		i_head++;
570 	} while (n);
571 	i->count -= bytes;
572 	return bytes;
573 }
574 
575 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
576 			      __wsum sum, size_t off)
577 {
578 	__wsum next = csum_partial_copy_nocheck(from, to, len);
579 	return csum_block_add(sum, next, off);
580 }
581 
582 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
583 					 struct csum_state *csstate,
584 					 struct iov_iter *i)
585 {
586 	struct pipe_inode_info *pipe = i->pipe;
587 	unsigned int p_mask = pipe->ring_size - 1;
588 	__wsum sum = csstate->csum;
589 	size_t off = csstate->off;
590 	unsigned int i_head;
591 	size_t n, r;
592 
593 	if (!sanity(i))
594 		return 0;
595 
596 	bytes = n = push_pipe(i, bytes, &i_head, &r);
597 	if (unlikely(!n))
598 		return 0;
599 	do {
600 		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
601 		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
602 		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
603 		kunmap_atomic(p);
604 		i->head = i_head;
605 		i->iov_offset = r + chunk;
606 		n -= chunk;
607 		off += chunk;
608 		addr += chunk;
609 		r = 0;
610 		i_head++;
611 	} while (n);
612 	i->count -= bytes;
613 	csstate->csum = sum;
614 	csstate->off = off;
615 	return bytes;
616 }
617 
618 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
619 {
620 	const char *from = addr;
621 	if (unlikely(iov_iter_is_pipe(i)))
622 		return copy_pipe_to_iter(addr, bytes, i);
623 	if (iter_is_iovec(i))
624 		might_fault();
625 	iterate_and_advance(i, bytes, v,
626 		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
627 		memcpy_to_page(v.bv_page, v.bv_offset,
628 			       (from += v.bv_len) - v.bv_len, v.bv_len),
629 		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
630 	)
631 
632 	return bytes;
633 }
634 EXPORT_SYMBOL(_copy_to_iter);
635 
636 #ifdef CONFIG_ARCH_HAS_COPY_MC
637 static int copyout_mc(void __user *to, const void *from, size_t n)
638 {
639 	if (access_ok(to, n)) {
640 		instrument_copy_to_user(to, from, n);
641 		n = copy_mc_to_user((__force void *) to, from, n);
642 	}
643 	return n;
644 }
645 
646 static unsigned long copy_mc_to_page(struct page *page, size_t offset,
647 		const char *from, size_t len)
648 {
649 	unsigned long ret;
650 	char *to;
651 
652 	to = kmap_atomic(page);
653 	ret = copy_mc_to_kernel(to + offset, from, len);
654 	kunmap_atomic(to);
655 
656 	return ret;
657 }
658 
659 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
660 				struct iov_iter *i)
661 {
662 	struct pipe_inode_info *pipe = i->pipe;
663 	unsigned int p_mask = pipe->ring_size - 1;
664 	unsigned int i_head;
665 	size_t n, off, xfer = 0;
666 
667 	if (!sanity(i))
668 		return 0;
669 
670 	bytes = n = push_pipe(i, bytes, &i_head, &off);
671 	if (unlikely(!n))
672 		return 0;
673 	do {
674 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
675 		unsigned long rem;
676 
677 		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
678 					    off, addr, chunk);
679 		i->head = i_head;
680 		i->iov_offset = off + chunk - rem;
681 		xfer += chunk - rem;
682 		if (rem)
683 			break;
684 		n -= chunk;
685 		addr += chunk;
686 		off = 0;
687 		i_head++;
688 	} while (n);
689 	i->count -= xfer;
690 	return xfer;
691 }
692 
693 /**
694  * _copy_mc_to_iter - copy to iter with source memory error exception handling
695  * @addr: source kernel address
696  * @bytes: total transfer length
697  * @i: destination iterator
698  *
699  * The pmem driver deploys this for the dax operation
700  * (dax_copy_to_iter()) for dax reads (bypassing the page cache and the
701  * block layer). Upon a machine check (#MC), read(2) aborts and returns
702  * either -EIO or the number of bytes successfully copied.
703  *
704  * The main differences between this and typical _copy_to_iter() are:
705  *
706  * * Typical tail/residue handling after a fault retries the copy
707  *   byte-by-byte until the fault happens again. Re-triggering machine
708  *   checks is potentially fatal so the implementation uses source
709  *   alignment and poison alignment assumptions to avoid re-triggering
710  *   hardware exceptions.
711  *
712  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
713  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
714  *   a short copy.
715  */
716 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
717 {
718 	const char *from = addr;
719 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
720 
721 	if (unlikely(iov_iter_is_pipe(i)))
722 		return copy_mc_pipe_to_iter(addr, bytes, i);
723 	if (iter_is_iovec(i))
724 		might_fault();
725 	iterate_and_advance(i, bytes, v,
726 		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
727 			   v.iov_len),
728 		({
729 		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
730 				      (from += v.bv_len) - v.bv_len, v.bv_len);
731 		if (rem) {
732 			curr_addr = (unsigned long) from;
733 			bytes = curr_addr - s_addr - rem;
734 			return bytes;
735 		}
736 		}),
737 		({
738 		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
739 					- v.iov_len, v.iov_len);
740 		if (rem) {
741 			curr_addr = (unsigned long) from;
742 			bytes = curr_addr - s_addr - rem;
743 			return bytes;
744 		}
745 		})
746 	)
747 
748 	return bytes;
749 }
750 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
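
/*
 * Editor's sketch (hypothetical caller, not part of the original file) of
 * how a pmem-style dax read consumes the return value described above: a
 * short count means poisoned memory was hit part-way through, and read(2)
 * reports either the bytes already copied or -EIO if nothing was copied.
 */
static ssize_t example_dax_read(void *kaddr, size_t len, struct iov_iter *to)
{
	size_t copied = _copy_mc_to_iter(kaddr, len, to);

	if (!copied && len)
		return -EIO;	/* poison hit before any byte was copied */
	return copied;		/* possibly short; returned to userspace */
}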
751 #endif /* CONFIG_ARCH_HAS_COPY_MC */
752 
753 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
754 {
755 	char *to = addr;
756 	if (unlikely(iov_iter_is_pipe(i))) {
757 		WARN_ON(1);
758 		return 0;
759 	}
760 	if (iter_is_iovec(i))
761 		might_fault();
762 	iterate_and_advance(i, bytes, v,
763 		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
764 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
765 				 v.bv_offset, v.bv_len),
766 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
767 	)
768 
769 	return bytes;
770 }
771 EXPORT_SYMBOL(_copy_from_iter);
772 
773 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
774 {
775 	char *to = addr;
776 	if (unlikely(iov_iter_is_pipe(i))) {
777 		WARN_ON(1);
778 		return false;
779 	}
780 	if (unlikely(i->count < bytes))
781 		return false;
782 
783 	if (iter_is_iovec(i))
784 		might_fault();
785 	iterate_all_kinds(i, bytes, v, ({
786 		if (copyin((to += v.iov_len) - v.iov_len,
787 				      v.iov_base, v.iov_len))
788 			return false;
789 		0;}),
790 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
791 				 v.bv_offset, v.bv_len),
792 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
793 	)
794 
795 	iov_iter_advance(i, bytes);
796 	return true;
797 }
798 EXPORT_SYMBOL(_copy_from_iter_full);
799 
800 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
801 {
802 	char *to = addr;
803 	if (unlikely(iov_iter_is_pipe(i))) {
804 		WARN_ON(1);
805 		return 0;
806 	}
807 	iterate_and_advance(i, bytes, v,
808 		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
809 					 v.iov_base, v.iov_len),
810 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
811 				 v.bv_offset, v.bv_len),
812 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
813 	)
814 
815 	return bytes;
816 }
817 EXPORT_SYMBOL(_copy_from_iter_nocache);
818 
819 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
820 /**
821  * _copy_from_iter_flushcache - write destination through cpu cache
822  * @addr: destination kernel address
823  * @bytes: total transfer length
824  * @i: source iterator
825  *
826  * The pmem driver arranges for filesystem-dax to use this facility via
827  * dax_copy_from_iter() for ensuring that writes to persistent memory
828  * are flushed through the CPU cache. It is differentiated from
829  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
830  * all iterator types. _copy_from_iter_nocache() only attempts to
831  * bypass the cache for the ITER_IOVEC case, and on some archs may use
832  * instructions that strand dirty data in the cache.
833  */
834 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
835 {
836 	char *to = addr;
837 	if (unlikely(iov_iter_is_pipe(i))) {
838 		WARN_ON(1);
839 		return 0;
840 	}
841 	iterate_and_advance(i, bytes, v,
842 		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
843 					 v.iov_base, v.iov_len),
844 		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
845 				 v.bv_offset, v.bv_len),
846 		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
847 			v.iov_len)
848 	)
849 
850 	return bytes;
851 }
852 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
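
/*
 * Editor's sketch (hypothetical caller, assumes CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE,
 * not part of the original file): a persistent-memory write path pulls data
 * from the iterator with _copy_from_iter_flushcache() so nothing is left
 * dirty in the CPU cache; a short return means part of the source iterator
 * was inaccessible.
 */
static ssize_t example_pmem_write(void *pmem_addr, size_t len,
				  struct iov_iter *from)
{
	size_t copied = _copy_from_iter_flushcache(pmem_addr, len, from);

	return copied == len ? (ssize_t)copied : -EFAULT;
}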
853 #endif
854 
855 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
856 {
857 	char *to = addr;
858 	if (unlikely(iov_iter_is_pipe(i))) {
859 		WARN_ON(1);
860 		return false;
861 	}
862 	if (unlikely(i->count < bytes))
863 		return false;
864 	iterate_all_kinds(i, bytes, v, ({
865 		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
866 					     v.iov_base, v.iov_len))
867 			return false;
868 		0;}),
869 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
870 				 v.bv_offset, v.bv_len),
871 		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
872 	)
873 
874 	iov_iter_advance(i, bytes);
875 	return true;
876 }
877 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
878 
879 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
880 {
881 	struct page *head;
882 	size_t v = n + offset;
883 
884 	/*
885 	 * The general case needs to access the page order to compute
886 	 * the page size.
887 	 * However, we mostly deal with order-0 pages and thus can
888 	 * avoid a possible cache line miss for requests that fit within
889 	 * PAGE_SIZE, which are valid for any page order.
890 	 */
891 	if (n <= v && v <= PAGE_SIZE)
892 		return true;
893 
894 	head = compound_head(page);
895 	v += (page - head) << PAGE_SHIFT;
896 
897 	if (likely(n <= v && v <= (page_size(head))))
898 		return true;
899 	WARN_ON(1);
900 	return false;
901 }
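
/*
 * Editor's worked example of the fast path in page_copy_sane() (illustration
 * only): with offset = 64 and n = 256, v = 320 satisfies n <= v and
 * v <= PAGE_SIZE, so the copy is accepted without looking up the compound
 * head.  With offset = 64 and n = PAGE_SIZE, v exceeds PAGE_SIZE and the
 * slow path compares against page_size(compound_head(page)) instead.
 */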
902 
903 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
904 			 struct iov_iter *i)
905 {
906 	if (unlikely(!page_copy_sane(page, offset, bytes)))
907 		return 0;
908 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
909 		void *kaddr = kmap_atomic(page);
910 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
911 		kunmap_atomic(kaddr);
912 		return wanted;
913 	} else if (unlikely(iov_iter_is_discard(i))) {
914 		if (unlikely(i->count < bytes))
915 			bytes = i->count;
916 		i->count -= bytes;
917 		return bytes;
918 	} else if (likely(!iov_iter_is_pipe(i)))
919 		return copy_page_to_iter_iovec(page, offset, bytes, i);
920 	else
921 		return copy_page_to_iter_pipe(page, offset, bytes, i);
922 }
923 EXPORT_SYMBOL(copy_page_to_iter);
924 
925 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
926 			 struct iov_iter *i)
927 {
928 	if (unlikely(!page_copy_sane(page, offset, bytes)))
929 		return 0;
930 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
931 		WARN_ON(1);
932 		return 0;
933 	}
934 	if (i->type & (ITER_BVEC|ITER_KVEC)) {
935 		void *kaddr = kmap_atomic(page);
936 		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
937 		kunmap_atomic(kaddr);
938 		return wanted;
939 	} else
940 		return copy_page_from_iter_iovec(page, offset, bytes, i);
941 }
942 EXPORT_SYMBOL(copy_page_from_iter);
943 
944 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
945 {
946 	struct pipe_inode_info *pipe = i->pipe;
947 	unsigned int p_mask = pipe->ring_size - 1;
948 	unsigned int i_head;
949 	size_t n, off;
950 
951 	if (!sanity(i))
952 		return 0;
953 
954 	bytes = n = push_pipe(i, bytes, &i_head, &off);
955 	if (unlikely(!n))
956 		return 0;
957 
958 	do {
959 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
960 		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
961 		i->head = i_head;
962 		i->iov_offset = off + chunk;
963 		n -= chunk;
964 		off = 0;
965 		i_head++;
966 	} while (n);
967 	i->count -= bytes;
968 	return bytes;
969 }
970 
971 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
972 {
973 	if (unlikely(iov_iter_is_pipe(i)))
974 		return pipe_zero(bytes, i);
975 	iterate_and_advance(i, bytes, v,
976 		clear_user(v.iov_base, v.iov_len),
977 		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
978 		memset(v.iov_base, 0, v.iov_len)
979 	)
980 
981 	return bytes;
982 }
983 EXPORT_SYMBOL(iov_iter_zero);
984 
985 size_t iov_iter_copy_from_user_atomic(struct page *page,
986 		struct iov_iter *i, unsigned long offset, size_t bytes)
987 {
988 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
989 	if (unlikely(!page_copy_sane(page, offset, bytes))) {
990 		kunmap_atomic(kaddr);
991 		return 0;
992 	}
993 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
994 		kunmap_atomic(kaddr);
995 		WARN_ON(1);
996 		return 0;
997 	}
998 	iterate_all_kinds(i, bytes, v,
999 		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1000 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1001 				 v.bv_offset, v.bv_len),
1002 		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
1003 	)
1004 	kunmap_atomic(kaddr);
1005 	return bytes;
1006 }
1007 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1008 
1009 static inline void pipe_truncate(struct iov_iter *i)
1010 {
1011 	struct pipe_inode_info *pipe = i->pipe;
1012 	unsigned int p_tail = pipe->tail;
1013 	unsigned int p_head = pipe->head;
1014 	unsigned int p_mask = pipe->ring_size - 1;
1015 
1016 	if (!pipe_empty(p_head, p_tail)) {
1017 		struct pipe_buffer *buf;
1018 		unsigned int i_head = i->head;
1019 		size_t off = i->iov_offset;
1020 
1021 		if (off) {
1022 			buf = &pipe->bufs[i_head & p_mask];
1023 			buf->len = off - buf->offset;
1024 			i_head++;
1025 		}
1026 		while (p_head != i_head) {
1027 			p_head--;
1028 			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
1029 		}
1030 
1031 		pipe->head = p_head;
1032 	}
1033 }
1034 
1035 static void pipe_advance(struct iov_iter *i, size_t size)
1036 {
1037 	struct pipe_inode_info *pipe = i->pipe;
1038 	if (unlikely(i->count < size))
1039 		size = i->count;
1040 	if (size) {
1041 		struct pipe_buffer *buf;
1042 		unsigned int p_mask = pipe->ring_size - 1;
1043 		unsigned int i_head = i->head;
1044 		size_t off = i->iov_offset, left = size;
1045 
1046 		if (off) /* make it relative to the beginning of buffer */
1047 			left += off - pipe->bufs[i_head & p_mask].offset;
1048 		while (1) {
1049 			buf = &pipe->bufs[i_head & p_mask];
1050 			if (left <= buf->len)
1051 				break;
1052 			left -= buf->len;
1053 			i_head++;
1054 		}
1055 		i->head = i_head;
1056 		i->iov_offset = buf->offset + left;
1057 	}
1058 	i->count -= size;
1059 	/* ... and discard everything past that point */
1060 	pipe_truncate(i);
1061 }
1062 
1063 void iov_iter_advance(struct iov_iter *i, size_t size)
1064 {
1065 	if (unlikely(iov_iter_is_pipe(i))) {
1066 		pipe_advance(i, size);
1067 		return;
1068 	}
1069 	if (unlikely(iov_iter_is_discard(i))) {
1070 		i->count -= size;
1071 		return;
1072 	}
1073 	iterate_and_advance(i, size, v, 0, 0, 0)
1074 }
1075 EXPORT_SYMBOL(iov_iter_advance);
1076 
1077 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1078 {
1079 	if (!unroll)
1080 		return;
1081 	if (WARN_ON(unroll > MAX_RW_COUNT))
1082 		return;
1083 	i->count += unroll;
1084 	if (unlikely(iov_iter_is_pipe(i))) {
1085 		struct pipe_inode_info *pipe = i->pipe;
1086 		unsigned int p_mask = pipe->ring_size - 1;
1087 		unsigned int i_head = i->head;
1088 		size_t off = i->iov_offset;
1089 		while (1) {
1090 			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1091 			size_t n = off - b->offset;
1092 			if (unroll < n) {
1093 				off -= unroll;
1094 				break;
1095 			}
1096 			unroll -= n;
1097 			if (!unroll && i_head == i->start_head) {
1098 				off = 0;
1099 				break;
1100 			}
1101 			i_head--;
1102 			b = &pipe->bufs[i_head & p_mask];
1103 			off = b->offset + b->len;
1104 		}
1105 		i->iov_offset = off;
1106 		i->head = i_head;
1107 		pipe_truncate(i);
1108 		return;
1109 	}
1110 	if (unlikely(iov_iter_is_discard(i)))
1111 		return;
1112 	if (unroll <= i->iov_offset) {
1113 		i->iov_offset -= unroll;
1114 		return;
1115 	}
1116 	unroll -= i->iov_offset;
1117 	if (iov_iter_is_bvec(i)) {
1118 		const struct bio_vec *bvec = i->bvec;
1119 		while (1) {
1120 			size_t n = (--bvec)->bv_len;
1121 			i->nr_segs++;
1122 			if (unroll <= n) {
1123 				i->bvec = bvec;
1124 				i->iov_offset = n - unroll;
1125 				return;
1126 			}
1127 			unroll -= n;
1128 		}
1129 	} else { /* same logic for iovec and kvec */
1130 		const struct iovec *iov = i->iov;
1131 		while (1) {
1132 			size_t n = (--iov)->iov_len;
1133 			i->nr_segs++;
1134 			if (unroll <= n) {
1135 				i->iov = iov;
1136 				i->iov_offset = n - unroll;
1137 				return;
1138 			}
1139 			unroll -= n;
1140 		}
1141 	}
1142 }
1143 EXPORT_SYMBOL(iov_iter_revert);
1144 
1145 /*
1146  * Return the count of just the current iov_iter segment.
1147  */
1148 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1149 {
1150 	if (unlikely(iov_iter_is_pipe(i)))
1151 		return i->count;	// it is a silly place, anyway
1152 	if (i->nr_segs == 1)
1153 		return i->count;
1154 	if (unlikely(iov_iter_is_discard(i)))
1155 		return i->count;
1156 	else if (iov_iter_is_bvec(i))
1157 		return min(i->count, i->bvec->bv_len - i->iov_offset);
1158 	else
1159 		return min(i->count, i->iov->iov_len - i->iov_offset);
1160 }
1161 EXPORT_SYMBOL(iov_iter_single_seg_count);
1162 
1163 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1164 			const struct kvec *kvec, unsigned long nr_segs,
1165 			size_t count)
1166 {
1167 	WARN_ON(direction & ~(READ | WRITE));
1168 	i->type = ITER_KVEC | (direction & (READ | WRITE));
1169 	i->kvec = kvec;
1170 	i->nr_segs = nr_segs;
1171 	i->iov_offset = 0;
1172 	i->count = count;
1173 }
1174 EXPORT_SYMBOL(iov_iter_kvec);
1175 
1176 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1177 			const struct bio_vec *bvec, unsigned long nr_segs,
1178 			size_t count)
1179 {
1180 	WARN_ON(direction & ~(READ | WRITE));
1181 	i->type = ITER_BVEC | (direction & (READ | WRITE));
1182 	i->bvec = bvec;
1183 	i->nr_segs = nr_segs;
1184 	i->iov_offset = 0;
1185 	i->count = count;
1186 }
1187 EXPORT_SYMBOL(iov_iter_bvec);
1188 
1189 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1190 			struct pipe_inode_info *pipe,
1191 			size_t count)
1192 {
1193 	BUG_ON(direction != READ);
1194 	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1195 	i->type = ITER_PIPE | READ;
1196 	i->pipe = pipe;
1197 	i->head = pipe->head;
1198 	i->iov_offset = 0;
1199 	i->count = count;
1200 	i->start_head = i->head;
1201 }
1202 EXPORT_SYMBOL(iov_iter_pipe);
1203 
1204 /**
1205  * iov_iter_discard - Initialise an I/O iterator that discards data
1206  * @i: The iterator to initialise.
1207  * @direction: The direction of the transfer.
1208  * @count: The size of the I/O buffer in bytes.
1209  *
1210  * Set up an I/O iterator that just discards everything that's written to it.
1211  * It's only available as a READ iterator.
1212  */
1213 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1214 {
1215 	BUG_ON(direction != READ);
1216 	i->type = ITER_DISCARD | READ;
1217 	i->count = count;
1218 	i->iov_offset = 0;
1219 }
1220 EXPORT_SYMBOL(iov_iter_discard);
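
/*
 * Editor's sketch (hypothetical caller, not part of the original file): a
 * discard iterator makes it easy to drain @count bytes from a producer that
 * only knows how to fill an iov_iter; everything it writes is thrown away.
 * The produce() callback is an assumption for illustration.
 */
static ssize_t example_drain(size_t count,
			     ssize_t (*produce)(void *priv, struct iov_iter *to),
			     void *priv)
{
	struct iov_iter sink;

	iov_iter_discard(&sink, READ, count);
	return produce(priv, &sink);
}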
1221 
1222 unsigned long iov_iter_alignment(const struct iov_iter *i)
1223 {
1224 	unsigned long res = 0;
1225 	size_t size = i->count;
1226 
1227 	if (unlikely(iov_iter_is_pipe(i))) {
1228 		unsigned int p_mask = i->pipe->ring_size - 1;
1229 
1230 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1231 			return size | i->iov_offset;
1232 		return size;
1233 	}
1234 	iterate_all_kinds(i, size, v,
1235 		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
1236 		res |= v.bv_offset | v.bv_len,
1237 		res |= (unsigned long)v.iov_base | v.iov_len
1238 	)
1239 	return res;
1240 }
1241 EXPORT_SYMBOL(iov_iter_alignment);
1242 
1243 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1244 {
1245 	unsigned long res = 0;
1246 	size_t size = i->count;
1247 
1248 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1249 		WARN_ON(1);
1250 		return ~0U;
1251 	}
1252 
1253 	iterate_all_kinds(i, size, v,
1254 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1255 			(size != v.iov_len ? size : 0), 0),
1256 		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1257 			(size != v.bv_len ? size : 0)),
1258 		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
1259 			(size != v.iov_len ? size : 0))
1260 		);
1261 	return res;
1262 }
1263 EXPORT_SYMBOL(iov_iter_gap_alignment);
1264 
1265 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1266 				size_t maxsize,
1267 				struct page **pages,
1268 				int iter_head,
1269 				size_t *start)
1270 {
1271 	struct pipe_inode_info *pipe = i->pipe;
1272 	unsigned int p_mask = pipe->ring_size - 1;
1273 	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1274 	if (!n)
1275 		return -EFAULT;
1276 
1277 	maxsize = n;
1278 	n += *start;
1279 	while (n > 0) {
1280 		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1281 		iter_head++;
1282 		n -= PAGE_SIZE;
1283 	}
1284 
1285 	return maxsize;
1286 }
1287 
1288 static ssize_t pipe_get_pages(struct iov_iter *i,
1289 		   struct page **pages, size_t maxsize, unsigned maxpages,
1290 		   size_t *start)
1291 {
1292 	unsigned int iter_head, npages;
1293 	size_t capacity;
1294 
1295 	if (!maxsize)
1296 		return 0;
1297 
1298 	if (!sanity(i))
1299 		return -EFAULT;
1300 
1301 	data_start(i, &iter_head, start);
1302 	/* Amount of free space: some of this one + all after this one */
1303 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1304 	capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1305 
1306 	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1307 }
1308 
1309 ssize_t iov_iter_get_pages(struct iov_iter *i,
1310 		   struct page **pages, size_t maxsize, unsigned maxpages,
1311 		   size_t *start)
1312 {
1313 	if (maxsize > i->count)
1314 		maxsize = i->count;
1315 
1316 	if (unlikely(iov_iter_is_pipe(i)))
1317 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
1318 	if (unlikely(iov_iter_is_discard(i)))
1319 		return -EFAULT;
1320 
1321 	iterate_all_kinds(i, maxsize, v, ({
1322 		unsigned long addr = (unsigned long)v.iov_base;
1323 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1324 		int n;
1325 		int res;
1326 
1327 		if (len > maxpages * PAGE_SIZE)
1328 			len = maxpages * PAGE_SIZE;
1329 		addr &= ~(PAGE_SIZE - 1);
1330 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1331 		res = get_user_pages_fast(addr, n,
1332 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1333 				pages);
1334 		if (unlikely(res <= 0))
1335 			return res;
1336 		return (res == n ? len : res * PAGE_SIZE) - *start;
1337 	0;}),({
1338 		/* can't be more than PAGE_SIZE */
1339 		*start = v.bv_offset;
1340 		get_page(*pages = v.bv_page);
1341 		return v.bv_len;
1342 	}),({
1343 		return -EFAULT;
1344 	})
1345 	)
1346 	return 0;
1347 }
1348 EXPORT_SYMBOL(iov_iter_get_pages);
1349 
1350 static struct page **get_pages_array(size_t n)
1351 {
1352 	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1353 }
1354 
1355 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1356 		   struct page ***pages, size_t maxsize,
1357 		   size_t *start)
1358 {
1359 	struct page **p;
1360 	unsigned int iter_head, npages;
1361 	ssize_t n;
1362 
1363 	if (!maxsize)
1364 		return 0;
1365 
1366 	if (!sanity(i))
1367 		return -EFAULT;
1368 
1369 	data_start(i, &iter_head, start);
1370 	/* Amount of free space: some of this one + all after this one */
1371 	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1372 	n = npages * PAGE_SIZE - *start;
1373 	if (maxsize > n)
1374 		maxsize = n;
1375 	else
1376 		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1377 	p = get_pages_array(npages);
1378 	if (!p)
1379 		return -ENOMEM;
1380 	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1381 	if (n > 0)
1382 		*pages = p;
1383 	else
1384 		kvfree(p);
1385 	return n;
1386 }
1387 
1388 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1389 		   struct page ***pages, size_t maxsize,
1390 		   size_t *start)
1391 {
1392 	struct page **p;
1393 
1394 	if (maxsize > i->count)
1395 		maxsize = i->count;
1396 
1397 	if (unlikely(iov_iter_is_pipe(i)))
1398 		return pipe_get_pages_alloc(i, pages, maxsize, start);
1399 	if (unlikely(iov_iter_is_discard(i)))
1400 		return -EFAULT;
1401 
1402 	iterate_all_kinds(i, maxsize, v, ({
1403 		unsigned long addr = (unsigned long)v.iov_base;
1404 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1405 		int n;
1406 		int res;
1407 
1408 		addr &= ~(PAGE_SIZE - 1);
1409 		n = DIV_ROUND_UP(len, PAGE_SIZE);
1410 		p = get_pages_array(n);
1411 		if (!p)
1412 			return -ENOMEM;
1413 		res = get_user_pages_fast(addr, n,
1414 				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1415 		if (unlikely(res <= 0)) {
1416 			kvfree(p);
1417 			*pages = NULL;
1418 			return res;
1419 		}
1420 		*pages = p;
1421 		return (res == n ? len : res * PAGE_SIZE) - *start;
1422 	0;}),({
1423 		/* can't be more than PAGE_SIZE */
1424 		*start = v.bv_offset;
1425 		*pages = p = get_pages_array(1);
1426 		if (!p)
1427 			return -ENOMEM;
1428 		get_page(*p = v.bv_page);
1429 		return v.bv_len;
1430 	}),({
1431 		return -EFAULT;
1432 	})
1433 	)
1434 	return 0;
1435 }
1436 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1437 
1438 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1439 			       struct iov_iter *i)
1440 {
1441 	char *to = addr;
1442 	__wsum sum, next;
1443 	size_t off = 0;
1444 	sum = *csum;
1445 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1446 		WARN_ON(1);
1447 		return 0;
1448 	}
1449 	iterate_and_advance(i, bytes, v, ({
1450 		next = csum_and_copy_from_user(v.iov_base,
1451 					       (to += v.iov_len) - v.iov_len,
1452 					       v.iov_len);
1453 		if (next) {
1454 			sum = csum_block_add(sum, next, off);
1455 			off += v.iov_len;
1456 		}
1457 		next ? 0 : v.iov_len;
1458 	}), ({
1459 		char *p = kmap_atomic(v.bv_page);
1460 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1461 				      p + v.bv_offset, v.bv_len,
1462 				      sum, off);
1463 		kunmap_atomic(p);
1464 		off += v.bv_len;
1465 	}),({
1466 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1467 				      v.iov_base, v.iov_len,
1468 				      sum, off);
1469 		off += v.iov_len;
1470 	})
1471 	)
1472 	*csum = sum;
1473 	return bytes;
1474 }
1475 EXPORT_SYMBOL(csum_and_copy_from_iter);
1476 
1477 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1478 			       struct iov_iter *i)
1479 {
1480 	char *to = addr;
1481 	__wsum sum, next;
1482 	size_t off = 0;
1483 	sum = *csum;
1484 	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1485 		WARN_ON(1);
1486 		return false;
1487 	}
1488 	if (unlikely(i->count < bytes))
1489 		return false;
1490 	iterate_all_kinds(i, bytes, v, ({
1491 		next = csum_and_copy_from_user(v.iov_base,
1492 					       (to += v.iov_len) - v.iov_len,
1493 					       v.iov_len);
1494 		if (!next)
1495 			return false;
1496 		sum = csum_block_add(sum, next, off);
1497 		off += v.iov_len;
1498 		0;
1499 	}), ({
1500 		char *p = kmap_atomic(v.bv_page);
1501 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1502 				      p + v.bv_offset, v.bv_len,
1503 				      sum, off);
1504 		kunmap_atomic(p);
1505 		off += v.bv_len;
1506 	}),({
1507 		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1508 				      v.iov_base, v.iov_len,
1509 				      sum, off);
1510 		off += v.iov_len;
1511 	})
1512 	)
1513 	*csum = sum;
1514 	iov_iter_advance(i, bytes);
1515 	return true;
1516 }
1517 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1518 
1519 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1520 			     struct iov_iter *i)
1521 {
1522 	struct csum_state *csstate = _csstate;
1523 	const char *from = addr;
1524 	__wsum sum, next;
1525 	size_t off;
1526 
1527 	if (unlikely(iov_iter_is_pipe(i)))
1528 		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1529 
1530 	sum = csstate->csum;
1531 	off = csstate->off;
1532 	if (unlikely(iov_iter_is_discard(i))) {
1533 		WARN_ON(1);	/* for now */
1534 		return 0;
1535 	}
1536 	iterate_and_advance(i, bytes, v, ({
1537 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1538 					     v.iov_base,
1539 					     v.iov_len);
1540 		if (next) {
1541 			sum = csum_block_add(sum, next, off);
1542 			off += v.iov_len;
1543 		}
1544 		next ? 0 : v.iov_len;
1545 	}), ({
1546 		char *p = kmap_atomic(v.bv_page);
1547 		sum = csum_and_memcpy(p + v.bv_offset,
1548 				      (from += v.bv_len) - v.bv_len,
1549 				      v.bv_len, sum, off);
1550 		kunmap_atomic(p);
1551 		off += v.bv_len;
1552 	}),({
1553 		sum = csum_and_memcpy(v.iov_base,
1554 				     (from += v.iov_len) - v.iov_len,
1555 				     v.iov_len, sum, off);
1556 		off += v.iov_len;
1557 	})
1558 	)
1559 	csstate->csum = sum;
1560 	csstate->off = off;
1561 	return bytes;
1562 }
1563 EXPORT_SYMBOL(csum_and_copy_to_iter);
1564 
1565 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1566 		struct iov_iter *i)
1567 {
1568 #ifdef CONFIG_CRYPTO_HASH
1569 	struct ahash_request *hash = hashp;
1570 	struct scatterlist sg;
1571 	size_t copied;
1572 
1573 	copied = copy_to_iter(addr, bytes, i);
1574 	sg_init_one(&sg, addr, copied);
1575 	ahash_request_set_crypt(hash, &sg, NULL, copied);
1576 	crypto_ahash_update(hash);
1577 	return copied;
1578 #else
1579 	return 0;
1580 #endif
1581 }
1582 EXPORT_SYMBOL(hash_and_copy_to_iter);
1583 
1584 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1585 {
1586 	size_t size = i->count;
1587 	int npages = 0;
1588 
1589 	if (!size)
1590 		return 0;
1591 	if (unlikely(iov_iter_is_discard(i)))
1592 		return 0;
1593 
1594 	if (unlikely(iov_iter_is_pipe(i))) {
1595 		struct pipe_inode_info *pipe = i->pipe;
1596 		unsigned int iter_head;
1597 		size_t off;
1598 
1599 		if (!sanity(i))
1600 			return 0;
1601 
1602 		data_start(i, &iter_head, &off);
1603 		/* some of this one + all after this one */
1604 		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1605 		if (npages >= maxpages)
1606 			return maxpages;
1607 	} else iterate_all_kinds(i, size, v, ({
1608 		unsigned long p = (unsigned long)v.iov_base;
1609 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1610 			- p / PAGE_SIZE;
1611 		if (npages >= maxpages)
1612 			return maxpages;
1613 	0;}),({
1614 		npages++;
1615 		if (npages >= maxpages)
1616 			return maxpages;
1617 	}),({
1618 		unsigned long p = (unsigned long)v.iov_base;
1619 		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1620 			- p / PAGE_SIZE;
1621 		if (npages >= maxpages)
1622 			return maxpages;
1623 	})
1624 	)
1625 	return npages;
1626 }
1627 EXPORT_SYMBOL(iov_iter_npages);
1628 
1629 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1630 {
1631 	*new = *old;
1632 	if (unlikely(iov_iter_is_pipe(new))) {
1633 		WARN_ON(1);
1634 		return NULL;
1635 	}
1636 	if (unlikely(iov_iter_is_discard(new)))
1637 		return NULL;
1638 	if (iov_iter_is_bvec(new))
1639 		return new->bvec = kmemdup(new->bvec,
1640 				    new->nr_segs * sizeof(struct bio_vec),
1641 				    flags);
1642 	else
1643 		/* iovec and kvec have identical layout */
1644 		return new->iov = kmemdup(new->iov,
1645 				   new->nr_segs * sizeof(struct iovec),
1646 				   flags);
1647 }
1648 EXPORT_SYMBOL(dup_iter);
1649 
1650 static int copy_compat_iovec_from_user(struct iovec *iov,
1651 		const struct iovec __user *uvec, unsigned long nr_segs)
1652 {
1653 	const struct compat_iovec __user *uiov =
1654 		(const struct compat_iovec __user *)uvec;
1655 	int ret = -EFAULT, i;
1656 
1657 	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1658 		return -EFAULT;
1659 
1660 	for (i = 0; i < nr_segs; i++) {
1661 		compat_uptr_t buf;
1662 		compat_ssize_t len;
1663 
1664 		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1665 		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1666 
1667 		/* check for compat_size_t not fitting in compat_ssize_t .. */
1668 		if (len < 0) {
1669 			ret = -EINVAL;
1670 			goto uaccess_end;
1671 		}
1672 		iov[i].iov_base = compat_ptr(buf);
1673 		iov[i].iov_len = len;
1674 	}
1675 
1676 	ret = 0;
1677 uaccess_end:
1678 	user_access_end();
1679 	return ret;
1680 }
1681 
1682 static int copy_iovec_from_user(struct iovec *iov,
1683 		const struct iovec __user *uvec, unsigned long nr_segs)
1684 {
1685 	unsigned long seg;
1686 
1687 	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1688 		return -EFAULT;
1689 	for (seg = 0; seg < nr_segs; seg++) {
1690 		if ((ssize_t)iov[seg].iov_len < 0)
1691 			return -EINVAL;
1692 	}
1693 
1694 	return 0;
1695 }
1696 
1697 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1698 		unsigned long nr_segs, unsigned long fast_segs,
1699 		struct iovec *fast_iov, bool compat)
1700 {
1701 	struct iovec *iov = fast_iov;
1702 	int ret;
1703 
1704 	/*
1705 	 * SuS says "The readv() function *may* fail if the iovcnt argument was
1706 	 * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1707 	 * traditionally returned zero for zero segments, so...
1708 	 */
1709 	if (nr_segs == 0)
1710 		return iov;
1711 	if (nr_segs > UIO_MAXIOV)
1712 		return ERR_PTR(-EINVAL);
1713 	if (nr_segs > fast_segs) {
1714 		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1715 		if (!iov)
1716 			return ERR_PTR(-ENOMEM);
1717 	}
1718 
1719 	if (compat)
1720 		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1721 	else
1722 		ret = copy_iovec_from_user(iov, uvec, nr_segs);
1723 	if (ret) {
1724 		if (iov != fast_iov)
1725 			kfree(iov);
1726 		return ERR_PTR(ret);
1727 	}
1728 
1729 	return iov;
1730 }
1731 
1732 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1733 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1734 		 struct iov_iter *i, bool compat)
1735 {
1736 	ssize_t total_len = 0;
1737 	unsigned long seg;
1738 	struct iovec *iov;
1739 
1740 	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1741 	if (IS_ERR(iov)) {
1742 		*iovp = NULL;
1743 		return PTR_ERR(iov);
1744 	}
1745 
1746 	/*
1747 	 * According to the Single Unix Specification we should return EINVAL if
1748 	 * an element length is < 0 when cast to ssize_t or if the total length
1749 	 * would overflow the ssize_t return value of the system call.
1750 	 *
1751 	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
1752 	 * overflow case.
1753 	 */
1754 	for (seg = 0; seg < nr_segs; seg++) {
1755 		ssize_t len = (ssize_t)iov[seg].iov_len;
1756 
1757 		if (!access_ok(iov[seg].iov_base, len)) {
1758 			if (iov != *iovp)
1759 				kfree(iov);
1760 			*iovp = NULL;
1761 			return -EFAULT;
1762 		}
1763 
1764 		if (len > MAX_RW_COUNT - total_len) {
1765 			len = MAX_RW_COUNT - total_len;
1766 			iov[seg].iov_len = len;
1767 		}
1768 		total_len += len;
1769 	}
1770 
1771 	iov_iter_init(i, type, iov, nr_segs, total_len);
1772 	if (iov == *iovp)
1773 		*iovp = NULL;
1774 	else
1775 		*iovp = iov;
1776 	return total_len;
1777 }
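
/*
 * Editor's worked example of the MAX_RW_COUNT capping above (illustration
 * only): with two 1 GiB iovecs, the first is accepted as-is, the second is
 * shrunk to MAX_RW_COUNT - 1 GiB, and total_len comes back as exactly
 * MAX_RW_COUNT, so the syscall returns a short count instead of overflowing
 * its ssize_t return value.
 */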
1778 
1779 /**
1780  * import_iovec() - Copy an array of &struct iovec from userspace
1781  *     into the kernel, check that it is valid, and initialize a new
1782  *     &struct iov_iter iterator to access it.
1783  *
1784  * @type: One of %READ or %WRITE.
1785  * @uvec: Pointer to the userspace array.
1786  * @nr_segs: Number of elements in userspace array.
1787  * @fast_segs: Number of elements in *@iovp.
1788  * @iovp: (input and output parameter) Pointer to pointer to (usually small
1789  *     on-stack) kernel array.
1790  * @i: Pointer to iterator that will be initialized on success.
1791  *
1792  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
1793  * then this function places %NULL in *@iovp on return. Otherwise, a new
1794  * array will be allocated and the result placed in *@iovp. This means that
1795  * the caller may call kfree() on *@iovp regardless of whether the small
1796  * on-stack array was used or not (and regardless of whether this function
1797  * returns an error or not).
1798  *
1799  * Return: Negative error code on error, bytes imported on success
1800  */
1801 ssize_t import_iovec(int type, const struct iovec __user *uvec,
1802 		 unsigned nr_segs, unsigned fast_segs,
1803 		 struct iovec **iovp, struct iov_iter *i)
1804 {
1805 	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
1806 			      in_compat_syscall());
1807 }
1808 EXPORT_SYMBOL(import_iovec);
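
/*
 * Editor's sketch of the usual import_iovec() calling convention (modelled
 * on the readv()/writev() paths; do_example_readv() and its body are
 * hypothetical): a small on-stack array is offered via @iovp, and the
 * returned pointer is kfree()d unconditionally, which is safe whether or
 * not the on-stack array ended up being used.
 */
static ssize_t do_example_readv(const struct iovec __user *uvec,
				unsigned long nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;

	/* ... hand &iter to ->read_iter() or an equivalent consumer ... */

	kfree(iov);		/* NULL when the on-stack array was used */
	return ret;
}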
1809 
1810 int import_single_range(int rw, void __user *buf, size_t len,
1811 		 struct iovec *iov, struct iov_iter *i)
1812 {
1813 	if (len > MAX_RW_COUNT)
1814 		len = MAX_RW_COUNT;
1815 	if (unlikely(!access_ok(buf, len)))
1816 		return -EFAULT;
1817 
1818 	iov->iov_base = buf;
1819 	iov->iov_len = len;
1820 	iov_iter_init(i, rw, iov, 1, len);
1821 	return 0;
1822 }
1823 EXPORT_SYMBOL(import_single_range);
1824 
1825 /**
1826  * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
1827  *     iov_iter_save_state() was called.
1828  *
1829  * @i: &struct iov_iter to restore
1830  * @state: state to restore from
1831  *
1832  * Used after iov_iter_save_state() to restore @i, if operations may
1833  * have advanced it.
1834  *
1835  * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
1836  */
1837 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
1838 {
1839 	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
1840 			 !iov_iter_is_kvec(i)))
1841 		return;
1842 	i->iov_offset = state->iov_offset;
1843 	i->count = state->count;
1844 	/*
1845 	 * For the *vec iters, nr_segs + iov is constant - if we increment
1846 	 * the vec, then we also decrement the nr_segs count. Hence we don't
1847 	 * need to track both of these, just one is enough and we can derive
1848 	 * the other from it. ITER_KVEC and ITER_IOVEC are the same struct
1849 	 * size, so we can just increment the iov pointer as they are unionized.
1850 	 * ITER_BVEC _may_ be the same size on some archs, but on others it is
1851 	 * not. Be safe and handle it separately.
1852 	 */
1853 	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
1854 	if (iov_iter_is_bvec(i))
1855 		i->bvec -= state->nr_segs - i->nr_segs;
1856 	else
1857 		i->iov -= state->nr_segs - i->nr_segs;
1858 	i->nr_segs = state->nr_segs;
1859 }
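
/*
 * Editor's sketch of the save/restore pairing (iov_iter_save_state() is the
 * counterpart declared in <linux/uio.h>; the issue_request() callback is
 * hypothetical): the state is captured before an operation that may advance
 * the iterator and rolled back if that operation must be retried.
 */
static ssize_t example_retry(struct iov_iter *iter,
			     ssize_t (*issue_request)(struct iov_iter *iter))
{
	struct iov_iter_state state;
	ssize_t ret;

	iov_iter_save_state(iter, &state);
	ret = issue_request(iter);
	if (ret == -EAGAIN)
		iov_iter_restore(iter, &state);	/* rewind for the retry */
	return ret;
}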
1860