// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2012 Alexander Block. All rights reserved.
 */

#include <linux/bsearch.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/compat.h>
#include <linux/crc32c.h>

#include "send.h"
#include "backref.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"

/*
 * Maximum number of references an extent can have in order for us to attempt
 * to issue clone operations instead of write operations. This currently
 * exists to avoid hitting limitations of the backreference walking code
 * (taking a lot of time and using too much memory for extents with a large
 * number of references).
 */
#define SEND_MAX_EXTENT_REFS	64

/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
	union {
		struct {
			char *start;
			char *end;

			char *buf;
			unsigned short buf_len:15;
			unsigned short reversed:1;
			char inline_buf[];
		};
		/*
		 * Average path length does not exceed 200 bytes, so we'll
		 * have better packing in the slab and a higher chance to
		 * satisfy an allocation later during send.
		 */
		char pad[256];
	};
};
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
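/*
 * Note (illustrative, not part of the original source): on a typical 64-bit
 * build the three pointers above occupy 24 bytes and the two bitfields share
 * one unsigned short, so offsetof(struct fs_path, inline_buf) is 26 and
 * FS_PATH_INLINE_SIZE leaves 256 - 26 = 230 bytes of inline path storage
 * before the first heap allocation is needed. The exact numbers depend on
 * the architecture and compiler layout.
 */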


/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;
	u64 ino;
	u64 offset;

	u64 found_refs;
};

#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)

struct send_ctx {
	struct file *send_filp;
	loff_t send_off;
	char *send_buf;
	u32 send_size;
	u32 send_max_size;
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * Info about the currently processed inode. In case of deleted
	 * inodes, these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_rdev;
	u64 cur_inode_last_extent;
	u64 cur_inode_next_write_offset;
	bool ignore_cur_inode;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file_ra_state ra;

	char *read_buf;

	/*
	 * We process inodes by their increasing order, so if before an
	 * incremental send we reverse the parent/child relationship of
	 * directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with lower inode number when we finish processing it - we
	 * must process the directory with higher inode number first, then
	 * rename/move it and then rename/move the directory with lower inode
	 * number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that lead to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has lower inode number, but we can't move it (2nd mv operation)
	 * before we move "d", which has higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;

	/*
	 * A directory that is going to be rm'ed might have a child directory
	 * which is in the pending directory moves index above. In this case,
	 * the directory can only be removed after the move/rename of its child
	 * is performed. Example:
	 *
	 * Parent snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- c/           (ino 259)
	 *         |   |-- x/       (ino 260)
	 *         |
	 *         |-- y/           (ino 261)
	 *
	 * Send snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- YY/          (ino 261)
	 *              |-- x/      (ino 260)
	 *
	 * Sequence of steps that lead to the send snapshot:
	 * rm -f /a/b/c/foo.txt
	 * mv /a/b/y /a/b/YY
	 * mv /a/b/c/x /a/b/YY
	 * rmdir /a/b/c
	 *
	 * When the child is processed, its move/rename is delayed until its
	 * parent is processed (as explained above), but all other operations
	 * like update utimes, chown, chgrp, etc, are performed and the paths
	 * that it uses for those operations must use the orphanized name of
	 * its parent (the directory we're going to rm later), so we need to
	 * memorize that name.
	 *
	 * Indexed by the inode number of the directory to be deleted.
	 */
	struct rb_root orphan_dirs;
};

struct pending_dir_move {
	struct rb_node node;
	struct list_head list;
	u64 parent_ino;
	u64 ino;
	u64 gen;
	struct list_head update_refs;
};

struct waiting_dir_move {
	struct rb_node node;
	u64 ino;
	/*
	 * There might be some directory that could not be removed because it
	 * was waiting for this directory inode to be moved first. Therefore
	 * after this directory is moved, we can try to rmdir the ino rmdir_ino.
	 */
	u64 rmdir_ino;
	bool orphanized;
};

struct orphan_dir_info {
	struct rb_node node;
	u64 ino;
	u64 gen;
	u64 last_dir_index_offset;
};

struct name_cache_entry {
	struct list_head list;
	/*
	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
	 * more than one inum would fall into the same entry, we use radix_list
	 * to store the additional entries. radix_list is also used to store
	 * entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;
	int need_later_update;
	int name_len;
	char name[];
};
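/*
 * Illustrative example (not part of the original source): on a 32-bit kernel
 * the radix tree index is an unsigned long, i.e. 32 bits, so the cast of the
 * 64-bit inum truncates and inodes 1 and (1ULL << 32) + 1 both land at index
 * 1. Such clashing entries hang off the same per-index list via radix_list
 * and are told apart by comparing the full 64-bit ino (and gen) in
 * name_cache_search(). On 64-bit kernels the index cannot clash.
 */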
270
271 #define ADVANCE 1
272 #define ADVANCE_ONLY_NEXT -1
273
274 enum btrfs_compare_tree_result {
275 BTRFS_COMPARE_TREE_NEW,
276 BTRFS_COMPARE_TREE_DELETED,
277 BTRFS_COMPARE_TREE_CHANGED,
278 BTRFS_COMPARE_TREE_SAME,
279 };
280 typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path,
281 struct btrfs_path *right_path,
282 struct btrfs_key *key,
283 enum btrfs_compare_tree_result result,
284 void *ctx);
285
286 __cold
inconsistent_snapshot_error(struct send_ctx * sctx,enum btrfs_compare_tree_result result,const char * what)287 static void inconsistent_snapshot_error(struct send_ctx *sctx,
288 enum btrfs_compare_tree_result result,
289 const char *what)
290 {
291 const char *result_string;
292
293 switch (result) {
294 case BTRFS_COMPARE_TREE_NEW:
295 result_string = "new";
296 break;
297 case BTRFS_COMPARE_TREE_DELETED:
298 result_string = "deleted";
299 break;
300 case BTRFS_COMPARE_TREE_CHANGED:
301 result_string = "updated";
302 break;
303 case BTRFS_COMPARE_TREE_SAME:
304 ASSERT(0);
305 result_string = "unchanged";
306 break;
307 default:
308 ASSERT(0);
309 result_string = "unexpected";
310 }
311
312 btrfs_err(sctx->send_root->fs_info,
313 "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
314 result_string, what, sctx->cmp_key->objectid,
315 sctx->send_root->root_key.objectid,
316 (sctx->parent_root ?
317 sctx->parent_root->root_key.objectid : 0));
318 }
319
320 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
321
322 static struct waiting_dir_move *
323 get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
324
325 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
326
need_send_hole(struct send_ctx * sctx)327 static int need_send_hole(struct send_ctx *sctx)
328 {
329 return (sctx->parent_root && !sctx->cur_inode_new &&
330 !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
331 S_ISREG(sctx->cur_inode_mode));
332 }
333
fs_path_reset(struct fs_path * p)334 static void fs_path_reset(struct fs_path *p)
335 {
336 if (p->reversed) {
337 p->start = p->buf + p->buf_len - 1;
338 p->end = p->start;
339 *p->start = 0;
340 } else {
341 p->start = p->buf;
342 p->end = p->start;
343 *p->start = 0;
344 }
345 }
346
fs_path_alloc(void)347 static struct fs_path *fs_path_alloc(void)
348 {
349 struct fs_path *p;
350
351 p = kmalloc(sizeof(*p), GFP_KERNEL);
352 if (!p)
353 return NULL;
354 p->reversed = 0;
355 p->buf = p->inline_buf;
356 p->buf_len = FS_PATH_INLINE_SIZE;
357 fs_path_reset(p);
358 return p;
359 }
360
fs_path_alloc_reversed(void)361 static struct fs_path *fs_path_alloc_reversed(void)
362 {
363 struct fs_path *p;
364
365 p = fs_path_alloc();
366 if (!p)
367 return NULL;
368 p->reversed = 1;
369 fs_path_reset(p);
370 return p;
371 }
372
fs_path_free(struct fs_path * p)373 static void fs_path_free(struct fs_path *p)
374 {
375 if (!p)
376 return;
377 if (p->buf != p->inline_buf)
378 kfree(p->buf);
379 kfree(p);
380 }
381
fs_path_len(struct fs_path * p)382 static int fs_path_len(struct fs_path *p)
383 {
384 return p->end - p->start;
385 }
386
fs_path_ensure_buf(struct fs_path * p,int len)387 static int fs_path_ensure_buf(struct fs_path *p, int len)
388 {
389 char *tmp_buf;
390 int path_len;
391 int old_buf_len;
392
393 len++;
394
395 if (p->buf_len >= len)
396 return 0;
397
398 if (len > PATH_MAX) {
399 WARN_ON(1);
400 return -ENOMEM;
401 }
402
403 path_len = p->end - p->start;
404 old_buf_len = p->buf_len;
405
406 /*
407 * First time the inline_buf does not suffice
408 */
409 if (p->buf == p->inline_buf) {
410 tmp_buf = kmalloc(len, GFP_KERNEL);
411 if (tmp_buf)
412 memcpy(tmp_buf, p->buf, old_buf_len);
413 } else {
414 tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
415 }
416 if (!tmp_buf)
417 return -ENOMEM;
418 p->buf = tmp_buf;
419 /*
420 * The real size of the buffer is bigger, this will let the fast path
421 * happen most of the time
422 */
423 p->buf_len = ksize(p->buf);
424
425 if (p->reversed) {
426 tmp_buf = p->buf + old_buf_len - path_len - 1;
427 p->end = p->buf + p->buf_len - 1;
428 p->start = p->end - path_len;
429 memmove(p->start, tmp_buf, path_len + 1);
430 } else {
431 p->start = p->buf;
432 p->end = p->start + path_len;
433 }
434 return 0;
435 }
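/*
 * Note (illustrative): kmalloc()/krealloc() round requests up to a slab
 * bucket, so ksize() may report more than was asked for; e.g. a 300-byte
 * request is typically served from a 512-byte cache. Recording the real
 * size in buf_len lets later appends take the "p->buf_len >= len" fast
 * path without reallocating. Exact bucket sizes depend on the slab
 * configuration.
 */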

static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				   char **prepared)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		*prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		*prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}
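/*
 * Illustrative example (not part of the original source): with a reversed
 * fs_path, fs_path_prepare_for_add() grows the path leftwards, so building
 * "a/b/c" happens leaf-first:
 *
 *	fs_path_add(p, "c", 1);		buffer tail holds "c"
 *	fs_path_add(p, "b", 1);		buffer tail holds "b/c"
 *	fs_path_add(p, "a", 1);		buffer tail holds "a/b/c"
 *
 * This is what path construction that walks parent references from a name
 * up to the subvolume root relies on (see get_cur_path() further below),
 * and fs_path_unreverse() can afterwards move the result to the front of
 * the buffer.
 */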

static int fs_path_add(struct fs_path *p, const char *name, int name_len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, name_len, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, name, name_len);

out:
	return ret;
}

static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, p2->start, p2->end - p2->start);

out:
	return ret;
}

static int fs_path_add_from_extent_buffer(struct fs_path *p,
					  struct extent_buffer *eb,
					  unsigned long off, int len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, len, &prepared);
	if (ret < 0)
		goto out;

	read_extent_buffer(eb, prepared, off, len);

out:
	return ret;
}

static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
	int ret;

	p->reversed = from->reversed;
	fs_path_reset(p);

	ret = fs_path_add_path(p, from);

	return ret;
}


static void fs_path_unreverse(struct fs_path *p)
{
	char *tmp;
	int len;

	if (!p->reversed)
		return;

	tmp = p->start;
	len = p->end - p->start;
	p->start = p->buf;
	p->end = p->start + len;
	memmove(p->start, tmp, len + 1);
	p->reversed = 0;
}

static struct btrfs_path *alloc_path_for_send(void)
{
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return NULL;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	path->need_commit_sem = 1;
	return path;
}

static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	int ret;
	u32 pos = 0;

	while (pos < len) {
		ret = kernel_write(filp, buf + pos, len - pos, off);
		/* TODO handle that correctly */
		/*if (ret == -ERESTARTSYS) {
			continue;
		}*/
		if (ret < 0)
			return ret;
		if (ret == 0) {
			return -EIO;
		}
		pos += ret;
	}

	return 0;
}

static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len = sizeof(*hdr) + len;
	int left = sctx->send_max_size - sctx->send_size;

	if (unlikely(left < total_len))
		return -EOVERFLOW;

	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	hdr->tlv_type = cpu_to_le16(attr);
	hdr->tlv_len = cpu_to_le16(len);
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}
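/*
 * Illustrative sketch (not part of the original source) of what tlv_put()
 * appends to the command buffer: a packed struct btrfs_tlv_header carrying
 * the little-endian attribute type and payload length, immediately followed
 * by the raw payload bytes:
 *
 *	[ btrfs_tlv_header | data[len] ]
 *
 * Attributes are simply concatenated inside one command and the receiver
 * parses them back by walking the headers.
 */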

#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
				   u##bits attr, u##bits value)		\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)

static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
{
	if (len == -1)
		len = strlen(str);
	return tlv_put(sctx, attr, str, len);
}

static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}

static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec bts;
	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}


#define TLV_PUT(sctx, attrtype, data, attrlen) \
	do { \
		ret = tlv_put(sctx, attrtype, data, attrlen); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, p->start, \
				     p->end - p->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

static int send_header(struct send_ctx *sctx)
{
	struct btrfs_stream_header hdr;

	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);

	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
			 &sctx->send_off);
}

/*
 * For each command/item we want to send to userspace, we call this function.
 */
static int begin_cmd(struct send_ctx *sctx, int cmd)
{
	struct btrfs_cmd_header *hdr;

	if (WARN_ON(!sctx->send_buf))
		return -EINVAL;

	BUG_ON(sctx->send_size);

	sctx->send_size += sizeof(*hdr);
	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->cmd = cpu_to_le16(cmd);

	return 0;
}

static int send_cmd(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_cmd_header *hdr;
	u32 crc;

	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
	hdr->crc = 0;

	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
	hdr->crc = cpu_to_le32(crc);

	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
			&sctx->send_off);

	sctx->total_send_size += sctx->send_size;
	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
	sctx->send_size = 0;

	return ret;
}
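/*
 * Note (illustrative): the checksum above covers the whole command, header
 * included, computed with hdr->crc temporarily set to zero. A receiver
 * verifies a command the same way: zero the crc field, run crc32c over
 * le32_to_cpu(hdr->len) + sizeof(*hdr) bytes and compare the result with
 * the stored value.
 */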

/*
 * Sends a move instruction to user space
 */
static int send_rename(struct send_ctx *sctx,
		       struct fs_path *from, struct fs_path *to)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a link instruction to user space
 */
static int send_link(struct send_ctx *sctx,
		     struct fs_path *path, struct fs_path *lnk)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends an unlink instruction to user space
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_unlink %s", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a rmdir instruction to user space
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_rmdir %s", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Helper function to retrieve some fields from an inode item.
 */
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
			    u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
			    u64 *gid, u64 *rdev)
{
	int ret;
	struct btrfs_inode_item *ii;
	struct btrfs_key key;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		return ret;
	}

	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	if (size)
		*size = btrfs_inode_size(path->nodes[0], ii);
	if (gen)
		*gen = btrfs_inode_generation(path->nodes[0], ii);
	if (mode)
		*mode = btrfs_inode_mode(path->nodes[0], ii);
	if (uid)
		*uid = btrfs_inode_uid(path->nodes[0], ii);
	if (gid)
		*gid = btrfs_inode_gid(path->nodes[0], ii);
	if (rdev)
		*rdev = btrfs_inode_rdev(path->nodes[0], ii);

	return ret;
}

static int get_inode_info(struct btrfs_root *root,
			  u64 ino, u64 *size, u64 *gen,
			  u64 *mode, u64 *uid, u64 *gid,
			  u64 *rdev)
{
	struct btrfs_path *path;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;
	ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
			       rdev);
	btrfs_free_path(path);
	return ret;
}

typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
				   struct fs_path *p,
				   void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 * btrfs_inode_extref.
 * The iterate callback may return a non-zero value to stop iteration. This
 * can be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the INODE_REF or INODE_EXTREF when called.
 */
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
			     struct btrfs_key *found_key, int resolve,
			     iterate_inode_ref_t iterate, void *ctx)
{
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_inode_extref *extref;
	struct btrfs_path *tmp_path;
	struct fs_path *p;
	u32 cur = 0;
	u32 total;
	int slot = path->slots[0];
	u32 name_len;
	char *start;
	int ret = 0;
	int num = 0;
	int index;
	u64 dir;
	unsigned long name_off;
	unsigned long elem_size;
	unsigned long ptr;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	tmp_path = alloc_path_for_send();
	if (!tmp_path) {
		fs_path_free(p);
		return -ENOMEM;
	}


	if (found_key->type == BTRFS_INODE_REF_KEY) {
		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
						    struct btrfs_inode_ref);
		item = btrfs_item_nr(slot);
		total = btrfs_item_size(eb, item);
		elem_size = sizeof(*iref);
	} else {
		ptr = btrfs_item_ptr_offset(eb, slot);
		total = btrfs_item_size_nr(eb, slot);
		elem_size = sizeof(*extref);
	}

	while (cur < total) {
		fs_path_reset(p);

		if (found_key->type == BTRFS_INODE_REF_KEY) {
			iref = (struct btrfs_inode_ref *)(ptr + cur);
			name_len = btrfs_inode_ref_name_len(eb, iref);
			name_off = (unsigned long)(iref + 1);
			index = btrfs_inode_ref_index(eb, iref);
			dir = found_key->offset;
		} else {
			extref = (struct btrfs_inode_extref *)(ptr + cur);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			name_off = (unsigned long)&extref->name;
			index = btrfs_inode_extref_index(eb, extref);
			dir = btrfs_inode_extref_parent(eb, extref);
		}

		if (resolve) {
			start = btrfs_ref_to_path(root, tmp_path, name_len,
						  name_off, eb, dir,
						  p->buf, p->buf_len);
			if (IS_ERR(start)) {
				ret = PTR_ERR(start);
				goto out;
			}
			if (start < p->buf) {
				/* overflow, try again with a larger buffer */
				ret = fs_path_ensure_buf(p,
						p->buf_len + p->buf - start);
				if (ret < 0)
					goto out;
				start = btrfs_ref_to_path(root, tmp_path,
							  name_len, name_off,
							  eb, dir,
							  p->buf, p->buf_len);
				if (IS_ERR(start)) {
					ret = PTR_ERR(start);
					goto out;
				}
				BUG_ON(start < p->buf);
			}
			p->start = start;
		} else {
			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
							     name_len);
			if (ret < 0)
				goto out;
		}

		cur += elem_size + name_len;
		ret = iterate(num, dir, index, p, ctx);
		if (ret)
			goto out;
		num++;
	}

out:
	btrfs_free_path(tmp_path);
	fs_path_free(p);
	return ret;
}

typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
				  const char *name, int name_len,
				  const char *data, int data_len,
				  u8 type, void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_dir_item.
 * The iterate callback may return a non-zero value to stop iteration. This
 * can be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the dir item when called.
 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;
	int buf_len;
	u32 name_len;
	u32 data_len;
	u32 cur;
	u32 len;
	u32 total;
	int slot;
	int num;
	u8 type;

	/*
	 * Start with a small buffer (1 page). If later we end up needing more
	 * space, which can happen for xattrs on a fs with a leaf size greater
	 * than the page size, attempt to increase the buffer. Typically xattr
	 * values are small.
	 */
	buf_len = PATH_MAX;
	buf = kmalloc(buf_len, GFP_KERNEL);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		if (type == BTRFS_FT_XATTR) {
			if (name_len > XATTR_NAME_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
			if (name_len + data_len >
					BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
				ret = -E2BIG;
				goto out;
			}
		} else {
			/*
			 * Path too long
			 */
			if (name_len + data_len > PATH_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
		}

		if (name_len + data_len > buf_len) {
			buf_len = name_len + data_len;
			if (is_vmalloc_addr(buf)) {
				vfree(buf);
				buf = NULL;
			} else {
				char *tmp = krealloc(buf, buf_len,
						     GFP_KERNEL | __GFP_NOWARN);

				if (!tmp)
					kfree(buf);
				buf = tmp;
			}
			if (!buf) {
				buf = kvmalloc(buf_len, GFP_KERNEL);
				if (!buf) {
					ret = -ENOMEM;
					goto out;
				}
			}
		}

		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				   name_len + data_len);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
			      data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	kvfree(buf);
	return ret;
}

static int __copy_first_ref(int num, u64 dir, int index,
			    struct fs_path *p, void *ctx)
{
	int ret;
	struct fs_path *pt = ctx;

	ret = fs_path_copy(pt, p);
	if (ret < 0)
		return ret;

	/* we want the first only */
	return 1;
}

/*
 * Retrieve the first path of an inode. If an inode has more than one
 * ref/hardlink, this is ignored.
 */
static int get_inode_path(struct btrfs_root *root,
			  u64 ino, struct fs_path *path)
{
	int ret;
	struct btrfs_key key, found_key;
	struct btrfs_path *p;

	p = alloc_path_for_send();
	if (!p)
		return -ENOMEM;

	fs_path_reset(path);

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = 1;
		goto out;
	}
	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
	if (found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	ret = iterate_inode_ref(root, p, &found_key, 1,
				__copy_first_ref, path);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(p);
	return ret;
}

struct backref_ctx {
	struct send_ctx *sctx;

	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. Clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* data offset in the file extent item */
	u64 data_offset;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};

static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
	u64 root = (u64)(uintptr_t)key;
	struct clone_root *cr = (struct clone_root *)elt;

	if (root < cr->root->root_key.objectid)
		return -1;
	if (root > cr->root->root_key.objectid)
		return 1;
	return 0;
}

static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
	struct clone_root *cr1 = (struct clone_root *)e1;
	struct clone_root *cr2 = (struct clone_root *)e2;

	if (cr1->root->root_key.objectid < cr2->root->root_key.objectid)
		return -1;
	if (cr1->root->root_key.objectid > cr2->root->root_key.objectid)
		return 1;
	return 0;
}

/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently being sent. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
	}

	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more than once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}

/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
 * sure that the returned clone is usable at the point where sending is at the
 * moment. This means that no clones are accepted which lie behind the current
 * inode+offset.
 *
 * path must point to the extent item when called.
 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;
	int extent_type;
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	struct btrfs_extent_item *ei;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			    struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	down_read(&fs_info->commit_root_sem);
	ret = extent_from_logical(fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&fs_info->commit_root_sem);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
			    struct btrfs_extent_item);
	/*
	 * Backreference walking (iterate_extent_inodes() below) is currently
	 * too expensive when an extent has a large number of references, both
	 * in time spent and used memory. So for now just fall back to write
	 * operations instead of clone operations when an extent has more than
	 * a certain amount of references.
	 */
	if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
		ret = -ENOENT;
		goto out;
	}
	btrfs_release_path(tmp_path);

	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;
	/*
	 * For non-compressed extents iterate_extent_inodes() gives us extent
	 * offsets that already take into account the data offset, but not for
	 * compressed extents, since the offset is logical and not relative to
	 * the physical extent locations. We must take this into account to
	 * avoid sending clone offsets that go beyond the source file's size,
	 * which would result in the clone ioctl failing with -EINVAL on the
	 * receiving end.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		backref_ctx->data_offset = 0;
	else
		backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(fs_info, found_key.objectid,
				    extent_item_pos, 1, __iterate_backrefs,
				    backref_ctx, false);

	if (ret < 0)
		goto out;

	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(fs_info,
			  "did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu",
			  ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

	btrfs_debug(fs_info,
		    "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
		    data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		btrfs_debug(fs_info, "no clones found");

	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}

	}

	if (cur_clone_root) {
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}

static int read_symlink(struct btrfs_root *root,
			u64 ino,
			struct fs_path *dest)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u8 compression;
	unsigned long off;
	int len;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		/*
		 * An empty symlink inode. Can happen in rare error paths when
		 * creating a symlink (transaction committed before the inode
		 * eviction handler removed the symlink inode items and a crash
		 * happened in between or the subvol was snapshotted in
		 * between). Print an informative message to dmesg/syslog so
		 * that the user can delete the symlink.
		 */
		btrfs_err(root->fs_info,
			  "Found empty symlink inode %llu at root %llu",
			  ino, root->root_key.objectid);
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], ei);
	compression = btrfs_file_extent_compression(path->nodes[0], ei);
	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
	BUG_ON(compression);

	off = btrfs_file_extent_inline_start(ei);
	len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);

	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Helper function to generate a file name that is unique in the root of
 * send_root and parent_root. This is used to generate names for orphan inodes.
 */
static int gen_unique_name(struct send_ctx *sctx,
			   u64 ino, u64 gen,
			   struct fs_path *dest)
{
	int ret = 0;
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	char tmp[64];
	int len;
	u64 idx = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	while (1) {
		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
			       ino, gen, idx);
		ASSERT(len < sizeof(tmp));

		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
					   path, BTRFS_FIRST_FREE_OBJECTID,
					   tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}

		if (!sctx->parent_root) {
			/* unique */
			ret = 0;
			break;
		}

		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
					   path, BTRFS_FIRST_FREE_OBJECTID,
					   tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}
		/* unique */
		break;
	}

	ret = fs_path_add(dest, tmp, strlen(tmp));

out:
	btrfs_free_path(path);
	return ret;
}
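/*
 * Illustrative example (not part of the original source): for inode 261 with
 * generation 7 the loop above first probes the name "o261-7-0"; if a dir
 * entry with that name already exists in the top level of either snapshot,
 * it retries with "o261-7-1", "o261-7-2", ... until an unused name is found.
 */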

enum inode_state {
	inode_state_no_change,
	inode_state_will_create,
	inode_state_did_create,
	inode_state_will_delete,
	inode_state_did_delete,
};

static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen;

	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	if (!sctx->parent_root) {
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				     NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	if (!left_ret && !right_ret) {
		if (left_gen == gen && right_gen == gen) {
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}
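/*
 * Summary of the logic above (illustrative, not part of the original
 * source), where "L" means the inode exists in send_root with a matching
 * generation and "R" means the same for parent_root:
 *
 *	L && R			-> inode_state_no_change
 *	L only, processed	-> inode_state_did_create
 *	L only, not processed	-> inode_state_will_create
 *	R only, processed	-> inode_state_did_delete
 *	R only, not processed	-> inode_state_will_delete
 *
 * "Processed" means ino < sctx->send_progress; every other combination
 * yields -ENOENT.
 */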

static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;

	if (ino == BTRFS_FIRST_FREE_OBJECTID)
		return 1;

	ret = get_cur_inode_state(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (ret == inode_state_no_change ||
	    ret == inode_state_did_create ||
	    ret == inode_state_will_delete)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Helper function to look up a dir item in a dir.
 */
static int lookup_dir_item_inode(struct btrfs_root *root,
				 u64 dir, const char *name, int name_len,
				 u64 *found_inode,
				 u8 *found_type)
{
	int ret = 0;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path,
				   dir, name, name_len, 0);
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
	}
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
	if (key.type == BTRFS_ROOT_ITEM_KEY) {
		ret = -ENOENT;
		goto out;
	}
	*found_inode = key.objectid;
	*found_type = btrfs_dir_type(path->nodes[0], di);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
 * generation of the parent dir and the name of the dir entry.
 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *iref;
		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		struct btrfs_inode_extref *extref;
		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
					(unsigned long)&extref->name, len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	btrfs_release_path(path);

	if (dir_gen) {
		ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0)
			goto out;
	}

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}

static int is_first_ref(struct btrfs_root *root,
			u64 ino, u64 dir,
			const char *name, int name_len)
{
	int ret;
	struct fs_path *tmp_name;
	u64 tmp_dir;

	tmp_name = fs_path_alloc();
	if (!tmp_name)
		return -ENOMEM;

	ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
	if (ret < 0)
		goto out;

	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
		ret = 0;
		goto out;
	}

	ret = !memcmp(tmp_name->start, name, name_len);

out:
	fs_path_free(tmp_name);
	return ret;
}

/*
 * Used by process_recorded_refs to determine if a new ref would overwrite an
 * already existing ref. In case it detects an overwrite, it returns the
 * inode/gen in who_ino/who_gen.
 * When an overwrite is detected, process_recorded_refs does proper orphanizing
 * to make sure later references to the overwritten inode are possible.
 * Orphanizing, however, is only required for the first ref of an inode.
 * process_recorded_refs does an additional is_first_ref check to see if
 * orphanizing is really required.
 */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
			      const char *name, int name_len,
			      u64 *who_ino, u64 *who_gen, u64 *who_mode)
{
	int ret = 0;
	u64 gen;
	u64 other_inode = 0;
	u8 other_type = 0;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/*
	 * If we have a parent root we need to verify that the parent dir was
	 * not deleted and then re-created; if it was then we have no overwrite
	 * and we can just unlink this entry.
	 */
	if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
				    &other_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Check if the overwritten ref was already processed. If yes, the ref
	 * was already unlinked/moved, so we can safely assume that we will not
	 * overwrite anything at this point in time.
	 */
	if (other_inode > sctx->send_progress ||
	    is_waiting_for_move(sctx, other_inode)) {
		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				     who_gen, who_mode, NULL, NULL, NULL);
		if (ret < 0)
			goto out;

		ret = 1;
		*who_ino = other_inode;
	} else {
		ret = 0;
	}

out:
	return ret;
}

/*
 * Checks if the ref was overwritten by an already processed inode. This is
 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
 * thus the orphan name needs to be used.
 * process_recorded_refs also uses it to avoid unlinking of refs that were
 * overwritten.
 */
static int did_overwrite_ref(struct send_ctx *sctx,
			     u64 dir, u64 dir_gen,
			     u64 ino, u64 ino_gen,
			     const char *name, int name_len)
{
	int ret = 0;
	u64 gen;
	u64 ow_inode;
	u8 other_type;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	if (dir != BTRFS_FIRST_FREE_OBJECTID) {
		ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	/* check if the ref was overwritten by another ref */
	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
				    &ow_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		/* was never and will never be overwritten */
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0)
		goto out;

	if (ow_inode == ino && gen == ino_gen) {
		ret = 0;
		goto out;
	}

	/*
	 * We know that it is or will be overwritten. Check this now.
	 * The current inode being processed might have been the one that caused
	 * inode 'ino' to be orphanized, therefore check if ow_inode matches
	 * the current inode being processed.
	 */
	if ((ow_inode < sctx->send_progress) ||
	    (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
	     gen == sctx->cur_inode_gen))
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
 * that got overwritten. This is used by process_recorded_refs to determine
 * if it has to use the path as returned by get_cur_path or the orphan name.
 */
static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 dir;
	u64 dir_gen;

	if (!sctx->parent_root)
		goto out;

	name = fs_path_alloc();
	if (!name)
		return -ENOMEM;

	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
	if (ret < 0)
		goto out;

	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
				name->start, fs_path_len(name));

out:
	fs_path_free(name);
	return ret;
}
2012
2013 /*
2014 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
2015 * so we need to do some special handling in case we have clashes. This function
2016 * takes care of this with the help of name_cache_entry::radix_list.
2017 * In case of error, nce is kfreed.
 */
static int name_cache_insert(struct send_ctx *sctx,
			     struct name_cache_entry *nce)
{
	int ret = 0;
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
		if (!nce_head) {
			kfree(nce);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(nce_head);

		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
		if (ret < 0) {
			kfree(nce_head);
			kfree(nce);
			return ret;
		}
	}
	list_add_tail(&nce->radix_list, nce_head);
	list_add_tail(&nce->list, &sctx->name_cache_list);
	sctx->name_cache_size++;

	return ret;
}

static void name_cache_delete(struct send_ctx *sctx,
			      struct name_cache_entry *nce)
{
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		btrfs_err(sctx->send_root->fs_info,
	  "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
			  nce->ino, sctx->name_cache_size);
	}

	list_del(&nce->radix_list);
	list_del(&nce->list);
	sctx->name_cache_size--;

	/*
	 * We may not get to the final release of nce_head if the lookup fails
	 */
	if (nce_head && list_empty(nce_head)) {
		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
		kfree(nce_head);
	}
}

static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
						  u64 ino, u64 gen)
{
	struct list_head *nce_head;
	struct name_cache_entry *cur;

	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
	if (!nce_head)
		return NULL;

	list_for_each_entry(cur, nce_head, radix_list) {
		if (cur->ino == ino && cur->gen == gen)
			return cur;
	}
	return NULL;
}

/*
 * Removes the entry from the list and adds it back to the end. This marks the
 * entry as recently used so that name_cache_clean_unused does not remove it.
 */
static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
{
	list_del(&nce->list);
	list_add_tail(&nce->list, &sctx->name_cache_list);
}

/*
 * Remove some entries from the beginning of name_cache_list.
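 *
 * Note (derived from the code below): eviction only kicks in once the cache
 * grows past SEND_CTX_NAME_CACHE_CLEAN_SIZE entries, and it then shrinks the
 * cache back down to SEND_CTX_MAX_NAME_CACHE_SIZE entries, always dropping
 * the least recently used ones from the head of the list. The gap between
 * the two thresholds avoids evicting on every insert.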
 */
static void name_cache_clean_unused(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
		return;

	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
		nce = list_entry(sctx->name_cache_list.next,
				 struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

static void name_cache_free(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	while (!list_empty(&sctx->name_cache_list)) {
		nce = list_entry(sctx->name_cache_list.next,
				 struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

/*
 * Used by get_cur_path for each ref up to the root.
 * Returns 0 if it succeeded.
 * Returns 1 if the inode does not exist or got overwritten. In that case, the
 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
 * Returns <0 in case of error.
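 *
 * A hypothetical illustration of the contract: for an inode that still exists
 * and whose first ref "foo" lives in directory 259, this returns 0, appends
 * "foo" to *dest and sets *parent_ino to 259; for an inode that was deleted
 * or whose first ref got overwritten, it returns 1 and *dest holds an orphan
 * name instead, in which case the caller must not rely on
 * *parent_ino/*parent_gen.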
 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct name_cache_entry *nce = NULL;

	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			name_cache_delete(sctx, nce);
			kfree(nce);
			nce = NULL;
		} else {
			name_cache_used(sctx, nce);
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			ret = nce->ret;
			goto out;
		}
	}

	/*
	 * If the inode does not exist yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * __record_new_ref.
	 */
	ret = is_inode_existent(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
				dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}

out_cache:
	/*
	 * Store the result of the lookup in the name cache.
	 */
	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
	if (!nce) {
		ret = -ENOMEM;
		goto out;
	}

	nce->ino = ino;
	nce->gen = gen;
	nce->parent_ino = *parent_ino;
	nce->parent_gen = *parent_gen;
	nce->name_len = fs_path_len(dest);
	nce->ret = ret;
	strcpy(nce->name, dest->start);

	if (ino < sctx->send_progress)
		nce->need_later_update = 0;
	else
		nce->need_later_update = 1;

	nce_ret = name_cache_insert(sctx, nce);
	if (nce_ret < 0)
		ret = nce_ret;
	name_cache_clean_unused(sctx);

out:
	return ret;
}

/*
 * Magic happens here. This function returns the first ref to an inode as it
 * would look like while receiving the stream at this point in time.
 * We walk the path up to the root. For every inode in between, we check if it
 * was already processed/sent. If yes, we continue with the parent as found
 * in send_root. If not, we continue with the parent as found in parent_root.
 * If we encounter an inode that was deleted at this point in time, we use the
 * inode's "orphan" name instead of the real name and stop. Same with new inodes
 * that were not created yet and overwritten inodes/refs.
 *
 * When do we have orphan inodes:
 * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
 *    inside which were not processed yet (pending for move/delete). If anyone
 *    tried to get the path to the dir items, it would get a path inside that
 *    orphan directory.
 * 3. When an inode is moved around or gets new links, it may overwrite the ref
 *    of an unprocessed inode. If in that case the first ref would be
 *    overwritten, the overwritten inode gets "orphanized". Later when we
 *    process this overwritten inode, it is restored at a new place by moving
 *    the orphan inode.
 *
 * sctx->send_progress tells this function at which point in time receiving
 * would be.
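 *
 * A sketch of case 3 (hedged; the exact orphan name format is whatever
 * gen_unique_name produces, assumed here to look like "o<ino>-<gen>-<seq>"):
 * if a new ref takes over the name "a", the first ref of the not yet
 * processed inode 261, then 261 is first renamed to something like
 * "o261-7-0", and any path for 261 built before it is processed goes through
 * that orphan name.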
 */
static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
			struct fs_path *dest)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 parent_inode = 0;
	u64 parent_gen = 0;
	int stop = 0;

	name = fs_path_alloc();
	if (!name) {
		ret = -ENOMEM;
		goto out;
	}

	dest->reversed = 1;
	fs_path_reset(dest);

	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
		struct waiting_dir_move *wdm;

		fs_path_reset(name);

		if (is_waiting_for_rm(sctx, ino)) {
			ret = gen_unique_name(sctx, ino, gen, name);
			if (ret < 0)
				goto out;
			ret = fs_path_add_path(dest, name);
			break;
		}

		wdm = get_waiting_dir_move(sctx, ino);
		if (wdm && wdm->orphanized) {
			ret = gen_unique_name(sctx, ino, gen, name);
			stop = 1;
		} else if (wdm) {
			ret = get_first_ref(sctx->parent_root, ino,
					    &parent_inode, &parent_gen, name);
		} else {
			ret = __get_cur_name_and_parent(sctx, ino, gen,
							&parent_inode,
							&parent_gen, name);
			if (ret)
				stop = 1;
		}

		if (ret < 0)
			goto out;

		ret = fs_path_add_path(dest, name);
		if (ret < 0)
			goto out;

		ino = parent_inode;
		gen = parent_gen;
	}

out:
	fs_path_free(name);
	if (!ret)
		fs_path_unreverse(dest);
	return ret;
}

/*
 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
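 *
 * (If a parent root exists this actually emits BTRFS_SEND_C_SNAPSHOT instead,
 * carrying the clone source's UUID and ctransid in addition to the
 * subvolume's own path, UUID and ctransid, as can be seen from the TLV_PUT
 * calls below.)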
 */
static int send_subvol_begin(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_root *send_root = sctx->send_root;
	struct btrfs_root *parent_root = sctx->parent_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_root_ref *ref;
	struct extent_buffer *leaf;
	char *name = NULL;
	int namelen;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
	if (!name) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	key.objectid = send_root->root_key.objectid;
	key.type = BTRFS_ROOT_BACKREF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
					 &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
	    key.objectid != send_root->root_key.objectid) {
		ret = -ENOENT;
		goto out;
	}
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
	namelen = btrfs_root_ref_name_len(leaf, ref);
	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
	btrfs_release_path(path);

	if (parent_root) {
		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
		if (ret < 0)
			goto out;
	} else {
		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
		if (ret < 0)
			goto out;
	}

	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);

	if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
			     sctx->send_root->root_item.received_uuid);
	else
		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
			     sctx->send_root->root_item.uuid);

	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
		    le64_to_cpu(sctx->send_root->root_item.ctransid));
	if (parent_root) {
		if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
			TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				     parent_root->root_item.received_uuid);
		else
			TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				     parent_root->root_item.uuid);
		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
			    le64_to_cpu(sctx->parent_root->root_item.ctransid));
	}

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	btrfs_free_path(path);
	kfree(name);
	return ret;
}

static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret = 0;
	struct fs_path *p;

	btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
	if (ret < 0)
		goto out;

	ret = get_cur_path(sctx, ino, gen, p);
	if (ret < 0)
		goto out;
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	fs_path_free(p);
	return ret;
}

static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret = 0;
	struct fs_path *p;

	btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
	if (ret < 0)
		goto out;

	ret = get_cur_path(sctx, ino, gen, p);
	if (ret < 0)
		goto out;
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	fs_path_free(p);
	return ret;
}

static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret = 0;
	struct fs_path *p;

	btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
		    ino, uid, gid);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
	if (ret < 0)
		goto out;

	ret = get_cur_path(sctx, ino, gen, p);
	if (ret < 0)
		goto out;
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	fs_path_free(p);
	return ret;
}

static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret = 0;
	struct fs_path *p = NULL;
	struct btrfs_inode_item *ii;
	struct btrfs_path *path = NULL;
	struct extent_buffer *eb;
	struct btrfs_key key;
	int slot;

	btrfs_debug(fs_info, "send_utimes %llu", ino);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	path = alloc_path_for_send();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
	if (ret > 0)
		ret = -ENOENT;
	if (ret < 0)
		goto out;

	eb = path->nodes[0];
	slot = path->slots[0];
	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
	if (ret < 0)
		goto out;

	ret = get_cur_path(sctx, ino, gen, p);
	if (ret < 0)
		goto out;
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
	/* TODO Add otime support when the otime patches get into upstream */

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	fs_path_free(p);
	btrfs_free_path(path);
	return ret;
}

/*
 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
 * a valid path yet because we haven't processed the refs yet. So, the inode
 * is created as orphan.
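 *
 * For illustration (hedged; the orphan name is whatever gen_unique_name
 * produces): creating a new regular file with inode number 261 emits a
 * BTRFS_SEND_C_MKFILE command whose path is an orphan name such as
 * "o261-5-0", and process_recorded_refs later renames it to its real path.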
 */
static int send_create_inode(struct send_ctx *sctx, u64 ino)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret = 0;
	struct fs_path *p;
	int cmd;
	u64 gen;
	u64 mode;
	u64 rdev;

	btrfs_debug(fs_info, "send_create_inode %llu", ino);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	if (ino != sctx->cur_ino) {
		ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
				     NULL, NULL, &rdev);
		if (ret < 0)
			goto out;
	} else {
		gen = sctx->cur_inode_gen;
		mode = sctx->cur_inode_mode;
		rdev = sctx->cur_inode_rdev;
	}

	if (S_ISREG(mode)) {
		cmd = BTRFS_SEND_C_MKFILE;
	} else if (S_ISDIR(mode)) {
		cmd = BTRFS_SEND_C_MKDIR;
	} else if (S_ISLNK(mode)) {
		cmd = BTRFS_SEND_C_SYMLINK;
	} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
		cmd = BTRFS_SEND_C_MKNOD;
	} else if (S_ISFIFO(mode)) {
		cmd = BTRFS_SEND_C_MKFIFO;
	} else if (S_ISSOCK(mode)) {
		cmd = BTRFS_SEND_C_MKSOCK;
	} else {
		btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
			   (int)(mode & S_IFMT));
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = begin_cmd(sctx, cmd);
	if (ret < 0)
		goto out;

	ret = gen_unique_name(sctx, ino, gen, p);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);

	if (S_ISLNK(mode)) {
		fs_path_reset(p);
		ret = read_symlink(sctx->send_root, ino, p);
		if (ret < 0)
			goto out;
		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
	} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
		TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
		TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
	}

	ret = send_cmd(sctx);
	if (ret < 0)
		goto out;


tlv_put_failure:
out:
	fs_path_free(p);
	return ret;
}

/*
 * We need some special handling for inodes that get processed before their
 * parent directory is created. See process_recorded_refs for details.
 * This function checks whether we already created the dir out of order.
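 *
 * Example scenario (a sketch with made-up inode numbers): if inode 257 is
 * processed first and has a new ref inside directory 259 (a higher inode
 * number, hence not processed yet), process_recorded_refs creates 259 out of
 * order at that point. When inode 259 is processed later, this function
 * finds a dir item pointing to an inode below sctx->send_progress and
 * returns 1, so the directory is not created a second time.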
 */
static int did_create_dir(struct send_ctx *sctx, u64 dir)
{
	int ret = 0;
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_key di_key;
	struct extent_buffer *eb;
	struct btrfs_dir_item *di;
	int slot;

	path = alloc_path_for_send();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	key.objectid = dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		eb = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(eb)) {
			ret = btrfs_next_leaf(sctx->send_root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				break;
			}
			continue;
		}

		btrfs_item_key_to_cpu(eb, &found_key, slot);
		if (found_key.objectid != key.objectid ||
		    found_key.type != key.type) {
			ret = 0;
			goto out;
		}

		di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
		    di_key.objectid < sctx->send_progress) {
			ret = 1;
			goto out;
		}

		path->slots[0]++;
	}

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Only creates the inode if it is:
 * 1. Not a directory
 * 2. Or a directory which was not created already due to out of order
 *    directories. See did_create_dir and process_recorded_refs for details.
 */
static int send_create_inode_if_needed(struct send_ctx *sctx)
{
	int ret;

	if (S_ISDIR(sctx->cur_inode_mode)) {
		ret = did_create_dir(sctx, sctx->cur_ino);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
	}

	ret = send_create_inode(sctx, sctx->cur_ino);
	if (ret < 0)
		goto out;

out:
	return ret;
}

struct recorded_ref {
	struct list_head list;
	char *name;
	struct fs_path *full_path;
	u64 dir;
	u64 dir_gen;
	int name_len;
};

static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
{
	ref->full_path = path;
	ref->name = (char *)kbasename(ref->full_path->start);
	ref->name_len = ref->full_path->end - ref->name;
}

/*
 * We need to process new refs before deleted refs, but compare_tree gives us
 * everything mixed. So we first record all refs and later process them.
 * This function is a helper to record one ref.
 */
static int __record_ref(struct list_head *head, u64 dir,
			u64 dir_gen, struct fs_path *path)
{
	struct recorded_ref *ref;

	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	ref->dir = dir;
	ref->dir_gen = dir_gen;
	set_ref_path(ref, path);
	list_add_tail(&ref->list, head);
	return 0;
}

static int dup_ref(struct recorded_ref *ref, struct list_head *list)
{
	struct recorded_ref *new;

	new = kmalloc(sizeof(*ref), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	new->dir = ref->dir;
	new->dir_gen = ref->dir_gen;
	new->full_path = NULL;
	INIT_LIST_HEAD(&new->list);
	list_add_tail(&new->list, list);
	return 0;
}

static void __free_recorded_refs(struct list_head *head)
{
	struct recorded_ref *cur;

	while (!list_empty(head)) {
		cur = list_entry(head->next, struct recorded_ref, list);
		fs_path_free(cur->full_path);
		list_del(&cur->list);
		kfree(cur);
	}
}

static void free_recorded_refs(struct send_ctx *sctx)
{
	__free_recorded_refs(&sctx->new_refs);
	__free_recorded_refs(&sctx->deleted_refs);
}

/*
 * Renames/moves a file/dir to its orphan name. Used when the first
 * ref of an unprocessed inode gets overwritten and for all non-empty
 * directories.
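 *
 * In stream terms this is just a rename: e.g. (hypothetical paths) a
 * not-yet-empty directory "a/b" pending deletion would be renamed to an
 * orphan name like "o259-8-0" so its remaining entries can still be reached
 * until they are processed.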
 */
static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
			   struct fs_path *path)
{
	int ret;
	struct fs_path *orphan;

	orphan = fs_path_alloc();
	if (!orphan)
		return -ENOMEM;

	ret = gen_unique_name(sctx, ino, gen, orphan);
	if (ret < 0)
		goto out;

	ret = send_rename(sctx, path, orphan);

out:
	fs_path_free(orphan);
	return ret;
}

static struct orphan_dir_info *
add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
{
	struct rb_node **p = &sctx->orphan_dirs.rb_node;
	struct rb_node *parent = NULL;
	struct orphan_dir_info *entry, *odi;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct orphan_dir_info, node);
		if (dir_ino < entry->ino) {
			p = &(*p)->rb_left;
		} else if (dir_ino > entry->ino) {
			p = &(*p)->rb_right;
		} else {
			return entry;
		}
	}

	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
	if (!odi)
		return ERR_PTR(-ENOMEM);
	odi->ino = dir_ino;
	odi->gen = 0;
	odi->last_dir_index_offset = 0;

	rb_link_node(&odi->node, parent, p);
	rb_insert_color(&odi->node, &sctx->orphan_dirs);
	return odi;
}

static struct orphan_dir_info *
get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
{
	struct rb_node *n = sctx->orphan_dirs.rb_node;
	struct orphan_dir_info *entry;

	while (n) {
		entry = rb_entry(n, struct orphan_dir_info, node);
		if (dir_ino < entry->ino)
			n = n->rb_left;
		else if (dir_ino > entry->ino)
			n = n->rb_right;
		else
			return entry;
	}
	return NULL;
}

static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
{
	struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);

	return odi != NULL;
}

static void free_orphan_dir_info(struct send_ctx *sctx,
				 struct orphan_dir_info *odi)
{
	if (!odi)
		return;
	rb_erase(&odi->node, &sctx->orphan_dirs);
	kfree(odi);
}

/*
 * Returns 1 if a directory can be removed at this point in time.
 * We check this by iterating all dir items and checking if the inode behind
 * the dir item was already processed.
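 *
 * Note (derived from the code below): when the directory cannot be removed
 * yet, the blocking dir index offset is remembered in
 * orphan_dir_info::last_dir_index_offset, so a later call resumes the scan
 * from there instead of re-checking dir items that were already found to be
 * deletable.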
 */
static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
		     u64 send_progress)
{
	int ret = 0;
	struct btrfs_root *root = sctx->parent_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_key loc;
	struct btrfs_dir_item *di;
	struct orphan_dir_info *odi = NULL;

	/*
	 * Don't try to rmdir the top/root subvolume dir.
	 */
	if (dir == BTRFS_FIRST_FREE_OBJECTID)
		return 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = dir;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = 0;

	odi = get_orphan_dir_info(sctx, dir);
	if (odi)
		key.offset = odi->last_dir_index_offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		struct waiting_dir_move *dm;

		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				break;
			continue;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		if (found_key.objectid != key.objectid ||
		    found_key.type != key.type)
			break;

		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_dir_item);
		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);

		dm = get_waiting_dir_move(sctx, loc.objectid);
		if (dm) {
			odi = add_orphan_dir_info(sctx, dir);
			if (IS_ERR(odi)) {
				ret = PTR_ERR(odi);
				goto out;
			}
			odi->gen = dir_gen;
			odi->last_dir_index_offset = found_key.offset;
			dm->rmdir_ino = dir;
			ret = 0;
			goto out;
		}

		if (loc.objectid > send_progress) {
			odi = add_orphan_dir_info(sctx, dir);
			if (IS_ERR(odi)) {
				ret = PTR_ERR(odi);
				goto out;
			}
			odi->gen = dir_gen;
			odi->last_dir_index_offset = found_key.offset;
			ret = 0;
			goto out;
		}

		path->slots[0]++;
	}
	free_orphan_dir_info(sctx, odi);

	ret = 1;

out:
	btrfs_free_path(path);
	return ret;
}

static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
{
	struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino);

	return entry != NULL;
}

static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
{
	struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
	struct rb_node *parent = NULL;
	struct waiting_dir_move *entry, *dm;

	dm = kmalloc(sizeof(*dm), GFP_KERNEL);
	if (!dm)
		return -ENOMEM;
	dm->ino = ino;
	dm->rmdir_ino = 0;
	dm->orphanized = orphanized;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct waiting_dir_move, node);
		if (ino < entry->ino) {
			p = &(*p)->rb_left;
		} else if (ino > entry->ino) {
			p = &(*p)->rb_right;
		} else {
			kfree(dm);
			return -EEXIST;
		}
	}

	rb_link_node(&dm->node, parent, p);
	rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
	return 0;
}

static struct waiting_dir_move *
get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
{
	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
	struct waiting_dir_move *entry;

	while (n) {
		entry = rb_entry(n, struct waiting_dir_move, node);
		if (ino < entry->ino)
			n = n->rb_left;
		else if (ino > entry->ino)
			n = n->rb_right;
		else
			return entry;
	}
	return NULL;
}

static void free_waiting_dir_move(struct send_ctx *sctx,
				  struct waiting_dir_move *dm)
{
	if (!dm)
		return;
	rb_erase(&dm->node, &sctx->waiting_dir_moves);
	kfree(dm);
}

static int add_pending_dir_move(struct send_ctx *sctx,
				u64 ino,
				u64 ino_gen,
				u64 parent_ino,
				struct list_head *new_refs,
				struct list_head *deleted_refs,
				const bool is_orphan)
{
	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
	struct rb_node *parent = NULL;
	struct pending_dir_move *entry = NULL, *pm;
	struct recorded_ref *cur;
	int exists = 0;
	int ret;

	pm = kmalloc(sizeof(*pm), GFP_KERNEL);
	if (!pm)
		return -ENOMEM;
	pm->parent_ino = parent_ino;
	pm->ino = ino;
	pm->gen = ino_gen;
	INIT_LIST_HEAD(&pm->list);
	INIT_LIST_HEAD(&pm->update_refs);
	RB_CLEAR_NODE(&pm->node);

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct pending_dir_move, node);
		if (parent_ino < entry->parent_ino) {
			p = &(*p)->rb_left;
		} else if (parent_ino > entry->parent_ino) {
			p = &(*p)->rb_right;
		} else {
			exists = 1;
			break;
		}
	}

	list_for_each_entry(cur, deleted_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
			goto out;
	}
	list_for_each_entry(cur, new_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
			goto out;
	}

	ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
	if (ret)
		goto out;

	if (exists) {
		list_add_tail(&pm->list, &entry->list);
	} else {
		rb_link_node(&pm->node, parent, p);
		rb_insert_color(&pm->node, &sctx->pending_dir_moves);
	}
	ret = 0;
out:
	if (ret) {
		__free_recorded_refs(&pm->update_refs);
		kfree(pm);
	}
	return ret;
}

static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
						      u64 parent_ino)
{
	struct rb_node *n = sctx->pending_dir_moves.rb_node;
	struct pending_dir_move *entry;

	while (n) {
		entry = rb_entry(n, struct pending_dir_move, node);
		if (parent_ino < entry->parent_ino)
			n = n->rb_left;
		else if (parent_ino > entry->parent_ino)
			n = n->rb_right;
		else
			return entry;
	}
	return NULL;
}

static int path_loop(struct send_ctx *sctx, struct fs_path *name,
		     u64 ino, u64 gen, u64 *ancestor_ino)
{
	int ret = 0;
	u64 parent_inode = 0;
	u64 parent_gen = 0;
	u64 start_ino = ino;

	*ancestor_ino = 0;
	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
		fs_path_reset(name);

		if (is_waiting_for_rm(sctx, ino))
			break;
		if (is_waiting_for_move(sctx, ino)) {
			if (*ancestor_ino == 0)
				*ancestor_ino = ino;
			ret = get_first_ref(sctx->parent_root, ino,
					    &parent_inode, &parent_gen, name);
		} else {
			ret = __get_cur_name_and_parent(sctx, ino, gen,
							&parent_inode,
							&parent_gen, name);
			if (ret > 0) {
				ret = 0;
				break;
			}
		}
		if (ret < 0)
			break;
		if (parent_inode == start_ino) {
			ret = 1;
			if (*ancestor_ino == 0)
				*ancestor_ino = ino;
			break;
		}
		ino = parent_inode;
		gen = parent_gen;
	}
	return ret;
}

static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
{
	struct fs_path *from_path = NULL;
	struct fs_path *to_path = NULL;
	struct fs_path *name = NULL;
	u64 orig_progress = sctx->send_progress;
	struct recorded_ref *cur;
	u64 parent_ino, parent_gen;
	struct waiting_dir_move *dm = NULL;
	u64 rmdir_ino = 0;
	u64 ancestor;
	bool is_orphan;
	int ret;

	name = fs_path_alloc();
	from_path = fs_path_alloc();
	if (!name || !from_path) {
		ret = -ENOMEM;
		goto out;
	}

	dm = get_waiting_dir_move(sctx, pm->ino);
	ASSERT(dm);
	rmdir_ino = dm->rmdir_ino;
	is_orphan = dm->orphanized;
	free_waiting_dir_move(sctx, dm);

	if (is_orphan) {
		ret = gen_unique_name(sctx, pm->ino,
				      pm->gen, from_path);
	} else {
		ret = get_first_ref(sctx->parent_root, pm->ino,
				    &parent_ino, &parent_gen, name);
		if (ret < 0)
			goto out;
		ret = get_cur_path(sctx, parent_ino, parent_gen,
				   from_path);
		if (ret < 0)
			goto out;
		ret = fs_path_add_path(from_path, name);
	}
	if (ret < 0)
		goto out;

	sctx->send_progress = sctx->cur_ino + 1;
	ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
	if (ret < 0)
		goto out;
	if (ret) {
		LIST_HEAD(deleted_refs);
		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
					   &pm->update_refs, &deleted_refs,
					   is_orphan);
		if (ret < 0)
			goto out;
		if (rmdir_ino) {
			dm = get_waiting_dir_move(sctx, pm->ino);
			ASSERT(dm);
			dm->rmdir_ino = rmdir_ino;
		}
		goto out;
	}
	fs_path_reset(name);
	to_path = name;
	name = NULL;
	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
	if (ret < 0)
		goto out;

	ret = send_rename(sctx, from_path, to_path);
	if (ret < 0)
		goto out;

	if (rmdir_ino) {
		struct orphan_dir_info *odi;
		u64 gen;

		odi = get_orphan_dir_info(sctx, rmdir_ino);
		if (!odi) {
			/* already deleted */
			goto finish;
		}
		gen = odi->gen;

		ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
		if (ret < 0)
			goto out;
		if (!ret)
			goto finish;

		name = fs_path_alloc();
		if (!name) {
			ret = -ENOMEM;
			goto out;
		}
		ret = get_cur_path(sctx, rmdir_ino, gen, name);
		if (ret < 0)
			goto out;
		ret = send_rmdir(sctx, name);
		if (ret < 0)
			goto out;
	}

finish:
	ret = send_utimes(sctx, pm->ino, pm->gen);
	if (ret < 0)
		goto out;

	/*
	 * After rename/move, need to update the utimes of both new parent(s)
	 * and old parent(s).
	 */
	list_for_each_entry(cur, &pm->update_refs, list) {
		/*
		 * The parent inode might have been deleted in the send snapshot
		 */
		ret = get_inode_info(sctx->send_root, cur->dir, NULL,
				     NULL, NULL, NULL, NULL, NULL);
		if (ret == -ENOENT) {
			ret = 0;
			continue;
		}
		if (ret < 0)
			goto out;

		ret = send_utimes(sctx, cur->dir, cur->dir_gen);
		if (ret < 0)
			goto out;
	}

out:
	fs_path_free(name);
	fs_path_free(from_path);
	fs_path_free(to_path);
	sctx->send_progress = orig_progress;

	return ret;
}

static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
{
	if (!list_empty(&m->list))
		list_del(&m->list);
	if (!RB_EMPTY_NODE(&m->node))
		rb_erase(&m->node, &sctx->pending_dir_moves);
	__free_recorded_refs(&m->update_refs);
	kfree(m);
}

static void tail_append_pending_moves(struct send_ctx *sctx,
				      struct pending_dir_move *moves,
				      struct list_head *stack)
{
	if (list_empty(&moves->list)) {
		list_add_tail(&moves->list, stack);
	} else {
		LIST_HEAD(list);
		list_splice_init(&moves->list, &list);
		list_add_tail(&moves->list, stack);
		list_splice_tail(&list, stack);
	}
	if (!RB_EMPTY_NODE(&moves->node)) {
		rb_erase(&moves->node, &sctx->pending_dir_moves);
		RB_CLEAR_NODE(&moves->node);
	}
}

static int apply_children_dir_moves(struct send_ctx *sctx)
{
	struct pending_dir_move *pm;
	struct list_head stack;
	u64 parent_ino = sctx->cur_ino;
	int ret = 0;

	pm = get_pending_dir_moves(sctx, parent_ino);
	if (!pm)
		return 0;

	INIT_LIST_HEAD(&stack);
	tail_append_pending_moves(sctx, pm, &stack);

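	/*
	 * Iteratively apply all moves that were pending on the inode we just
	 * processed, then the moves that were pending on those children, and
	 * so on. The stack list replaces recursion: applying one move may
	 * unblock further moves keyed on that child's inode number, which are
	 * then appended to the tail of the stack.
	 */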
	while (!list_empty(&stack)) {
		pm = list_first_entry(&stack, struct pending_dir_move, list);
		parent_ino = pm->ino;
		ret = apply_dir_move(sctx, pm);
		free_pending_move(sctx, pm);
		if (ret)
			goto out;
		pm = get_pending_dir_moves(sctx, parent_ino);
		if (pm)
			tail_append_pending_moves(sctx, pm, &stack);
	}
	return 0;

out:
	while (!list_empty(&stack)) {
		pm = list_first_entry(&stack, struct pending_dir_move, list);
		free_pending_move(sctx, pm);
	}
	return ret;
}

/*
 * We might need to delay a directory rename even when no ancestor directory
 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
 * renamed. This happens when we rename a directory to the old name (the name
 * in the parent root) of some other unrelated directory that got its rename
 * delayed due to some ancestor with higher number that got renamed.
 *
 * Example:
 *
 * Parent snapshot:
 * .                                   (ino 256)
 * |---- a/                            (ino 257)
 * |     |---- file                    (ino 260)
 * |
 * |---- b/                            (ino 258)
 * |---- c/                            (ino 259)
 *
 * Send snapshot:
 * .                                   (ino 256)
 * |---- a/                            (ino 258)
 * |---- x/                            (ino 259)
 *       |---- y/                      (ino 257)
 *             |----- file             (ino 260)
 *
 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
 * must issue is:
 *
 * 1 - rename 259 from 'c' to 'x'
 * 2 - rename 257 from 'a' to 'x/y'
 * 3 - rename 258 from 'b' to 'a'
 *
 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
 * be done right away and < 0 on error.
 */
static int wait_for_dest_dir_move(struct send_ctx *sctx,
				  struct recorded_ref *parent_ref,
				  const bool is_orphan)
{
	struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key di_key;
	struct btrfs_dir_item *di;
	u64 left_gen;
	u64 right_gen;
	int ret = 0;
	struct waiting_dir_move *wdm;

	if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
		return 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = parent_ref->dir;
	key.type = BTRFS_DIR_ITEM_KEY;
	key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);

	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
	if (ret < 0) {
		goto out;
	} else if (ret > 0) {
		ret = 0;
		goto out;
	}

	di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
				       parent_ref->name_len);
	if (!di) {
		ret = 0;
		goto out;
	}
	/*
	 * di_key.objectid has the number of the inode that has a dentry in the
	 * parent directory with the same name that sctx->cur_ino is being
	 * renamed to. We need to check if that inode is in the send root as
	 * well and if it is currently marked as an inode with a pending rename,
	 * if it is, we need to delay the rename of sctx->cur_ino as well, so
	 * that it happens after that other inode is renamed.
	 */
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
	if (di_key.type != BTRFS_INODE_ITEM_KEY) {
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
			     &left_gen, NULL, NULL, NULL, NULL);
	if (ret < 0)
		goto out;
	ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
			     &right_gen, NULL, NULL, NULL, NULL);
	if (ret < 0) {
		if (ret == -ENOENT)
			ret = 0;
		goto out;
	}

	/* Different inode, no need to delay the rename of sctx->cur_ino */
	if (right_gen != left_gen) {
		ret = 0;
		goto out;
	}

	wdm = get_waiting_dir_move(sctx, di_key.objectid);
	if (wdm && !wdm->orphanized) {
		ret = add_pending_dir_move(sctx,
					   sctx->cur_ino,
					   sctx->cur_inode_gen,
					   di_key.objectid,
					   &sctx->new_refs,
					   &sctx->deleted_refs,
					   is_orphan);
		if (!ret)
			ret = 1;
	}
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Check if inode ino2, or any of its ancestors, is inode ino1.
 * Return 1 if true, 0 if false and < 0 on error.
 */
static int check_ino_in_path(struct btrfs_root *root,
			     const u64 ino1,
			     const u64 ino1_gen,
			     const u64 ino2,
			     const u64 ino2_gen,
			     struct fs_path *fs_path)
{
	u64 ino = ino2;

	if (ino1 == ino2)
		return ino1_gen == ino2_gen;

	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
		u64 parent;
		u64 parent_gen;
		int ret;

		fs_path_reset(fs_path);
		ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
		if (ret < 0)
			return ret;
		if (parent == ino1)
			return parent_gen == ino1_gen;
		ino = parent;
	}
	return 0;
}

/*
 * Check if ino ino1 is an ancestor of inode ino2 in the given root for any
 * possible path (in case ino2 is not a directory and has multiple hard links).
 * Return 1 if true, 0 if false and < 0 on error.
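 *
 * Hard-link example (hypothetical paths): if a regular file (ino2) is linked
 * both at "a/b/f" and at "c/f", then ino1 being the directory "a" makes this
 * function return 1 even though the path through "c" does not contain ino1,
 * because every INODE_REF/INODE_EXTREF item of ino2 is walked below.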
 */
static int is_ancestor(struct btrfs_root *root,
		       const u64 ino1,
		       const u64 ino1_gen,
		       const u64 ino2,
		       struct fs_path *fs_path)
{
	bool free_fs_path = false;
	int ret = 0;
	struct btrfs_path *path = NULL;
	struct btrfs_key key;

	if (!fs_path) {
		fs_path = fs_path_alloc();
		if (!fs_path)
			return -ENOMEM;
		free_fs_path = true;
	}

	path = alloc_path_for_send();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	key.objectid = ino2;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (true) {
		struct extent_buffer *leaf = path->nodes[0];
		int slot = path->slots[0];
		u32 cur_offset = 0;
		u32 item_size;

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid != ino2)
			break;
		if (key.type != BTRFS_INODE_REF_KEY &&
		    key.type != BTRFS_INODE_EXTREF_KEY)
			break;

		item_size = btrfs_item_size_nr(leaf, slot);
		while (cur_offset < item_size) {
			u64 parent;
			u64 parent_gen;

			if (key.type == BTRFS_INODE_EXTREF_KEY) {
				unsigned long ptr;
				struct btrfs_inode_extref *extref;

				ptr = btrfs_item_ptr_offset(leaf, slot);
				extref = (struct btrfs_inode_extref *)
					 (ptr + cur_offset);
				parent = btrfs_inode_extref_parent(leaf,
								   extref);
				cur_offset += sizeof(*extref);
				cur_offset += btrfs_inode_extref_name_len(leaf,
								extref);
			} else {
				parent = key.offset;
				cur_offset = item_size;
			}

			ret = get_inode_info(root, parent, NULL, &parent_gen,
					     NULL, NULL, NULL, NULL);
			if (ret < 0)
				goto out;
			ret = check_ino_in_path(root, ino1, ino1_gen,
						parent, parent_gen, fs_path);
			if (ret)
				goto out;
		}
		path->slots[0]++;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	if (free_fs_path)
		fs_path_free(fs_path);
	return ret;
}

static int wait_for_parent_move(struct send_ctx *sctx,
				struct recorded_ref *parent_ref,
				const bool is_orphan)
{
	int ret = 0;
	u64 ino = parent_ref->dir;
	u64 ino_gen = parent_ref->dir_gen;
	u64 parent_ino_before, parent_ino_after;
	struct fs_path *path_before = NULL;
	struct fs_path *path_after = NULL;
	int len1, len2;

	path_after = fs_path_alloc();
	path_before = fs_path_alloc();
	if (!path_after || !path_before) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Our current directory inode may not yet be renamed/moved because some
	 * ancestor (immediate or not) has to be renamed/moved first. So find if
	 * such ancestor exists and make sure our own rename/move happens after
	 * that ancestor is processed to avoid path build infinite loops (done
	 * at get_cur_path()).
	 */
	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
		u64 parent_ino_after_gen;

		if (is_waiting_for_move(sctx, ino)) {
			/*
			 * If the current inode is an ancestor of ino in the
			 * parent root, we need to delay the rename of the
			 * current inode, otherwise don't delay the rename
			 * because we can end up with a circular dependency
			 * of renames, resulting in some directories never
			 * getting the respective rename operations issued in
			 * the send stream or getting into infinite path build
			 * loops.
			 */
			ret = is_ancestor(sctx->parent_root,
					  sctx->cur_ino, sctx->cur_inode_gen,
					  ino, path_before);
			if (ret)
				break;
		}

		fs_path_reset(path_before);
		fs_path_reset(path_after);

		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
				    &parent_ino_after_gen, path_after);
		if (ret < 0)
			goto out;
		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
				    NULL, path_before);
		if (ret < 0 && ret != -ENOENT) {
			goto out;
		} else if (ret == -ENOENT) {
			ret = 0;
			break;
		}

		len1 = fs_path_len(path_before);
		len2 = fs_path_len(path_after);
		if (ino > sctx->cur_ino &&
		    (parent_ino_before != parent_ino_after || len1 != len2 ||
		     memcmp(path_before->start, path_after->start, len1))) {
			u64 parent_ino_gen;

			ret = get_inode_info(sctx->parent_root, ino, NULL,
					     &parent_ino_gen, NULL, NULL, NULL,
					     NULL);
			if (ret < 0)
				goto out;
			if (ino_gen == parent_ino_gen) {
				ret = 1;
				break;
			}
		}
		ino = parent_ino_after;
		ino_gen = parent_ino_after_gen;
	}

out:
	fs_path_free(path_before);
	fs_path_free(path_after);

	if (ret == 1) {
		ret = add_pending_dir_move(sctx,
					   sctx->cur_ino,
					   sctx->cur_inode_gen,
					   ino,
					   &sctx->new_refs,
					   &sctx->deleted_refs,
					   is_orphan);
		if (!ret)
			ret = 1;
	}

	return ret;
}

static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
{
	int ret;
	struct fs_path *new_path;

	/*
	 * Our reference's name member points into its full_path member string,
	 * so we use a new path here.
	 */
	new_path = fs_path_alloc();
	if (!new_path)
		return -ENOMEM;

	ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
	if (ret < 0) {
		fs_path_free(new_path);
		return ret;
	}
	ret = fs_path_add(new_path, ref->name, ref->name_len);
	if (ret < 0) {
		fs_path_free(new_path);
		return ret;
	}

	fs_path_free(ref->full_path);
	set_ref_path(ref, new_path);

	return 0;
}

/*
 * This does all the move/link/unlink/rmdir magic: it processes the refs
 * recorded for the current inode and issues the resulting rename, link,
 * unlink and rmdir commands in an order that is valid on the receiving side.
 */
static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret = 0;
	struct recorded_ref *cur;
	struct recorded_ref *cur2;
	struct list_head check_dirs;
	struct fs_path *valid_path = NULL;
	u64 ow_inode = 0;
	u64 ow_gen;
	u64 ow_mode;
	int did_overwrite = 0;
	int is_orphan = 0;
	u64 last_dir_ino_rm = 0;
	bool can_rename = true;
	bool orphanized_dir = false;
	bool orphanized_ancestor = false;

	btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);

	/*
	 * This should never happen as the root dir always has the same ref
	 * which is always '..'
	 */
	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
	INIT_LIST_HEAD(&check_dirs);

	valid_path = fs_path_alloc();
	if (!valid_path) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * First, check if the first ref of the current inode was overwritten
	 * before. If yes, we know that the current inode was already orphanized
	 * and thus use the orphan name. If not, we can use get_cur_path to
	 * get the path of the first ref as it would look like while receiving
	 * at this point in time.
	 * New inodes are always orphan at the beginning, so force to use the
	 * orphan name in this case.
	 * The first ref is stored in valid_path and will be updated if it
	 * gets moved around.
	 */
	if (!sctx->cur_inode_new) {
		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
					      sctx->cur_inode_gen);
		if (ret < 0)
			goto out;
		if (ret)
			did_overwrite = 1;
	}
	if (sctx->cur_inode_new || did_overwrite) {
		ret = gen_unique_name(sctx, sctx->cur_ino,
				      sctx->cur_inode_gen, valid_path);
		if (ret < 0)
			goto out;
		is_orphan = 1;
	} else {
		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				   valid_path);
		if (ret < 0)
			goto out;
	}

	list_for_each_entry(cur, &sctx->new_refs, list) {
		/*
		 * We may have refs where the parent directory does not exist
		 * yet. This happens if the parent directory's inum is higher
		 * than the current inum. To handle this case, we create the
		 * parent directory out of order. But we need to check if this
		 * already happened before due to other refs in the same dir.
		 */
		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
		if (ret < 0)
			goto out;
		if (ret == inode_state_will_create) {
			ret = 0;
			/*
			 * First check if any of the current inodes refs did
			 * already create the dir.
			 */
			list_for_each_entry(cur2, &sctx->new_refs, list) {
				if (cur == cur2)
					break;
				if (cur2->dir == cur->dir) {
					ret = 1;
					break;
				}
			}

			/*
			 * If that did not happen, check if a previous inode
			 * did already create the dir.
			 */
			if (!ret)
				ret = did_create_dir(sctx, cur->dir);
			if (ret < 0)
				goto out;
			if (!ret) {
				ret = send_create_inode(sctx, cur->dir);
				if (ret < 0)
					goto out;
			}
		}

		/*
		 * Check if this new ref would overwrite the first ref of
		 * another unprocessed inode. If yes, orphanize the
		 * overwritten inode. If we find an overwritten ref that is
		 * not the first ref, simply unlink it.
		 */
		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
					 cur->name, cur->name_len,
					 &ow_inode, &ow_gen, &ow_mode);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = is_first_ref(sctx->parent_root,
					   ow_inode, cur->dir, cur->name,
					   cur->name_len);
			if (ret < 0)
				goto out;
			if (ret) {
				struct name_cache_entry *nce;
				struct waiting_dir_move *wdm;

				ret = orphanize_inode(sctx, ow_inode, ow_gen,
						      cur->full_path);
				if (ret < 0)
					goto out;
				if (S_ISDIR(ow_mode))
					orphanized_dir = true;

				/*
				 * If ow_inode has its rename operation delayed
				 * make sure that its orphanized name is used in
				 * the source path when performing its rename
				 * operation.
				 */
				if (is_waiting_for_move(sctx, ow_inode)) {
					wdm = get_waiting_dir_move(sctx,
								   ow_inode);
					ASSERT(wdm);
					wdm->orphanized = true;
				}

				/*
				 * Make sure we clear our orphanized inode's
				 * name from the name cache. This is because the
				 * inode ow_inode might be an ancestor of some
				 * other inode that will be orphanized as well
				 * later and has an inode number greater than
				 * sctx->send_progress. We need to prevent
				 * future name lookups from using the old name
				 * and get instead the orphan name.
				 */
				nce = name_cache_search(sctx, ow_inode, ow_gen);
				if (nce) {
					name_cache_delete(sctx, nce);
					kfree(nce);
				}

				/*
				 * ow_inode might currently be an ancestor of
				 * cur_ino, therefore compute valid_path (the
				 * current path of cur_ino) again because it
				 * might contain the pre-orphanization name of
				 * ow_inode, which is no longer valid.
				 */
				ret = is_ancestor(sctx->parent_root,
						  ow_inode, ow_gen,
						  sctx->cur_ino, NULL);
				if (ret > 0) {
					orphanized_ancestor = true;
					fs_path_reset(valid_path);
					ret = get_cur_path(sctx, sctx->cur_ino,
							   sctx->cur_inode_gen,
							   valid_path);
				}
				if (ret < 0)
					goto out;
			} else {
				ret = send_unlink(sctx, cur->full_path);
				if (ret < 0)
					goto out;
			}
		}

		if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
			ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				can_rename = false;
				*pending_move = 1;
			}
		}

		if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
		    can_rename) {
			ret = wait_for_parent_move(sctx, cur, is_orphan);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				can_rename = false;
				*pending_move = 1;
			}
		}

		/*
		 * link/move the ref to the new place. If we have an orphan
		 * inode, move it and update valid_path. If not, link or move
		 * it depending on the inode mode.
		 */
		if (is_orphan && can_rename) {
			ret = send_rename(sctx, valid_path, cur->full_path);
			if (ret < 0)
				goto out;
			is_orphan = 0;
			ret = fs_path_copy(valid_path, cur->full_path);
			if (ret < 0)
				goto out;
		} else if (can_rename) {
			if (S_ISDIR(sctx->cur_inode_mode)) {
				/*
				 * Dirs can't be linked, so move it. For moved
				 * dirs, we always have one new and one deleted
				 * ref. The deleted ref is ignored later.
				 */
				ret = send_rename(sctx, valid_path,
						  cur->full_path);
				if (!ret)
					ret = fs_path_copy(valid_path,
							   cur->full_path);
				if (ret < 0)
					goto out;
			} else {
				/*
				 * We might have previously orphanized an inode
				 * which is an ancestor of our current inode,
				 * so our reference's full path, which was
				 * computed before any such orphanizations, must
				 * be updated.
				 */
				if (orphanized_dir) {
					ret = update_ref_path(sctx, cur);
					if (ret < 0)
						goto out;
				}
				ret = send_link(sctx, cur->full_path,
						valid_path);
				if (ret < 0)
					goto out;
			}
		}
		ret = dup_ref(cur, &check_dirs);
		if (ret < 0)
			goto out;
	}
4068
4069 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
4070 /*
4071 * Check if we can already rmdir the directory. If not,
4072 * orphanize it. For every dir item inside that gets deleted
4073 * later, we repeat this check and rmdir it if it's possible by then.
4074 * See the use of check_dirs for more details.
4075 */
4076 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4077 sctx->cur_ino);
4078 if (ret < 0)
4079 goto out;
4080 if (ret) {
4081 ret = send_rmdir(sctx, valid_path);
4082 if (ret < 0)
4083 goto out;
4084 } else if (!is_orphan) {
4085 ret = orphanize_inode(sctx, sctx->cur_ino,
4086 sctx->cur_inode_gen, valid_path);
4087 if (ret < 0)
4088 goto out;
4089 is_orphan = 1;
4090 }
4091
4092 list_for_each_entry(cur, &sctx->deleted_refs, list) {
4093 ret = dup_ref(cur, &check_dirs);
4094 if (ret < 0)
4095 goto out;
4096 }
4097 } else if (S_ISDIR(sctx->cur_inode_mode) &&
4098 !list_empty(&sctx->deleted_refs)) {
4099 /*
4100 * We have a moved dir. Add the old parent to check_dirs.
4101 */
4102 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
4103 list);
4104 ret = dup_ref(cur, &check_dirs);
4105 if (ret < 0)
4106 goto out;
4107 } else if (!S_ISDIR(sctx->cur_inode_mode)) {
4108 /*
4109 * We have a non-dir inode. Go through all deleted refs and
4110 * unlink them if they were not already overwritten by other
4111 * inodes.
4112 */
4113 list_for_each_entry(cur, &sctx->deleted_refs, list) {
4114 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
4115 sctx->cur_ino, sctx->cur_inode_gen,
4116 cur->name, cur->name_len);
4117 if (ret < 0)
4118 goto out;
4119 if (!ret) {
4120 /*
4121 * If we orphanized any ancestor before, we need
4122 * to recompute the full path for deleted names,
4123 * since any such path was computed before we
4124 * processed any references and orphanized any
4125 * ancestor inode.
4126 */
4127 if (orphanized_ancestor) {
4128 ret = update_ref_path(sctx, cur);
4129 if (ret < 0)
4130 goto out;
4131 }
4132 ret = send_unlink(sctx, cur->full_path);
4133 if (ret < 0)
4134 goto out;
4135 }
4136 ret = dup_ref(cur, &check_dirs);
4137 if (ret < 0)
4138 goto out;
4139 }
4140 /*
4141 * If the inode is still an orphan, unlink the orphan. This may
4142 * happen when a previous inode did overwrite the first ref
4143 * of this inode and no new refs were added for the current
4144 * inode. Unlinking does not mean that the inode is deleted in
4145 * all cases. There may still be links to this inode in other
4146 * places.
4147 */
4148 if (is_orphan) {
4149 ret = send_unlink(sctx, valid_path);
4150 if (ret < 0)
4151 goto out;
4152 }
4153 }
4154
4155 /*
4156 * We have collected all parent dirs where cur_inode was once located. We
4157 * now go through all these dirs and check if they are pending for
4158 * deletion and if it's finally possible to perform the rmdir now.
4159 * We also update the inode stats of the parent dirs here.
4160 */
4161 list_for_each_entry(cur, &check_dirs, list) {
4162 /*
4163 * In case we had refs into dirs that were not processed yet,
4164 * we don't need to do the utime and rmdir logic for these dirs.
4165 * The dir will be processed later.
4166 */
4167 if (cur->dir > sctx->cur_ino)
4168 continue;
4169
4170 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
4171 if (ret < 0)
4172 goto out;
4173
4174 if (ret == inode_state_did_create ||
4175 ret == inode_state_no_change) {
4176 /* TODO delayed utimes */
4177 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
4178 if (ret < 0)
4179 goto out;
4180 } else if (ret == inode_state_did_delete &&
4181 cur->dir != last_dir_ino_rm) {
4182 ret = can_rmdir(sctx, cur->dir, cur->dir_gen,
4183 sctx->cur_ino);
4184 if (ret < 0)
4185 goto out;
4186 if (ret) {
4187 ret = get_cur_path(sctx, cur->dir,
4188 cur->dir_gen, valid_path);
4189 if (ret < 0)
4190 goto out;
4191 ret = send_rmdir(sctx, valid_path);
4192 if (ret < 0)
4193 goto out;
4194 last_dir_ino_rm = cur->dir;
4195 }
4196 }
4197 }
4198
4199 ret = 0;
4200
4201 out:
4202 __free_recorded_refs(&check_dirs);
4203 free_recorded_refs(sctx);
4204 fs_path_free(valid_path);
4205 return ret;
4206 }
4207
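/*
 * Record one reference of the current inode: compute the full current path
 * of the parent directory @dir in @root, append @name to it and add the
 * result to the @refs list. The temporary fs_path is freed on error; on
 * success it is owned by the recorded reference and freed with it later.
 */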
4208 static int record_ref(struct btrfs_root *root, u64 dir, struct fs_path *name,
4209 void *ctx, struct list_head *refs)
4210 {
4211 int ret = 0;
4212 struct send_ctx *sctx = ctx;
4213 struct fs_path *p;
4214 u64 gen;
4215
4216 p = fs_path_alloc();
4217 if (!p)
4218 return -ENOMEM;
4219
4220 ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL,
4221 NULL, NULL);
4222 if (ret < 0)
4223 goto out;
4224
4225 ret = get_cur_path(sctx, dir, gen, p);
4226 if (ret < 0)
4227 goto out;
4228 ret = fs_path_add_path(p, name);
4229 if (ret < 0)
4230 goto out;
4231
4232 ret = __record_ref(refs, dir, gen, p);
4233
4234 out:
4235 if (ret)
4236 fs_path_free(p);
4237 return ret;
4238 }
4239
4240 static int __record_new_ref(int num, u64 dir, int index,
4241 struct fs_path *name,
4242 void *ctx)
4243 {
4244 struct send_ctx *sctx = ctx;
4245 return record_ref(sctx->send_root, dir, name, ctx, &sctx->new_refs);
4246 }
4247
4248
4249 static int __record_deleted_ref(int num, u64 dir, int index,
4250 struct fs_path *name,
4251 void *ctx)
4252 {
4253 struct send_ctx *sctx = ctx;
4254 return record_ref(sctx->parent_root, dir, name, ctx,
4255 &sctx->deleted_refs);
4256 }
4257
4258 static int record_new_ref(struct send_ctx *sctx)
4259 {
4260 int ret;
4261
4262 ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4263 sctx->cmp_key, 0, __record_new_ref, sctx);
4264 if (ret < 0)
4265 goto out;
4266 ret = 0;
4267
4268 out:
4269 return ret;
4270 }
4271
4272 static int record_deleted_ref(struct send_ctx *sctx)
4273 {
4274 int ret;
4275
4276 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4277 sctx->cmp_key, 0, __record_deleted_ref, sctx);
4278 if (ret < 0)
4279 goto out;
4280 ret = 0;
4281
4282 out:
4283 return ret;
4284 }
4285
4286 struct find_ref_ctx {
4287 u64 dir;
4288 u64 dir_gen;
4289 struct btrfs_root *root;
4290 struct fs_path *name;
4291 int found_idx;
4292 };
4293
4294 static int __find_iref(int num, u64 dir, int index,
4295 struct fs_path *name,
4296 void *ctx_)
4297 {
4298 struct find_ref_ctx *ctx = ctx_;
4299 u64 dir_gen;
4300 int ret;
4301
4302 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
4303 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
4304 /*
4305 * To avoid doing extra lookups we'll only do this if everything
4306 * else matches.
4307 */
4308 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
4309 NULL, NULL, NULL);
4310 if (ret)
4311 return ret;
4312 if (dir_gen != ctx->dir_gen)
4313 return 0;
4314 ctx->found_idx = num;
4315 return 1;
4316 }
4317 return 0;
4318 }
4319
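/*
 * Search the inode ref item at @path/@key for a reference matching the
 * (@dir, @dir_gen, @name) triple. Returns the index of the matching ref,
 * -ENOENT if there is none, or a negative errno on error.
 */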
4320 static int find_iref(struct btrfs_root *root,
4321 struct btrfs_path *path,
4322 struct btrfs_key *key,
4323 u64 dir, u64 dir_gen, struct fs_path *name)
4324 {
4325 int ret;
4326 struct find_ref_ctx ctx;
4327
4328 ctx.dir = dir;
4329 ctx.name = name;
4330 ctx.dir_gen = dir_gen;
4331 ctx.found_idx = -1;
4332 ctx.root = root;
4333
4334 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
4335 if (ret < 0)
4336 return ret;
4337
4338 if (ctx.found_idx == -1)
4339 return -ENOENT;
4340
4341 return ctx.found_idx;
4342 }
4343
4344 static int __record_changed_new_ref(int num, u64 dir, int index,
4345 struct fs_path *name,
4346 void *ctx)
4347 {
4348 u64 dir_gen;
4349 int ret;
4350 struct send_ctx *sctx = ctx;
4351
4352 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
4353 NULL, NULL, NULL);
4354 if (ret)
4355 return ret;
4356
4357 ret = find_iref(sctx->parent_root, sctx->right_path,
4358 sctx->cmp_key, dir, dir_gen, name);
4359 if (ret == -ENOENT)
4360 ret = __record_new_ref(num, dir, index, name, sctx);
4361 else if (ret > 0)
4362 ret = 0;
4363
4364 return ret;
4365 }
4366
4367 static int __record_changed_deleted_ref(int num, u64 dir, int index,
4368 struct fs_path *name,
4369 void *ctx)
4370 {
4371 u64 dir_gen;
4372 int ret;
4373 struct send_ctx *sctx = ctx;
4374
4375 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
4376 NULL, NULL, NULL);
4377 if (ret)
4378 return ret;
4379
4380 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
4381 dir, dir_gen, name);
4382 if (ret == -ENOENT)
4383 ret = __record_deleted_ref(num, dir, index, name, sctx);
4384 else if (ret > 0)
4385 ret = 0;
4386
4387 return ret;
4388 }
4389
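/*
 * For an inode present in both trees, record the delta of its references:
 * refs found only in the send root are recorded as new refs, refs found only
 * in the parent root as deleted refs. Refs present in both trees are
 * unchanged and skipped.
 */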
4390 static int record_changed_ref(struct send_ctx *sctx)
4391 {
4392 int ret = 0;
4393
4394 ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
4395 sctx->cmp_key, 0, __record_changed_new_ref, sctx);
4396 if (ret < 0)
4397 goto out;
4398 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
4399 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
4400 if (ret < 0)
4401 goto out;
4402 ret = 0;
4403
4404 out:
4405 return ret;
4406 }
4407
4408 /*
4409 * Record and process all refs at once. Needed when an inode changes the
4410 * generation number, which means that it was deleted and recreated.
4411 */
4412 static int process_all_refs(struct send_ctx *sctx,
4413 enum btrfs_compare_tree_result cmd)
4414 {
4415 int ret;
4416 struct btrfs_root *root;
4417 struct btrfs_path *path;
4418 struct btrfs_key key;
4419 struct btrfs_key found_key;
4420 struct extent_buffer *eb;
4421 int slot;
4422 iterate_inode_ref_t cb;
4423 int pending_move = 0;
4424
4425 path = alloc_path_for_send();
4426 if (!path)
4427 return -ENOMEM;
4428
4429 if (cmd == BTRFS_COMPARE_TREE_NEW) {
4430 root = sctx->send_root;
4431 cb = __record_new_ref;
4432 } else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
4433 root = sctx->parent_root;
4434 cb = __record_deleted_ref;
4435 } else {
4436 btrfs_err(sctx->send_root->fs_info,
4437 "Wrong command %d in process_all_refs", cmd);
4438 ret = -EINVAL;
4439 goto out;
4440 }
4441
4442 key.objectid = sctx->cmp_key->objectid;
4443 key.type = BTRFS_INODE_REF_KEY;
4444 key.offset = 0;
4445 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4446 if (ret < 0)
4447 goto out;
4448
4449 while (1) {
4450 eb = path->nodes[0];
4451 slot = path->slots[0];
4452 if (slot >= btrfs_header_nritems(eb)) {
4453 ret = btrfs_next_leaf(root, path);
4454 if (ret < 0)
4455 goto out;
4456 else if (ret > 0)
4457 break;
4458 continue;
4459 }
4460
4461 btrfs_item_key_to_cpu(eb, &found_key, slot);
4462
4463 if (found_key.objectid != key.objectid ||
4464 (found_key.type != BTRFS_INODE_REF_KEY &&
4465 found_key.type != BTRFS_INODE_EXTREF_KEY))
4466 break;
4467
4468 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
4469 if (ret < 0)
4470 goto out;
4471
4472 path->slots[0]++;
4473 }
4474 btrfs_release_path(path);
4475
4476 /*
4477 * We don't actually care about pending_move as we are simply
4478 * re-creating this inode and will be renaming it into place once we
4479 * rename the parent directory.
4480 */
4481 ret = process_recorded_refs(sctx, &pending_move);
4482 out:
4483 btrfs_free_path(path);
4484 return ret;
4485 }
4486
4487 static int send_set_xattr(struct send_ctx *sctx,
4488 struct fs_path *path,
4489 const char *name, int name_len,
4490 const char *data, int data_len)
4491 {
4492 int ret = 0;
4493
4494 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
4495 if (ret < 0)
4496 goto out;
4497
4498 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
4499 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
4500 TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
4501
4502 ret = send_cmd(sctx);
4503
4504 tlv_put_failure:
4505 out:
4506 return ret;
4507 }
4508
4509 static int send_remove_xattr(struct send_ctx *sctx,
4510 struct fs_path *path,
4511 const char *name, int name_len)
4512 {
4513 int ret = 0;
4514
4515 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
4516 if (ret < 0)
4517 goto out;
4518
4519 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
4520 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
4521
4522 ret = send_cmd(sctx);
4523
4524 tlv_put_failure:
4525 out:
4526 return ret;
4527 }
4528
4529 static int __process_new_xattr(int num, struct btrfs_key *di_key,
4530 const char *name, int name_len,
4531 const char *data, int data_len,
4532 u8 type, void *ctx)
4533 {
4534 int ret;
4535 struct send_ctx *sctx = ctx;
4536 struct fs_path *p;
4537 struct posix_acl_xattr_header dummy_acl;
4538
4539 p = fs_path_alloc();
4540 if (!p)
4541 return -ENOMEM;
4542
4543 /*
4544 * This hack is needed because empty acls are stored as zero byte
4545 * data in xattrs. The problem is that receiving these zero byte
4546 * acls will fail later. To fix this, we send a dummy acl list that
4547 * only contains the version number and no entries.
4548 */
4549 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
4550 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
4551 if (data_len == 0) {
4552 dummy_acl.a_version =
4553 cpu_to_le32(POSIX_ACL_XATTR_VERSION);
4554 data = (char *)&dummy_acl;
4555 data_len = sizeof(dummy_acl);
4556 }
4557 }
4558
4559 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4560 if (ret < 0)
4561 goto out;
4562
4563 ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
4564
4565 out:
4566 fs_path_free(p);
4567 return ret;
4568 }
4569
4570 static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
4571 const char *name, int name_len,
4572 const char *data, int data_len,
4573 u8 type, void *ctx)
4574 {
4575 int ret;
4576 struct send_ctx *sctx = ctx;
4577 struct fs_path *p;
4578
4579 p = fs_path_alloc();
4580 if (!p)
4581 return -ENOMEM;
4582
4583 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4584 if (ret < 0)
4585 goto out;
4586
4587 ret = send_remove_xattr(sctx, p, name, name_len);
4588
4589 out:
4590 fs_path_free(p);
4591 return ret;
4592 }
4593
4594 static int process_new_xattr(struct send_ctx *sctx)
4595 {
4596 int ret = 0;
4597
4598 ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4599 __process_new_xattr, sctx);
4600
4601 return ret;
4602 }
4603
4604 static int process_deleted_xattr(struct send_ctx *sctx)
4605 {
4606 return iterate_dir_item(sctx->parent_root, sctx->right_path,
4607 __process_deleted_xattr, sctx);
4608 }
4609
4610 struct find_xattr_ctx {
4611 const char *name;
4612 int name_len;
4613 int found_idx;
4614 char *found_data;
4615 int found_data_len;
4616 };
4617
4618 static int __find_xattr(int num, struct btrfs_key *di_key,
4619 const char *name, int name_len,
4620 const char *data, int data_len,
4621 u8 type, void *vctx)
4622 {
4623 struct find_xattr_ctx *ctx = vctx;
4624
4625 if (name_len == ctx->name_len &&
4626 strncmp(name, ctx->name, name_len) == 0) {
4627 ctx->found_idx = num;
4628 ctx->found_data_len = data_len;
4629 ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
4630 if (!ctx->found_data)
4631 return -ENOMEM;
4632 return 1;
4633 }
4634 return 0;
4635 }
4636
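/*
 * Look up the xattr named @name in the xattr item at @path/@key. Returns the
 * index of the matching xattr or -ENOENT if there is none. If @data is not
 * NULL, *@data is set to a kmalloc'ed copy of the xattr value (the caller
 * must kfree() it) and *@data_len to its length.
 */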
4637 static int find_xattr(struct btrfs_root *root,
4638 struct btrfs_path *path,
4639 struct btrfs_key *key,
4640 const char *name, int name_len,
4641 char **data, int *data_len)
4642 {
4643 int ret;
4644 struct find_xattr_ctx ctx;
4645
4646 ctx.name = name;
4647 ctx.name_len = name_len;
4648 ctx.found_idx = -1;
4649 ctx.found_data = NULL;
4650 ctx.found_data_len = 0;
4651
4652 ret = iterate_dir_item(root, path, __find_xattr, &ctx);
4653 if (ret < 0)
4654 return ret;
4655
4656 if (ctx.found_idx == -1)
4657 return -ENOENT;
4658 if (data) {
4659 *data = ctx.found_data;
4660 *data_len = ctx.found_data_len;
4661 } else {
4662 kfree(ctx.found_data);
4663 }
4664 return ctx.found_idx;
4665 }
4666
4667
4668 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
4669 const char *name, int name_len,
4670 const char *data, int data_len,
4671 u8 type, void *ctx)
4672 {
4673 int ret;
4674 struct send_ctx *sctx = ctx;
4675 char *found_data = NULL;
4676 int found_data_len = 0;
4677
4678 ret = find_xattr(sctx->parent_root, sctx->right_path,
4679 sctx->cmp_key, name, name_len, &found_data,
4680 &found_data_len);
4681 if (ret == -ENOENT) {
4682 ret = __process_new_xattr(num, di_key, name, name_len, data,
4683 data_len, type, ctx);
4684 } else if (ret >= 0) {
4685 if (data_len != found_data_len ||
4686 memcmp(data, found_data, data_len)) {
4687 ret = __process_new_xattr(num, di_key, name, name_len,
4688 data, data_len, type, ctx);
4689 } else {
4690 ret = 0;
4691 }
4692 }
4693
4694 kfree(found_data);
4695 return ret;
4696 }
4697
4698 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4699 const char *name, int name_len,
4700 const char *data, int data_len,
4701 u8 type, void *ctx)
4702 {
4703 int ret;
4704 struct send_ctx *sctx = ctx;
4705
4706 ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4707 name, name_len, NULL, NULL);
4708 if (ret == -ENOENT)
4709 ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4710 data_len, type, ctx);
4711 else if (ret >= 0)
4712 ret = 0;
4713
4714 return ret;
4715 }
4716
4717 static int process_changed_xattr(struct send_ctx *sctx)
4718 {
4719 int ret = 0;
4720
4721 ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4722 __process_changed_new_xattr, sctx);
4723 if (ret < 0)
4724 goto out;
4725 ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4726 __process_changed_deleted_xattr, sctx);
4727
4728 out:
4729 return ret;
4730 }
4731
4732 static int process_all_new_xattrs(struct send_ctx *sctx)
4733 {
4734 int ret;
4735 struct btrfs_root *root;
4736 struct btrfs_path *path;
4737 struct btrfs_key key;
4738 struct btrfs_key found_key;
4739 struct extent_buffer *eb;
4740 int slot;
4741
4742 path = alloc_path_for_send();
4743 if (!path)
4744 return -ENOMEM;
4745
4746 root = sctx->send_root;
4747
4748 key.objectid = sctx->cmp_key->objectid;
4749 key.type = BTRFS_XATTR_ITEM_KEY;
4750 key.offset = 0;
4751 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4752 if (ret < 0)
4753 goto out;
4754
4755 while (1) {
4756 eb = path->nodes[0];
4757 slot = path->slots[0];
4758 if (slot >= btrfs_header_nritems(eb)) {
4759 ret = btrfs_next_leaf(root, path);
4760 if (ret < 0) {
4761 goto out;
4762 } else if (ret > 0) {
4763 ret = 0;
4764 break;
4765 }
4766 continue;
4767 }
4768
4769 btrfs_item_key_to_cpu(eb, &found_key, slot);
4770 if (found_key.objectid != key.objectid ||
4771 found_key.type != key.type) {
4772 ret = 0;
4773 goto out;
4774 }
4775
4776 ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
4777 if (ret < 0)
4778 goto out;
4779
4780 path->slots[0]++;
4781 }
4782
4783 out:
4784 btrfs_free_path(path);
4785 return ret;
4786 }
4787
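/*
 * Copy up to @len bytes starting at file offset @offset from the current
 * inode into sctx->read_buf, going through the page cache (with readahead)
 * and clamping the length to the inode's i_size. Returns the number of
 * bytes copied or a negative errno.
 */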
4788 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4789 {
4790 struct btrfs_root *root = sctx->send_root;
4791 struct btrfs_fs_info *fs_info = root->fs_info;
4792 struct inode *inode;
4793 struct page *page;
4794 char *addr;
4795 struct btrfs_key key;
4796 pgoff_t index = offset >> PAGE_SHIFT;
4797 pgoff_t last_index;
4798 unsigned pg_offset = offset_in_page(offset);
4799 ssize_t ret = 0;
4800
4801 key.objectid = sctx->cur_ino;
4802 key.type = BTRFS_INODE_ITEM_KEY;
4803 key.offset = 0;
4804
4805 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4806 if (IS_ERR(inode))
4807 return PTR_ERR(inode);
4808
4809 if (offset + len > i_size_read(inode)) {
4810 if (offset > i_size_read(inode))
4811 len = 0;
4812 else
4813 len = i_size_read(inode) - offset;
4814 }
4815 if (len == 0)
4816 goto out;
4817
4818 last_index = (offset + len - 1) >> PAGE_SHIFT;
4819
4820 /* initial readahead */
4821 memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4822 file_ra_state_init(&sctx->ra, inode->i_mapping);
4823
4824 while (index <= last_index) {
4825 unsigned cur_len = min_t(unsigned, len,
4826 PAGE_SIZE - pg_offset);
4827
4828 page = find_lock_page(inode->i_mapping, index);
4829 if (!page) {
4830 page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
4831 NULL, index, last_index + 1 - index);
4832
4833 page = find_or_create_page(inode->i_mapping, index,
4834 GFP_KERNEL);
4835 if (!page) {
4836 ret = -ENOMEM;
4837 break;
4838 }
4839 }
4840
4841 if (PageReadahead(page)) {
4842 page_cache_async_readahead(inode->i_mapping, &sctx->ra,
4843 NULL, page, index, last_index + 1 - index);
4844 }
4845
4846 if (!PageUptodate(page)) {
4847 btrfs_readpage(NULL, page);
4848 lock_page(page);
4849 if (!PageUptodate(page)) {
4850 unlock_page(page);
4851 put_page(page);
4852 ret = -EIO;
4853 break;
4854 }
4855 }
4856
4857 addr = kmap(page);
4858 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
4859 kunmap(page);
4860 unlock_page(page);
4861 put_page(page);
4862 index++;
4863 pg_offset = 0;
4864 len -= cur_len;
4865 ret += cur_len;
4866 }
4867 out:
4868 iput(inode);
4869 return ret;
4870 }
4871
4872 /*
4873 * Read some bytes from the current inode/file and send a write command to
4874 * user space.
4875 */
4876 static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
4877 {
4878 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
4879 int ret = 0;
4880 struct fs_path *p;
4881 ssize_t num_read = 0;
4882
4883 p = fs_path_alloc();
4884 if (!p)
4885 return -ENOMEM;
4886
4887 btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
4888
4889 num_read = fill_read_buf(sctx, offset, len);
4890 if (num_read <= 0) {
4891 if (num_read < 0)
4892 ret = num_read;
4893 goto out;
4894 }
4895
4896 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4897 if (ret < 0)
4898 goto out;
4899
4900 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4901 if (ret < 0)
4902 goto out;
4903
4904 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4905 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4906 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
4907
4908 ret = send_cmd(sctx);
4909
4910 tlv_put_failure:
4911 out:
4912 fs_path_free(p);
4913 if (ret < 0)
4914 return ret;
4915 return num_read;
4916 }
4917
4918 /*
4919 * Send a clone command to user space.
4920 */
4921 static int send_clone(struct send_ctx *sctx,
4922 u64 offset, u32 len,
4923 struct clone_root *clone_root)
4924 {
4925 int ret = 0;
4926 struct fs_path *p;
4927 u64 gen;
4928
4929 btrfs_debug(sctx->send_root->fs_info,
4930 "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
4931 offset, len, clone_root->root->root_key.objectid,
4932 clone_root->ino, clone_root->offset);
4933
4934 p = fs_path_alloc();
4935 if (!p)
4936 return -ENOMEM;
4937
4938 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
4939 if (ret < 0)
4940 goto out;
4941
4942 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4943 if (ret < 0)
4944 goto out;
4945
4946 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4947 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
4948 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4949
4950 if (clone_root->root == sctx->send_root) {
4951 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
4952 &gen, NULL, NULL, NULL, NULL);
4953 if (ret < 0)
4954 goto out;
4955 ret = get_cur_path(sctx, clone_root->ino, gen, p);
4956 } else {
4957 ret = get_inode_path(clone_root->root, clone_root->ino, p);
4958 }
4959 if (ret < 0)
4960 goto out;
4961
4962 /*
4963 * If the parent we're using has a received_uuid set then use that as
4964 * our clone source as that is what we will look for when doing a
4965 * receive.
4966 *
4967 * This covers the case that we create a snapshot off of a received
4968 * subvolume and then use that as the parent and try to receive on a
4969 * different host.
4970 */
4971 if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
4972 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4973 clone_root->root->root_item.received_uuid);
4974 else
4975 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
4976 clone_root->root->root_item.uuid);
4977 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
4978 le64_to_cpu(clone_root->root->root_item.ctransid));
4979 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
4980 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
4981 clone_root->offset);
4982
4983 ret = send_cmd(sctx);
4984
4985 tlv_put_failure:
4986 out:
4987 fs_path_free(p);
4988 return ret;
4989 }
4990
4991 /*
4992 * Send an update extent command to user space.
4993 */
4994 static int send_update_extent(struct send_ctx *sctx,
4995 u64 offset, u32 len)
4996 {
4997 int ret = 0;
4998 struct fs_path *p;
4999
5000 p = fs_path_alloc();
5001 if (!p)
5002 return -ENOMEM;
5003
5004 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
5005 if (ret < 0)
5006 goto out;
5007
5008 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
5009 if (ret < 0)
5010 goto out;
5011
5012 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
5013 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
5014 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
5015
5016 ret = send_cmd(sctx);
5017
5018 tlv_put_failure:
5019 out:
5020 fs_path_free(p);
5021 return ret;
5022 }
5023
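/*
 * Send zero-filled write commands for the hole starting at
 * sctx->cur_inode_last_extent and ending at min(@end, i_size), or a single
 * update extent command if file data is excluded from the stream.
 */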
5024 static int send_hole(struct send_ctx *sctx, u64 end)
5025 {
5026 struct fs_path *p = NULL;
5027 u64 offset = sctx->cur_inode_last_extent;
5028 u64 len;
5029 int ret = 0;
5030
5031 /*
5032 * A hole that starts at EOF or beyond it. Since we do not yet support
5033 * fallocate (for extent preallocation and hole punching), sending a
5034 * write of zeroes starting at EOF or beyond would later require issuing
5035 * a truncate operation which would undo the write and achieve nothing.
5036 */
5037 if (offset >= sctx->cur_inode_size)
5038 return 0;
5039
5040 /*
5041 * Don't go beyond the inode's i_size due to prealloc extents that start
5042 * after the i_size.
5043 */
5044 end = min_t(u64, end, sctx->cur_inode_size);
5045
5046 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
5047 return send_update_extent(sctx, offset, end - offset);
5048
5049 p = fs_path_alloc();
5050 if (!p)
5051 return -ENOMEM;
5052 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
5053 if (ret < 0)
5054 goto tlv_put_failure;
5055 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
5056 while (offset < end) {
5057 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
5058
5059 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
5060 if (ret < 0)
5061 break;
5062 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
5063 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
5064 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
5065 ret = send_cmd(sctx);
5066 if (ret < 0)
5067 break;
5068 offset += len;
5069 }
5070 sctx->cur_inode_next_write_offset = offset;
5071 tlv_put_failure:
5072 fs_path_free(p);
5073 return ret;
5074 }
5075
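/*
 * Send the file range [offset, offset + len) as a sequence of write commands
 * of at most BTRFS_SEND_READ_SIZE bytes each, or as a single update extent
 * command if file data is excluded from the stream.
 */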
5076 static int send_extent_data(struct send_ctx *sctx,
5077 const u64 offset,
5078 const u64 len)
5079 {
5080 u64 sent = 0;
5081
5082 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
5083 return send_update_extent(sctx, offset, len);
5084
5085 while (sent < len) {
5086 u64 size = len - sent;
5087 int ret;
5088
5089 if (size > BTRFS_SEND_READ_SIZE)
5090 size = BTRFS_SEND_READ_SIZE;
5091 ret = send_write(sctx, offset + sent, size);
5092 if (ret < 0)
5093 return ret;
5094 if (!ret)
5095 break;
5096 sent += ret;
5097 }
5098 return 0;
5099 }
5100
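/*
 * Try to satisfy the range [offset, offset + len) of the current inode with
 * clone commands from the given clone source, walking the source's file
 * extent items and falling back to regular write commands for any part that
 * cannot be cloned (holes, different extents, ranges beyond the source's
 * i_size or unaligned eof blocks).
 */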
5101 static int clone_range(struct send_ctx *sctx,
5102 struct clone_root *clone_root,
5103 const u64 disk_byte,
5104 u64 data_offset,
5105 u64 offset,
5106 u64 len)
5107 {
5108 struct btrfs_path *path;
5109 struct btrfs_key key;
5110 int ret;
5111 u64 clone_src_i_size = 0;
5112
5113 /*
5114 * Prevent cloning from a zero offset with a length matching the sector
5115 * size because in some scenarios this will make the receiver fail.
5116 *
5117 * For example, if in the source filesystem the extent at offset 0
5118 * has a length of sectorsize and it was written using direct IO, then
5119 * it can never be an inline extent (even if compression is enabled).
5120 * Then this extent can be cloned in the original filesystem to a non
5121 * zero file offset, but it may not be possible to clone in the
5122 * destination filesystem because it can be inlined due to compression
5123 * on the destination filesystem (as the receiver's write operations are
5124 * always done using buffered IO). The same happens when the original
5125 * filesystem does not have compression enabled but the destination
5126 * filesystem has.
5127 */
5128 if (clone_root->offset == 0 &&
5129 len == sctx->send_root->fs_info->sectorsize)
5130 return send_extent_data(sctx, offset, len);
5131
5132 path = alloc_path_for_send();
5133 if (!path)
5134 return -ENOMEM;
5135
5136 /*
5137 * There are inodes that have extents that lie behind their i_size. Don't
5138 * accept clones from these extents.
5139 */
5140 ret = __get_inode_info(clone_root->root, path, clone_root->ino,
5141 &clone_src_i_size, NULL, NULL, NULL, NULL, NULL);
5142 btrfs_release_path(path);
5143 if (ret < 0)
5144 goto out;
5145
5146 /*
5147 * We can't send a clone operation for the entire range if we find
5148 * extent items in the respective range in the source file that
5149 * refer to different extents or if we find holes.
5150 * So check for that and do a mix of clone and regular write/copy
5151 * operations if needed.
5152 *
5153 * Example:
5154 *
5155 * mkfs.btrfs -f /dev/sda
5156 * mount /dev/sda /mnt
5157 * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
5158 * cp --reflink=always /mnt/foo /mnt/bar
5159 * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
5160 * btrfs subvolume snapshot -r /mnt /mnt/snap
5161 *
5162 * If when we send the snapshot and we are processing file bar (which
5163 * has a higher inode number than foo) we blindly send a clone operation
5164 * for the [0, 100K[ range from foo to bar, the receiver ends up getting
5165 * a file bar that matches the content of file foo - iow, doesn't match
5166 * the content from bar in the original filesystem.
5167 */
5168 key.objectid = clone_root->ino;
5169 key.type = BTRFS_EXTENT_DATA_KEY;
5170 key.offset = clone_root->offset;
5171 ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
5172 if (ret < 0)
5173 goto out;
5174 if (ret > 0 && path->slots[0] > 0) {
5175 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
5176 if (key.objectid == clone_root->ino &&
5177 key.type == BTRFS_EXTENT_DATA_KEY)
5178 path->slots[0]--;
5179 }
5180
5181 while (true) {
5182 struct extent_buffer *leaf = path->nodes[0];
5183 int slot = path->slots[0];
5184 struct btrfs_file_extent_item *ei;
5185 u8 type;
5186 u64 ext_len;
5187 u64 clone_len;
5188 u64 clone_data_offset;
5189
5190 if (slot >= btrfs_header_nritems(leaf)) {
5191 ret = btrfs_next_leaf(clone_root->root, path);
5192 if (ret < 0)
5193 goto out;
5194 else if (ret > 0)
5195 break;
5196 continue;
5197 }
5198
5199 btrfs_item_key_to_cpu(leaf, &key, slot);
5200
5201 /*
5202 * We might have an implicit trailing hole (NO_HOLES feature
5203 * enabled). We deal with it after leaving this loop.
5204 */
5205 if (key.objectid != clone_root->ino ||
5206 key.type != BTRFS_EXTENT_DATA_KEY)
5207 break;
5208
5209 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5210 type = btrfs_file_extent_type(leaf, ei);
5211 if (type == BTRFS_FILE_EXTENT_INLINE) {
5212 ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
5213 ext_len = PAGE_ALIGN(ext_len);
5214 } else {
5215 ext_len = btrfs_file_extent_num_bytes(leaf, ei);
5216 }
5217
5218 if (key.offset + ext_len <= clone_root->offset)
5219 goto next;
5220
5221 if (key.offset > clone_root->offset) {
5222 /* Implicit hole, NO_HOLES feature enabled. */
5223 u64 hole_len = key.offset - clone_root->offset;
5224
5225 if (hole_len > len)
5226 hole_len = len;
5227 ret = send_extent_data(sctx, offset, hole_len);
5228 if (ret < 0)
5229 goto out;
5230
5231 len -= hole_len;
5232 if (len == 0)
5233 break;
5234 offset += hole_len;
5235 clone_root->offset += hole_len;
5236 data_offset += hole_len;
5237 }
5238
5239 if (key.offset >= clone_root->offset + len)
5240 break;
5241
5242 if (key.offset >= clone_src_i_size)
5243 break;
5244
5245 if (key.offset + ext_len > clone_src_i_size)
5246 ext_len = clone_src_i_size - key.offset;
5247
5248 clone_data_offset = btrfs_file_extent_offset(leaf, ei);
5249 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) {
5250 clone_root->offset = key.offset;
5251 if (clone_data_offset < data_offset &&
5252 clone_data_offset + ext_len > data_offset) {
5253 u64 extent_offset;
5254
5255 extent_offset = data_offset - clone_data_offset;
5256 ext_len -= extent_offset;
5257 clone_data_offset += extent_offset;
5258 clone_root->offset += extent_offset;
5259 }
5260 }
5261
5262 clone_len = min_t(u64, ext_len, len);
5263
5264 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte &&
5265 clone_data_offset == data_offset) {
5266 const u64 src_end = clone_root->offset + clone_len;
5267 const u64 sectorsize = SZ_64K;
5268
5269 /*
5270 * We can't clone the last block, when its size is not
5271 * sector size aligned, into the middle of a file. If we
5272 * do so, the receiver will get a failure (-EINVAL) when
5273 * trying to clone or will silently corrupt the data in
5274 * the destination file if it's on a kernel without the
5275 * fix introduced by commit ac765f83f1397646
5276 * ("Btrfs: fix data corruption due to cloning of eof
5277 * block").
5278 *
5279 * So issue a clone of the aligned down range plus a
5280 * regular write for the eof block, if we hit that case.
5281 *
5282 * Also, we use the maximum possible sector size, 64K,
5283 * because we don't know what's the sector size of the
5284 * filesystem that receives the stream, so we have to
5285 * assume the largest possible sector size.
5286 */
5287 if (src_end == clone_src_i_size &&
5288 !IS_ALIGNED(src_end, sectorsize) &&
5289 offset + clone_len < sctx->cur_inode_size) {
5290 u64 slen;
5291
5292 slen = ALIGN_DOWN(src_end - clone_root->offset,
5293 sectorsize);
5294 if (slen > 0) {
5295 ret = send_clone(sctx, offset, slen,
5296 clone_root);
5297 if (ret < 0)
5298 goto out;
5299 }
5300 ret = send_extent_data(sctx, offset + slen,
5301 clone_len - slen);
5302 } else {
5303 ret = send_clone(sctx, offset, clone_len,
5304 clone_root);
5305 }
5306 } else {
5307 ret = send_extent_data(sctx, offset, clone_len);
5308 }
5309
5310 if (ret < 0)
5311 goto out;
5312
5313 len -= clone_len;
5314 if (len == 0)
5315 break;
5316 offset += clone_len;
5317 clone_root->offset += clone_len;
5318 data_offset += clone_len;
5319 next:
5320 path->slots[0]++;
5321 }
5322
5323 if (len > 0)
5324 ret = send_extent_data(sctx, offset, len);
5325 else
5326 ret = 0;
5327 out:
5328 btrfs_free_path(path);
5329 return ret;
5330 }
5331
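/*
 * Send the data of the file extent item at @path/@key, clamped to the
 * current inode's size, either through clone commands from @clone_root
 * (when a clone source exists and the range end is block aligned) or
 * through regular write commands.
 */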
5332 static int send_write_or_clone(struct send_ctx *sctx,
5333 struct btrfs_path *path,
5334 struct btrfs_key *key,
5335 struct clone_root *clone_root)
5336 {
5337 int ret = 0;
5338 struct btrfs_file_extent_item *ei;
5339 u64 offset = key->offset;
5340 u64 len;
5341 u8 type;
5342 u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
5343
5344 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
5345 struct btrfs_file_extent_item);
5346 type = btrfs_file_extent_type(path->nodes[0], ei);
5347 if (type == BTRFS_FILE_EXTENT_INLINE) {
5348 len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
5349 /*
5350 * It is possible the inline item won't cover the whole page,
5351 * but there may be items after this page. Make
5352 * sure to send the whole thing.
5353 */
5354 len = PAGE_ALIGN(len);
5355 } else {
5356 len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
5357 }
5358
5359 if (offset >= sctx->cur_inode_size) {
5360 ret = 0;
5361 goto out;
5362 }
5363 if (offset + len > sctx->cur_inode_size)
5364 len = sctx->cur_inode_size - offset;
5365 if (len == 0) {
5366 ret = 0;
5367 goto out;
5368 }
5369
5370 if (clone_root && IS_ALIGNED(offset + len, bs)) {
5371 u64 disk_byte;
5372 u64 data_offset;
5373
5374 disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
5375 data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
5376 ret = clone_range(sctx, clone_root, disk_byte, data_offset,
5377 offset, len);
5378 } else {
5379 ret = send_extent_data(sctx, offset, len);
5380 }
5381 sctx->cur_inode_next_write_offset = offset + len;
5382 out:
5383 return ret;
5384 }
5385
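/*
 * Check if the regular extent at @left_path/@ekey is fully covered, with
 * identical data, by extents in the parent snapshot. Returns 1 if the extent
 * is unchanged (and doesn't need to be sent), 0 if it changed and a negative
 * errno on error.
 */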
5386 static int is_extent_unchanged(struct send_ctx *sctx,
5387 struct btrfs_path *left_path,
5388 struct btrfs_key *ekey)
5389 {
5390 int ret = 0;
5391 struct btrfs_key key;
5392 struct btrfs_path *path = NULL;
5393 struct extent_buffer *eb;
5394 int slot;
5395 struct btrfs_key found_key;
5396 struct btrfs_file_extent_item *ei;
5397 u64 left_disknr;
5398 u64 right_disknr;
5399 u64 left_offset;
5400 u64 right_offset;
5401 u64 left_offset_fixed;
5402 u64 left_len;
5403 u64 right_len;
5404 u64 left_gen;
5405 u64 right_gen;
5406 u8 left_type;
5407 u8 right_type;
5408
5409 path = alloc_path_for_send();
5410 if (!path)
5411 return -ENOMEM;
5412
5413 eb = left_path->nodes[0];
5414 slot = left_path->slots[0];
5415 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
5416 left_type = btrfs_file_extent_type(eb, ei);
5417
5418 if (left_type != BTRFS_FILE_EXTENT_REG) {
5419 ret = 0;
5420 goto out;
5421 }
5422 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
5423 left_len = btrfs_file_extent_num_bytes(eb, ei);
5424 left_offset = btrfs_file_extent_offset(eb, ei);
5425 left_gen = btrfs_file_extent_generation(eb, ei);
5426
5427 /*
5428 * The following comments refer to these graphics. L is the left
5429 * extent which we are checking at the moment. 1-8 are the right
5430 * extents that we iterate over.
5431 *
5432 * |-----L-----|
5433 * |-1-|-2a-|-3-|-4-|-5-|-6-|
5434 *
5435 * |-----L-----|
5436 * |--1--|-2b-|...(same as above)
5437 *
5438 * Alternative situation. Happens on files where extents got split.
5439 * |-----L-----|
5440 * |-----------7-----------|-6-|
5441 *
5442 * Alternative situation. Happens on files which got larger.
5443 * |-----L-----|
5444 * |-8-|
5445 * Nothing follows after 8.
5446 */
5447
5448 key.objectid = ekey->objectid;
5449 key.type = BTRFS_EXTENT_DATA_KEY;
5450 key.offset = ekey->offset;
5451 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
5452 if (ret < 0)
5453 goto out;
5454 if (ret) {
5455 ret = 0;
5456 goto out;
5457 }
5458
5459 /*
5460 * Handle special case where the right side has no extents at all.
5461 */
5462 eb = path->nodes[0];
5463 slot = path->slots[0];
5464 btrfs_item_key_to_cpu(eb, &found_key, slot);
5465 if (found_key.objectid != key.objectid ||
5466 found_key.type != key.type) {
5467 /* If we're a hole then just pretend nothing changed */
5468 ret = (left_disknr) ? 0 : 1;
5469 goto out;
5470 }
5471
5472 /*
5473 * We're now on 2a, 2b or 7.
5474 */
5475 key = found_key;
5476 while (key.offset < ekey->offset + left_len) {
5477 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
5478 right_type = btrfs_file_extent_type(eb, ei);
5479 if (right_type != BTRFS_FILE_EXTENT_REG &&
5480 right_type != BTRFS_FILE_EXTENT_INLINE) {
5481 ret = 0;
5482 goto out;
5483 }
5484
5485 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
5486 right_len = btrfs_file_extent_ram_bytes(eb, ei);
5487 right_len = PAGE_ALIGN(right_len);
5488 } else {
5489 right_len = btrfs_file_extent_num_bytes(eb, ei);
5490 }
5491
5492 /*
5493 * Are we at extent 8? If yes, we know the extent is changed.
5494 * This may only happen on the first iteration.
5495 */
5496 if (found_key.offset + right_len <= ekey->offset) {
5497 /* If we're a hole just pretend nothing changed */
5498 ret = (left_disknr) ? 0 : 1;
5499 goto out;
5500 }
5501
5502 /*
5503 * We only kept iterating this far with an inline extent in order
5504 * to apply the above offset check to inline extents too; finding
5505 * an inline extent here means the extent changed. This should
5506 * normally not happen, but it's possible, for example, with an inline
5507 * compressed extent representing data with a size matching
5508 * the page size (currently the same as sector size).
5509 */
5510 if (right_type == BTRFS_FILE_EXTENT_INLINE) {
5511 ret = 0;
5512 goto out;
5513 }
5514
5515 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
5516 right_offset = btrfs_file_extent_offset(eb, ei);
5517 right_gen = btrfs_file_extent_generation(eb, ei);
5518
5519 left_offset_fixed = left_offset;
5520 if (key.offset < ekey->offset) {
5521 /* Fix the right offset for 2a and 7. */
5522 right_offset += ekey->offset - key.offset;
5523 } else {
5524 /* Fix the left offset for all behind 2a and 2b */
5525 left_offset_fixed += key.offset - ekey->offset;
5526 }
5527
5528 /*
5529 * Check if we have the same extent.
5530 */
5531 if (left_disknr != right_disknr ||
5532 left_offset_fixed != right_offset ||
5533 left_gen != right_gen) {
5534 ret = 0;
5535 goto out;
5536 }
5537
5538 /*
5539 * Go to the next extent.
5540 */
5541 ret = btrfs_next_item(sctx->parent_root, path);
5542 if (ret < 0)
5543 goto out;
5544 if (!ret) {
5545 eb = path->nodes[0];
5546 slot = path->slots[0];
5547 btrfs_item_key_to_cpu(eb, &found_key, slot);
5548 }
5549 if (ret || found_key.objectid != key.objectid ||
5550 found_key.type != key.type) {
5551 key.offset += right_len;
5552 break;
5553 }
5554 if (found_key.offset != key.offset + right_len) {
5555 ret = 0;
5556 goto out;
5557 }
5558 key = found_key;
5559 }
5560
5561 /*
5562 * We're now behind the left extent (treat as unchanged) or at the end
5563 * of the right side (treat as changed).
5564 */
5565 if (key.offset >= ekey->offset + left_len)
5566 ret = 1;
5567 else
5568 ret = 0;
5569
5570
5571 out:
5572 btrfs_free_path(path);
5573 return ret;
5574 }
5575
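/*
 * Find the file extent item of the current inode that covers or precedes
 * @offset in the send root and store its end offset (rounded up to the
 * sector size for inline extents) in sctx->cur_inode_last_extent, which is
 * used for hole detection.
 */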
5576 static int get_last_extent(struct send_ctx *sctx, u64 offset)
5577 {
5578 struct btrfs_path *path;
5579 struct btrfs_root *root = sctx->send_root;
5580 struct btrfs_file_extent_item *fi;
5581 struct btrfs_key key;
5582 u64 extent_end;
5583 u8 type;
5584 int ret;
5585
5586 path = alloc_path_for_send();
5587 if (!path)
5588 return -ENOMEM;
5589
5590 sctx->cur_inode_last_extent = 0;
5591
5592 key.objectid = sctx->cur_ino;
5593 key.type = BTRFS_EXTENT_DATA_KEY;
5594 key.offset = offset;
5595 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
5596 if (ret < 0)
5597 goto out;
5598 ret = 0;
5599 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
5600 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
5601 goto out;
5602
5603 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5604 struct btrfs_file_extent_item);
5605 type = btrfs_file_extent_type(path->nodes[0], fi);
5606 if (type == BTRFS_FILE_EXTENT_INLINE) {
5607 u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
5608 extent_end = ALIGN(key.offset + size,
5609 sctx->send_root->fs_info->sectorsize);
5610 } else {
5611 extent_end = key.offset +
5612 btrfs_file_extent_num_bytes(path->nodes[0], fi);
5613 }
5614 sctx->cur_inode_last_extent = extent_end;
5615 out:
5616 btrfs_free_path(path);
5617 return ret;
5618 }
5619
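/*
 * Check if the file range [start, end) of the current inode is a hole in the
 * parent snapshot, that is, covered by no file extent items or only by
 * extents with a zero disk bytenr. Returns 1 if it is a hole, 0 if it is
 * not and a negative errno on error.
 */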
5620 static int range_is_hole_in_parent(struct send_ctx *sctx,
5621 const u64 start,
5622 const u64 end)
5623 {
5624 struct btrfs_path *path;
5625 struct btrfs_key key;
5626 struct btrfs_root *root = sctx->parent_root;
5627 u64 search_start = start;
5628 int ret;
5629
5630 path = alloc_path_for_send();
5631 if (!path)
5632 return -ENOMEM;
5633
5634 key.objectid = sctx->cur_ino;
5635 key.type = BTRFS_EXTENT_DATA_KEY;
5636 key.offset = search_start;
5637 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5638 if (ret < 0)
5639 goto out;
5640 if (ret > 0 && path->slots[0] > 0)
5641 path->slots[0]--;
5642
5643 while (search_start < end) {
5644 struct extent_buffer *leaf = path->nodes[0];
5645 int slot = path->slots[0];
5646 struct btrfs_file_extent_item *fi;
5647 u64 extent_end;
5648
5649 if (slot >= btrfs_header_nritems(leaf)) {
5650 ret = btrfs_next_leaf(root, path);
5651 if (ret < 0)
5652 goto out;
5653 else if (ret > 0)
5654 break;
5655 continue;
5656 }
5657
5658 btrfs_item_key_to_cpu(leaf, &key, slot);
5659 if (key.objectid < sctx->cur_ino ||
5660 key.type < BTRFS_EXTENT_DATA_KEY)
5661 goto next;
5662 if (key.objectid > sctx->cur_ino ||
5663 key.type > BTRFS_EXTENT_DATA_KEY ||
5664 key.offset >= end)
5665 break;
5666
5667 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5668 if (btrfs_file_extent_type(leaf, fi) ==
5669 BTRFS_FILE_EXTENT_INLINE) {
5670 u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
5671
5672 extent_end = ALIGN(key.offset + size,
5673 root->fs_info->sectorsize);
5674 } else {
5675 extent_end = key.offset +
5676 btrfs_file_extent_num_bytes(leaf, fi);
5677 }
5678 if (extent_end <= start)
5679 goto next;
5680 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
5681 search_start = extent_end;
5682 goto next;
5683 }
5684 ret = 0;
5685 goto out;
5686 next:
5687 path->slots[0]++;
5688 }
5689 ret = 1;
5690 out:
5691 btrfs_free_path(path);
5692 return ret;
5693 }
5694
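/*
 * If there is a gap between the end of the last processed extent and the
 * offset of the extent item at @path/@key, and that gap is not already a
 * hole in the parent snapshot, send zero-filled writes for it. Also advance
 * sctx->cur_inode_last_extent to the end of the current extent.
 */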
5695 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
5696 struct btrfs_key *key)
5697 {
5698 struct btrfs_file_extent_item *fi;
5699 u64 extent_end;
5700 u8 type;
5701 int ret = 0;
5702
5703 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
5704 return 0;
5705
5706 if (sctx->cur_inode_last_extent == (u64)-1) {
5707 ret = get_last_extent(sctx, key->offset - 1);
5708 if (ret)
5709 return ret;
5710 }
5711
5712 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
5713 struct btrfs_file_extent_item);
5714 type = btrfs_file_extent_type(path->nodes[0], fi);
5715 if (type == BTRFS_FILE_EXTENT_INLINE) {
5716 u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
5717 extent_end = ALIGN(key->offset + size,
5718 sctx->send_root->fs_info->sectorsize);
5719 } else {
5720 extent_end = key->offset +
5721 btrfs_file_extent_num_bytes(path->nodes[0], fi);
5722 }
5723
5724 if (path->slots[0] == 0 &&
5725 sctx->cur_inode_last_extent < key->offset) {
5726 /*
5727 * We might have skipped entire leafs that contained only
5728 * file extent items for our current inode. These leafs have
5729 * a generation number smaller (older) than the one in the
5730 * current leaf and the leaf our last extent came from, and
5731 * are located between these 2 leafs.
5732 */
5733 ret = get_last_extent(sctx, key->offset - 1);
5734 if (ret)
5735 return ret;
5736 }
5737
5738 if (sctx->cur_inode_last_extent < key->offset) {
5739 ret = range_is_hole_in_parent(sctx,
5740 sctx->cur_inode_last_extent,
5741 key->offset);
5742 if (ret < 0)
5743 return ret;
5744 else if (ret == 0)
5745 ret = send_hole(sctx, key->offset);
5746 else
5747 ret = 0;
5748 }
5749 sctx->cur_inode_last_extent = extent_end;
5750 return ret;
5751 }
5752
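/*
 * Process one file extent item of the current inode: skip extents that are
 * unchanged relative to the parent snapshot, holes and prealloc extents,
 * send everything else as clone or write commands, and emit explicit holes
 * for gaps where needed.
 */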
5753 static int process_extent(struct send_ctx *sctx,
5754 struct btrfs_path *path,
5755 struct btrfs_key *key)
5756 {
5757 struct clone_root *found_clone = NULL;
5758 int ret = 0;
5759
5760 if (S_ISLNK(sctx->cur_inode_mode))
5761 return 0;
5762
5763 if (sctx->parent_root && !sctx->cur_inode_new) {
5764 ret = is_extent_unchanged(sctx, path, key);
5765 if (ret < 0)
5766 goto out;
5767 if (ret) {
5768 ret = 0;
5769 goto out_hole;
5770 }
5771 } else {
5772 struct btrfs_file_extent_item *ei;
5773 u8 type;
5774
5775 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
5776 struct btrfs_file_extent_item);
5777 type = btrfs_file_extent_type(path->nodes[0], ei);
5778 if (type == BTRFS_FILE_EXTENT_PREALLOC ||
5779 type == BTRFS_FILE_EXTENT_REG) {
5780 /*
5781 * The send spec does not have a prealloc command yet,
5782 * so just leave a hole for prealloc'ed extents until
5783 * we have enough commands queued up to justify rev'ing
5784 * the send spec.
5785 */
5786 if (type == BTRFS_FILE_EXTENT_PREALLOC) {
5787 ret = 0;
5788 goto out;
5789 }
5790
5791 /* Have a hole, just skip it. */
5792 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
5793 ret = 0;
5794 goto out;
5795 }
5796 }
5797 }
5798
5799 ret = find_extent_clone(sctx, path, key->objectid, key->offset,
5800 sctx->cur_inode_size, &found_clone);
5801 if (ret != -ENOENT && ret < 0)
5802 goto out;
5803
5804 ret = send_write_or_clone(sctx, path, key, found_clone);
5805 if (ret)
5806 goto out;
5807 out_hole:
5808 ret = maybe_send_hole(sctx, path, key);
5809 out:
5810 return ret;
5811 }
5812
5813 static int process_all_extents(struct send_ctx *sctx)
5814 {
5815 int ret;
5816 struct btrfs_root *root;
5817 struct btrfs_path *path;
5818 struct btrfs_key key;
5819 struct btrfs_key found_key;
5820 struct extent_buffer *eb;
5821 int slot;
5822
5823 root = sctx->send_root;
5824 path = alloc_path_for_send();
5825 if (!path)
5826 return -ENOMEM;
5827
5828 key.objectid = sctx->cmp_key->objectid;
5829 key.type = BTRFS_EXTENT_DATA_KEY;
5830 key.offset = 0;
5831 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5832 if (ret < 0)
5833 goto out;
5834
5835 while (1) {
5836 eb = path->nodes[0];
5837 slot = path->slots[0];
5838
5839 if (slot >= btrfs_header_nritems(eb)) {
5840 ret = btrfs_next_leaf(root, path);
5841 if (ret < 0) {
5842 goto out;
5843 } else if (ret > 0) {
5844 ret = 0;
5845 break;
5846 }
5847 continue;
5848 }
5849
5850 btrfs_item_key_to_cpu(eb, &found_key, slot);
5851
5852 if (found_key.objectid != key.objectid ||
5853 found_key.type != key.type) {
5854 ret = 0;
5855 goto out;
5856 }
5857
5858 ret = process_extent(sctx, path, &found_key);
5859 if (ret < 0)
5860 goto out;
5861
5862 path->slots[0]++;
5863 }
5864
5865 out:
5866 btrfs_free_path(path);
5867 return ret;
5868 }
5869
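/*
 * Process the recorded new/deleted refs of the current inode, but only once
 * we are past its ref items (or at the end of the inode), so that all of its
 * refs are known when the path computations are done.
 */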
5870 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
5871 int *pending_move,
5872 int *refs_processed)
5873 {
5874 int ret = 0;
5875
5876 if (sctx->cur_ino == 0)
5877 goto out;
5878 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
5879 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
5880 goto out;
5881 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
5882 goto out;
5883
5884 ret = process_recorded_refs(sctx, pending_move);
5885 if (ret < 0)
5886 goto out;
5887
5888 *refs_processed = 1;
5889 out:
5890 return ret;
5891 }
5892
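/*
 * Emit the commands that must follow all other changes of the current inode:
 * the deferred truncate, chown, chmod and utimes, plus any directory moves
 * of children that were waiting for this inode to be processed.
 */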
5893 static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
5894 {
5895 int ret = 0;
5896 u64 left_mode;
5897 u64 left_uid;
5898 u64 left_gid;
5899 u64 right_mode;
5900 u64 right_uid;
5901 u64 right_gid;
5902 int need_chmod = 0;
5903 int need_chown = 0;
5904 int need_truncate = 1;
5905 int pending_move = 0;
5906 int refs_processed = 0;
5907
5908 if (sctx->ignore_cur_inode)
5909 return 0;
5910
5911 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
5912 &refs_processed);
5913 if (ret < 0)
5914 goto out;
5915
5916 /*
5917 * We have processed the refs and thus need to advance send_progress.
5918 * Now, calls to get_cur_xxx will take the updated refs of the current
5919 * inode into account.
5920 *
5921 * On the other hand, if our current inode is a directory and couldn't
5922 * be moved/renamed because its parent was renamed/moved too and it has
5923 * a higher inode number, we can only move/rename our current inode
5924 * after we moved/renamed its parent. Therefore in this case operate on
5925 * the old path (pre move/rename) of our current inode, and the
5926 * move/rename will be performed later.
5927 */
5928 if (refs_processed && !pending_move)
5929 sctx->send_progress = sctx->cur_ino + 1;
5930
5931 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
5932 goto out;
5933 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
5934 goto out;
5935
5936 ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
5937 &left_mode, &left_uid, &left_gid, NULL);
5938 if (ret < 0)
5939 goto out;
5940
5941 if (!sctx->parent_root || sctx->cur_inode_new) {
5942 need_chown = 1;
5943 if (!S_ISLNK(sctx->cur_inode_mode))
5944 need_chmod = 1;
5945 if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
5946 need_truncate = 0;
5947 } else {
5948 u64 old_size;
5949
5950 ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
5951 &old_size, NULL, &right_mode, &right_uid,
5952 &right_gid, NULL);
5953 if (ret < 0)
5954 goto out;
5955
5956 if (left_uid != right_uid || left_gid != right_gid)
5957 need_chown = 1;
5958 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
5959 need_chmod = 1;
5960 if ((old_size == sctx->cur_inode_size) ||
5961 (sctx->cur_inode_size > old_size &&
5962 sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
5963 need_truncate = 0;
5964 }
5965
5966 if (S_ISREG(sctx->cur_inode_mode)) {
5967 if (need_send_hole(sctx)) {
5968 if (sctx->cur_inode_last_extent == (u64)-1 ||
5969 sctx->cur_inode_last_extent <
5970 sctx->cur_inode_size) {
5971 ret = get_last_extent(sctx, (u64)-1);
5972 if (ret)
5973 goto out;
5974 }
5975 if (sctx->cur_inode_last_extent <
5976 sctx->cur_inode_size) {
5977 ret = send_hole(sctx, sctx->cur_inode_size);
5978 if (ret)
5979 goto out;
5980 }
5981 }
5982 if (need_truncate) {
5983 ret = send_truncate(sctx, sctx->cur_ino,
5984 sctx->cur_inode_gen,
5985 sctx->cur_inode_size);
5986 if (ret < 0)
5987 goto out;
5988 }
5989 }
5990
5991 if (need_chown) {
5992 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
5993 left_uid, left_gid);
5994 if (ret < 0)
5995 goto out;
5996 }
5997 if (need_chmod) {
5998 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
5999 left_mode);
6000 if (ret < 0)
6001 goto out;
6002 }
6003
6004 /*
6005 * If other directory inodes depended on our current directory
6006 * inode's move/rename, now do their move/rename operations.
6007 */
6008 if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
6009 ret = apply_children_dir_moves(sctx);
6010 if (ret)
6011 goto out;
6012 /*
6013 * We need to send the utimes every time, whether or not they
6014 * actually changed between the two trees, as we have made
6015 * changes to the inode before. If our inode is a directory and it's
6016 * waiting to be moved/renamed, we will send its utimes when
6017 * it's moved/renamed, therefore we don't need to do it here.
6018 */
6019 sctx->send_progress = sctx->cur_ino + 1;
6020 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
6021 if (ret < 0)
6022 goto out;
6023 }
6024
6025 out:
6026 return ret;
6027 }
6028
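/*
 * Context passed to record_parent_ref() through iterate_inode_ref(): it
 * pairs the send context with the list collecting the refs found in the
 * parent snapshot.
 */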
6029 struct parent_paths_ctx {
6030 struct list_head *refs;
6031 struct send_ctx *sctx;
6032 };
6033
6034 static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
6035				 void *ctx)
6036 {
6037 struct parent_paths_ctx *ppctx = ctx;
6038
6039 return record_ref(ppctx->sctx->parent_root, dir, name, ppctx->sctx,
6040 ppctx->refs);
6041 }
6042
6043 /*
6044 * Issue unlink operations for all paths of the current inode found in the
6045 * parent snapshot.
6046 */
6047 static int btrfs_unlink_all_paths(struct send_ctx *sctx)
6048 {
6049 LIST_HEAD(deleted_refs);
6050 struct btrfs_path *path;
6051 struct btrfs_key key;
6052 struct parent_paths_ctx ctx;
6053 int ret;
6054
6055 path = alloc_path_for_send();
6056 if (!path)
6057 return -ENOMEM;
6058
6059 key.objectid = sctx->cur_ino;
6060 key.type = BTRFS_INODE_REF_KEY;
6061 key.offset = 0;
6062 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
6063 if (ret < 0)
6064 goto out;
6065
6066 ctx.refs = &deleted_refs;
6067 ctx.sctx = sctx;
6068
6069 while (true) {
6070 struct extent_buffer *eb = path->nodes[0];
6071 int slot = path->slots[0];
6072
6073 if (slot >= btrfs_header_nritems(eb)) {
6074 ret = btrfs_next_leaf(sctx->parent_root, path);
6075 if (ret < 0)
6076 goto out;
6077 else if (ret > 0)
6078 break;
6079 continue;
6080 }
6081
6082 btrfs_item_key_to_cpu(eb, &key, slot);
6083 if (key.objectid != sctx->cur_ino)
6084 break;
6085 if (key.type != BTRFS_INODE_REF_KEY &&
6086 key.type != BTRFS_INODE_EXTREF_KEY)
6087 break;
6088
6089 ret = iterate_inode_ref(sctx->parent_root, path, &key, 1,
6090 record_parent_ref, &ctx);
6091 if (ret < 0)
6092 goto out;
6093
6094 path->slots[0]++;
6095 }
6096
6097 while (!list_empty(&deleted_refs)) {
6098 struct recorded_ref *ref;
6099
6100 ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
6101 ret = send_unlink(sctx, ref->full_path);
6102 if (ret < 0)
6103 goto out;
6104 fs_path_free(ref->full_path);
6105 list_del(&ref->list);
6106 kfree(ref);
6107 }
6108 ret = 0;
6109 out:
6110 btrfs_free_path(path);
6111 if (ret)
6112 __free_recorded_refs(&deleted_refs);
6113 return ret;
6114 }
6115
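/*
 * Handle an inode item that the tree comparison reported as new, deleted
 * or changed: update the per-inode state in sctx and, for new inodes (or
 * inodes reused with a new generation), start creation and ref/extent/xattr
 * processing.
 */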
6116 static int changed_inode(struct send_ctx *sctx,
6117			 enum btrfs_compare_tree_result result)
6118 {
6119 int ret = 0;
6120 struct btrfs_key *key = sctx->cmp_key;
6121 struct btrfs_inode_item *left_ii = NULL;
6122 struct btrfs_inode_item *right_ii = NULL;
6123 u64 left_gen = 0;
6124 u64 right_gen = 0;
6125
6126 sctx->cur_ino = key->objectid;
6127 sctx->cur_inode_new_gen = 0;
6128 sctx->cur_inode_last_extent = (u64)-1;
6129 sctx->cur_inode_next_write_offset = 0;
6130 sctx->ignore_cur_inode = false;
6131
6132 /*
6133 * Set send_progress to current inode. This will tell all get_cur_xxx
6134 * functions that the current inode's refs are not updated yet. Later,
6135 * when process_recorded_refs is finished, it is set to cur_ino + 1.
6136 */
6137 sctx->send_progress = sctx->cur_ino;
6138
6139 if (result == BTRFS_COMPARE_TREE_NEW ||
6140 result == BTRFS_COMPARE_TREE_CHANGED) {
6141 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
6142 sctx->left_path->slots[0],
6143 struct btrfs_inode_item);
6144 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
6145 left_ii);
6146 } else {
6147 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
6148 sctx->right_path->slots[0],
6149 struct btrfs_inode_item);
6150 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
6151 right_ii);
6152 }
6153 if (result == BTRFS_COMPARE_TREE_CHANGED) {
6154 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
6155 sctx->right_path->slots[0],
6156 struct btrfs_inode_item);
6157
6158 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
6159 right_ii);
6160
6161 /*
6162 * The cur_ino = root dir case is special here. We can't treat
6163 * the inode as deleted+reused because it would generate a
6164 * stream that tries to delete/mkdir the root dir.
6165 */
6166 if (left_gen != right_gen &&
6167 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
6168 sctx->cur_inode_new_gen = 1;
6169 }
6170
6171 /*
6172 * Normally we do not find inodes with a link count of zero (orphans)
6173 * because the most common case is to create a snapshot and use it
6174	 * for a send operation. However, other less common use cases involve
6175	 * using a subvolume and sending it after turning it to RO mode just
6176	 * after deleting all hard links of a file while holding an open
6177	 * file descriptor against it, or turning a RO snapshot into RW mode,
6178	 * keeping an open file descriptor against a file, deleting it and then
6179	 * turning the snapshot back to RO mode before using it for a send
6180	 * operation. So if we find such cases, ignore the inode and all its
6181	 * items completely if it's a new inode, or, if it's a changed inode,
6182	 * make sure all its previous paths (from the parent snapshot) are
6183	 * unlinked and all the other inode items are ignored.
6184 */
6185 if (result == BTRFS_COMPARE_TREE_NEW ||
6186 result == BTRFS_COMPARE_TREE_CHANGED) {
6187 u32 nlinks;
6188
6189 nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
6190 if (nlinks == 0) {
6191 sctx->ignore_cur_inode = true;
6192 if (result == BTRFS_COMPARE_TREE_CHANGED)
6193 ret = btrfs_unlink_all_paths(sctx);
6194 goto out;
6195 }
6196 }
6197
6198 if (result == BTRFS_COMPARE_TREE_NEW) {
6199 sctx->cur_inode_gen = left_gen;
6200 sctx->cur_inode_new = 1;
6201 sctx->cur_inode_deleted = 0;
6202 sctx->cur_inode_size = btrfs_inode_size(
6203 sctx->left_path->nodes[0], left_ii);
6204 sctx->cur_inode_mode = btrfs_inode_mode(
6205 sctx->left_path->nodes[0], left_ii);
6206 sctx->cur_inode_rdev = btrfs_inode_rdev(
6207 sctx->left_path->nodes[0], left_ii);
6208 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
6209 ret = send_create_inode_if_needed(sctx);
6210 } else if (result == BTRFS_COMPARE_TREE_DELETED) {
6211 sctx->cur_inode_gen = right_gen;
6212 sctx->cur_inode_new = 0;
6213 sctx->cur_inode_deleted = 1;
6214 sctx->cur_inode_size = btrfs_inode_size(
6215 sctx->right_path->nodes[0], right_ii);
6216 sctx->cur_inode_mode = btrfs_inode_mode(
6217 sctx->right_path->nodes[0], right_ii);
6218 } else if (result == BTRFS_COMPARE_TREE_CHANGED) {
6219 /*
6220 * We need to do some special handling in case the inode was
6221 * reported as changed with a changed generation number. This
6222		 * means that the original inode was deleted and a new inode
6223		 * reused the same inum. So we have to treat the old inode as
6224 * deleted and the new one as new.
6225 */
6226 if (sctx->cur_inode_new_gen) {
6227 /*
6228 * First, process the inode as if it was deleted.
6229 */
6230 sctx->cur_inode_gen = right_gen;
6231 sctx->cur_inode_new = 0;
6232 sctx->cur_inode_deleted = 1;
6233 sctx->cur_inode_size = btrfs_inode_size(
6234 sctx->right_path->nodes[0], right_ii);
6235 sctx->cur_inode_mode = btrfs_inode_mode(
6236 sctx->right_path->nodes[0], right_ii);
6237 ret = process_all_refs(sctx,
6238 BTRFS_COMPARE_TREE_DELETED);
6239 if (ret < 0)
6240 goto out;
6241
6242 /*
6243 * Now process the inode as if it was new.
6244 */
6245 sctx->cur_inode_gen = left_gen;
6246 sctx->cur_inode_new = 1;
6247 sctx->cur_inode_deleted = 0;
6248 sctx->cur_inode_size = btrfs_inode_size(
6249 sctx->left_path->nodes[0], left_ii);
6250 sctx->cur_inode_mode = btrfs_inode_mode(
6251 sctx->left_path->nodes[0], left_ii);
6252 sctx->cur_inode_rdev = btrfs_inode_rdev(
6253 sctx->left_path->nodes[0], left_ii);
6254 ret = send_create_inode_if_needed(sctx);
6255 if (ret < 0)
6256 goto out;
6257
6258 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
6259 if (ret < 0)
6260 goto out;
6261 /*
6262 * Advance send_progress now as we did not get into
6263 * process_recorded_refs_if_needed in the new_gen case.
6264 */
6265 sctx->send_progress = sctx->cur_ino + 1;
6266
6267 /*
6268 * Now process all extents and xattrs of the inode as if
6269 * they were all new.
6270 */
6271 ret = process_all_extents(sctx);
6272 if (ret < 0)
6273 goto out;
6274 ret = process_all_new_xattrs(sctx);
6275 if (ret < 0)
6276 goto out;
6277 } else {
6278 sctx->cur_inode_gen = left_gen;
6279 sctx->cur_inode_new = 0;
6280 sctx->cur_inode_new_gen = 0;
6281 sctx->cur_inode_deleted = 0;
6282 sctx->cur_inode_size = btrfs_inode_size(
6283 sctx->left_path->nodes[0], left_ii);
6284 sctx->cur_inode_mode = btrfs_inode_mode(
6285 sctx->left_path->nodes[0], left_ii);
6286 }
6287 }
6288
6289 out:
6290 return ret;
6291 }
6292
6293 /*
6294 * We have to process new refs before deleted refs, but compare_trees gives us
6295 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
6296 * first and later process them in process_recorded_refs.
6297 * For the cur_inode_new_gen case, we skip recording completely because
6298 * changed_inode already initiated processing of refs. The reason for this is
6299 * that in this case, compare_tree actually compares the refs of 2 different
6300 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
6301 * refs of the right tree as deleted and all refs of the left tree as new.
6302 */
6303 static int changed_ref(struct send_ctx *sctx,
6304		 enum btrfs_compare_tree_result result)
6305 {
6306 int ret = 0;
6307
6308 if (sctx->cur_ino != sctx->cmp_key->objectid) {
6309 inconsistent_snapshot_error(sctx, result, "reference");
6310 return -EIO;
6311 }
6312
6313 if (!sctx->cur_inode_new_gen &&
6314 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
6315 if (result == BTRFS_COMPARE_TREE_NEW)
6316 ret = record_new_ref(sctx);
6317 else if (result == BTRFS_COMPARE_TREE_DELETED)
6318 ret = record_deleted_ref(sctx);
6319 else if (result == BTRFS_COMPARE_TREE_CHANGED)
6320 ret = record_changed_ref(sctx);
6321 }
6322
6323 return ret;
6324 }
6325
6326 /*
6327 * Process new/deleted/changed xattrs. We skip processing in the
6328 * cur_inode_new_gen case because changed_inode already initiated processing
6329 * of xattrs. The reason is the same as in changed_ref().
6330 */
6331 static int changed_xattr(struct send_ctx *sctx,
6332			 enum btrfs_compare_tree_result result)
6333 {
6334 int ret = 0;
6335
6336 if (sctx->cur_ino != sctx->cmp_key->objectid) {
6337 inconsistent_snapshot_error(sctx, result, "xattr");
6338 return -EIO;
6339 }
6340
6341 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
6342 if (result == BTRFS_COMPARE_TREE_NEW)
6343 ret = process_new_xattr(sctx);
6344 else if (result == BTRFS_COMPARE_TREE_DELETED)
6345 ret = process_deleted_xattr(sctx);
6346 else if (result == BTRFS_COMPARE_TREE_CHANGED)
6347 ret = process_changed_xattr(sctx);
6348 }
6349
6350 return ret;
6351 }
6352
6353 /*
6354 * Process new/deleted/changed extents. We skip processing in the
6355 * cur_inode_new_gen case because changed_inode already initiated processing
6356 * of extents. The reason is the same as in changed_ref().
6357 */
6358 static int changed_extent(struct send_ctx *sctx,
6359			 enum btrfs_compare_tree_result result)
6360 {
6361 int ret = 0;
6362
6363 /*
6364 * We have found an extent item that changed without the inode item
6365 * having changed. This can happen either after relocation (where the
6366 * disk_bytenr of an extent item is replaced at
6367 * relocation.c:replace_file_extents()) or after deduplication into a
6368 * file in both the parent and send snapshots (where an extent item can
6369 * get modified or replaced with a new one). Note that deduplication
6370 * updates the inode item, but it only changes the iversion (sequence
6371	 * field in the inode item) of the inode, so if a file is deduplicated
6372	 * the same number of times in both the parent and send snapshots, its
6373	 * iversion becomes the same in both snapshots, and hence the inode item
6374	 * is the same on both snapshots.
6375 */
6376 if (sctx->cur_ino != sctx->cmp_key->objectid)
6377 return 0;
6378
6379 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
6380 if (result != BTRFS_COMPARE_TREE_DELETED)
6381 ret = process_extent(sctx, sctx->left_path,
6382 sctx->cmp_key);
6383 }
6384
6385 return ret;
6386 }
6387
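/*
 * Return 1 if the directory's inode generation differs between the send
 * and parent roots (i.e. it was deleted and recreated), 0 if not, and a
 * negative errno on failure.
 */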
6388 static int dir_changed(struct send_ctx *sctx, u64 dir)
6389 {
6390 u64 orig_gen, new_gen;
6391 int ret;
6392
6393 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
6394 NULL, NULL);
6395 if (ret)
6396 return ret;
6397
6398 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
6399 NULL, NULL, NULL);
6400 if (ret)
6401 return ret;
6402
6403 return (orig_gen != new_gen) ? 1 : 0;
6404 }
6405
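/*
 * For a ref item that is byte-wise identical in both trees, check whether
 * any parent directory it points to was deleted and recreated (generation
 * change) between the two snapshots.
 */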
6406 static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
6407			 struct btrfs_key *key)
6408 {
6409 struct btrfs_inode_extref *extref;
6410 struct extent_buffer *leaf;
6411 u64 dirid = 0, last_dirid = 0;
6412 unsigned long ptr;
6413 u32 item_size;
6414 u32 cur_offset = 0;
6415 int ref_name_len;
6416 int ret = 0;
6417
6418 /* Easy case, just check this one dirid */
6419 if (key->type == BTRFS_INODE_REF_KEY) {
6420 dirid = key->offset;
6421
6422 ret = dir_changed(sctx, dirid);
6423 goto out;
6424 }
6425
6426 leaf = path->nodes[0];
6427 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
6428 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
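	/*
	 * An INODE_EXTREF item packs multiple variable-sized entries back to
	 * back; walk them by advancing cur_offset by the entry header size
	 * plus the name length.
	 */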
6429 while (cur_offset < item_size) {
6430 extref = (struct btrfs_inode_extref *)(ptr +
6431 cur_offset);
6432 dirid = btrfs_inode_extref_parent(leaf, extref);
6433 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
6434 cur_offset += ref_name_len + sizeof(*extref);
6435 if (dirid == last_dirid)
6436 continue;
6437 ret = dir_changed(sctx, dirid);
6438 if (ret)
6439 break;
6440 last_dirid = dirid;
6441 }
6442 out:
6443 return ret;
6444 }
6445
6446 /*
6447 * Updates compare-related fields in sctx and simply forwards to the actual
6448 * changed_xxx functions.
6449 */
6450 static int changed_cb(struct btrfs_path *left_path,
6451		 struct btrfs_path *right_path,
6452		 struct btrfs_key *key,
6453		 enum btrfs_compare_tree_result result,
6454		 void *ctx)
6455 {
6456 int ret = 0;
6457 struct send_ctx *sctx = ctx;
6458
6459 if (result == BTRFS_COMPARE_TREE_SAME) {
6460 if (key->type == BTRFS_INODE_REF_KEY ||
6461 key->type == BTRFS_INODE_EXTREF_KEY) {
6462 ret = compare_refs(sctx, left_path, key);
6463 if (!ret)
6464 return 0;
6465 if (ret < 0)
6466 return ret;
6467 } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
6468 return maybe_send_hole(sctx, left_path, key);
6469 } else {
6470 return 0;
6471 }
6472 result = BTRFS_COMPARE_TREE_CHANGED;
6473 ret = 0;
6474 }
6475
6476 sctx->left_path = left_path;
6477 sctx->right_path = right_path;
6478 sctx->cmp_key = key;
6479
6480 ret = finish_inode_if_needed(sctx, 0);
6481 if (ret < 0)
6482 goto out;
6483
6484 /* Ignore non-FS objects */
6485 if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
6486 key->objectid == BTRFS_FREE_SPACE_OBJECTID)
6487 goto out;
6488
6489 if (key->type == BTRFS_INODE_ITEM_KEY) {
6490 ret = changed_inode(sctx, result);
6491 } else if (!sctx->ignore_cur_inode) {
6492 if (key->type == BTRFS_INODE_REF_KEY ||
6493 key->type == BTRFS_INODE_EXTREF_KEY)
6494 ret = changed_ref(sctx, result);
6495 else if (key->type == BTRFS_XATTR_ITEM_KEY)
6496 ret = changed_xattr(sctx, result);
6497 else if (key->type == BTRFS_EXTENT_DATA_KEY)
6498 ret = changed_extent(sctx, result);
6499 }
6500
6501 out:
6502 return ret;
6503 }
6504
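/*
 * Full send (no parent snapshot): iterate over all items of the send
 * root, starting at the first inode, and feed each one to changed_cb()
 * as BTRFS_COMPARE_TREE_NEW.
 */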
6505 static int full_send_tree(struct send_ctx *sctx)
6506 {
6507 int ret;
6508 struct btrfs_root *send_root = sctx->send_root;
6509 struct btrfs_key key;
6510 struct btrfs_path *path;
6511 struct extent_buffer *eb;
6512 int slot;
6513
6514 path = alloc_path_for_send();
6515 if (!path)
6516 return -ENOMEM;
6517
6518 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
6519 key.type = BTRFS_INODE_ITEM_KEY;
6520 key.offset = 0;
6521
6522 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
6523 if (ret < 0)
6524 goto out;
6525 if (ret)
6526 goto out_finish;
6527
6528 while (1) {
6529 eb = path->nodes[0];
6530 slot = path->slots[0];
6531 btrfs_item_key_to_cpu(eb, &key, slot);
6532
6533 ret = changed_cb(path, NULL, &key,
6534 BTRFS_COMPARE_TREE_NEW, sctx);
6535 if (ret < 0)
6536 goto out;
6537
6538 ret = btrfs_next_item(send_root, path);
6539 if (ret < 0)
6540 goto out;
6541 if (ret) {
6542 ret = 0;
6543 break;
6544 }
6545 }
6546
6547 out_finish:
6548 ret = finish_inode_if_needed(sctx, 1);
6549
6550 out:
6551 btrfs_free_path(path);
6552 return ret;
6553 }
6554
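/* Step down one level, into the child block at the current slot. */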
6555 static int tree_move_down(struct btrfs_path *path, int *level)
6556 {
6557 struct extent_buffer *eb;
6558
6559 BUG_ON(*level == 0);
6560 eb = btrfs_read_node_slot(path->nodes[*level], path->slots[*level]);
6561 if (IS_ERR(eb))
6562 return PTR_ERR(eb);
6563
6564 path->nodes[*level - 1] = eb;
6565 path->slots[*level - 1] = 0;
6566 (*level)--;
6567 return 0;
6568 }
6569
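/*
 * Move to the next slot at the current level, walking up as many levels
 * as needed when the end of a node is reached. Returns 1 if it had to go
 * up, 0 if it only moved right, and -1 when the root level is exhausted.
 */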
6570 static int tree_move_next_or_upnext(struct btrfs_path *path,
6571				 int *level, int root_level)
6572 {
6573 int ret = 0;
6574 int nritems;
6575 nritems = btrfs_header_nritems(path->nodes[*level]);
6576
6577 path->slots[*level]++;
6578
6579 while (path->slots[*level] >= nritems) {
6580 if (*level == root_level)
6581 return -1;
6582
6583 /* move upnext */
6584 path->slots[*level] = 0;
6585 free_extent_buffer(path->nodes[*level]);
6586 path->nodes[*level] = NULL;
6587 (*level)++;
6588 path->slots[*level]++;
6589
6590 nritems = btrfs_header_nritems(path->nodes[*level]);
6591 ret = 1;
6592 }
6593 return ret;
6594 }
6595
6596 /*
6597 * Returns 1 if it had to move up and next. 0 is returned if it moved only next
6598 * or down.
6599 */
6600 static int tree_advance(struct btrfs_path *path,
6601			 int *level, int root_level,
6602			 int allow_down,
6603			 struct btrfs_key *key)
6604 {
6605 int ret;
6606
6607 if (*level == 0 || !allow_down) {
6608 ret = tree_move_next_or_upnext(path, level, root_level);
6609 } else {
6610 ret = tree_move_down(path, level);
6611 }
6612 if (ret >= 0) {
6613 if (*level == 0)
6614 btrfs_item_key_to_cpu(path->nodes[*level], key,
6615 path->slots[*level]);
6616 else
6617 btrfs_node_key_to_cpu(path->nodes[*level], key,
6618 path->slots[*level]);
6619 }
6620 return ret;
6621 }
6622
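/*
 * Byte-wise compare the items at the current slots of both paths. Returns
 * 0 if they are identical, 1 otherwise. tmp_buf must be large enough to
 * hold the left item (the caller passes a nodesize buffer).
 */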
6623 static int tree_compare_item(struct btrfs_path *left_path,
6624			 struct btrfs_path *right_path,
6625			 char *tmp_buf)
6626 {
6627 int cmp;
6628 int len1, len2;
6629 unsigned long off1, off2;
6630
6631 len1 = btrfs_item_size_nr(left_path->nodes[0], left_path->slots[0]);
6632 len2 = btrfs_item_size_nr(right_path->nodes[0], right_path->slots[0]);
6633 if (len1 != len2)
6634 return 1;
6635
6636 off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
6637 off2 = btrfs_item_ptr_offset(right_path->nodes[0],
6638 right_path->slots[0]);
6639
6640 read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
6641
6642 cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
6643 if (cmp)
6644 return 1;
6645 return 0;
6646 }
6647
6648 /*
6649 * This function compares two trees and calls the provided callback for
6650 * every changed/new/deleted item it finds.
6651 * If shared tree blocks are encountered, whole subtrees are skipped, making
6652 * the compare pretty fast on snapshotted subvolumes.
6653 *
6654 * This currently works on commit roots only. As commit roots are read only,
6655 * we don't do any locking. The commit roots are protected with transactions.
6656 * Transactions are ended and rejoined when a commit is tried in between.
6657 *
6658 * This function checks for modifications done to the trees while comparing.
6659 * If it detects a change, it aborts immediately.
6660 */
6661 static int btrfs_compare_trees(struct btrfs_root *left_root,
6662			 struct btrfs_root *right_root,
6663			 btrfs_changed_cb_t changed_cb, void *ctx)
6664 {
6665 struct btrfs_fs_info *fs_info = left_root->fs_info;
6666 int ret;
6667 int cmp;
6668 struct btrfs_path *left_path = NULL;
6669 struct btrfs_path *right_path = NULL;
6670 struct btrfs_key left_key;
6671 struct btrfs_key right_key;
6672 char *tmp_buf = NULL;
6673 int left_root_level;
6674 int right_root_level;
6675 int left_level;
6676 int right_level;
6677 int left_end_reached;
6678 int right_end_reached;
6679 int advance_left;
6680 int advance_right;
6681 u64 left_blockptr;
6682 u64 right_blockptr;
6683 u64 left_gen;
6684 u64 right_gen;
6685
6686 left_path = btrfs_alloc_path();
6687 if (!left_path) {
6688 ret = -ENOMEM;
6689 goto out;
6690 }
6691 right_path = btrfs_alloc_path();
6692 if (!right_path) {
6693 ret = -ENOMEM;
6694 goto out;
6695 }
6696
6697 tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
6698 if (!tmp_buf) {
6699 ret = -ENOMEM;
6700 goto out;
6701 }
6702
6703 left_path->search_commit_root = 1;
6704 left_path->skip_locking = 1;
6705 right_path->search_commit_root = 1;
6706 right_path->skip_locking = 1;
6707
6708 /*
6709 * Strategy: Go to the first items of both trees. Then do
6710 *
6711 * If both trees are at level 0
6712 * Compare keys of current items
6713 * If left < right treat left item as new, advance left tree
6714 * and repeat
6715 * If left > right treat right item as deleted, advance right tree
6716 * and repeat
6717 * If left == right do deep compare of items, treat as changed if
6718 * needed, advance both trees and repeat
6719 * If both trees are at the same level but not at level 0
6720	 * Compare keys of current nodes/leaves
6721	 * If left < right advance left tree and repeat
6722	 * If left > right advance right tree and repeat
6723	 * If left == right compare blockptrs of the next nodes/leaves
6724 * If they match advance both trees but stay at the same level
6725 * and repeat
6726 * If they don't match advance both trees while allowing to go
6727 * deeper and repeat
6728 * If tree levels are different
6729 * Advance the tree that needs it and repeat
6730 *
6731 * Advancing a tree means:
6732 * If we are at level 0, try to go to the next slot. If that's not
6733	 * possible, go one level up and repeat. Stop when we find a level
6734	 * where we can go to the next slot. We may at this point be on a
6735 * node or a leaf.
6736 *
6737 * If we are not at level 0 and not on shared tree blocks, go one
6738 * level deeper.
6739 *
6740 * If we are not at level 0 and on shared tree blocks, go one slot to
6741 * the right if possible or go up and right.
6742 */
6743
6744 down_read(&fs_info->commit_root_sem);
6745 left_level = btrfs_header_level(left_root->commit_root);
6746 left_root_level = left_level;
6747 left_path->nodes[left_level] =
6748 btrfs_clone_extent_buffer(left_root->commit_root);
6749 if (!left_path->nodes[left_level]) {
6750 up_read(&fs_info->commit_root_sem);
6751 ret = -ENOMEM;
6752 goto out;
6753 }
6754
6755 right_level = btrfs_header_level(right_root->commit_root);
6756 right_root_level = right_level;
6757 right_path->nodes[right_level] =
6758 btrfs_clone_extent_buffer(right_root->commit_root);
6759 if (!right_path->nodes[right_level]) {
6760 up_read(&fs_info->commit_root_sem);
6761 ret = -ENOMEM;
6762 goto out;
6763 }
6764 up_read(&fs_info->commit_root_sem);
6765
6766 if (left_level == 0)
6767 btrfs_item_key_to_cpu(left_path->nodes[left_level],
6768 &left_key, left_path->slots[left_level]);
6769 else
6770 btrfs_node_key_to_cpu(left_path->nodes[left_level],
6771 &left_key, left_path->slots[left_level]);
6772 if (right_level == 0)
6773 btrfs_item_key_to_cpu(right_path->nodes[right_level],
6774 &right_key, right_path->slots[right_level]);
6775 else
6776 btrfs_node_key_to_cpu(right_path->nodes[right_level],
6777 &right_key, right_path->slots[right_level]);
6778
6779 left_end_reached = right_end_reached = 0;
6780 advance_left = advance_right = 0;
6781
6782 while (1) {
6783 cond_resched();
6784 if (advance_left && !left_end_reached) {
6785 ret = tree_advance(left_path, &left_level,
6786 left_root_level,
6787 advance_left != ADVANCE_ONLY_NEXT,
6788 &left_key);
6789 if (ret == -1)
6790 left_end_reached = ADVANCE;
6791 else if (ret < 0)
6792 goto out;
6793 advance_left = 0;
6794 }
6795 if (advance_right && !right_end_reached) {
6796 ret = tree_advance(right_path, &right_level,
6797 right_root_level,
6798 advance_right != ADVANCE_ONLY_NEXT,
6799 &right_key);
6800 if (ret == -1)
6801 right_end_reached = ADVANCE;
6802 else if (ret < 0)
6803 goto out;
6804 advance_right = 0;
6805 }
6806
6807 if (left_end_reached && right_end_reached) {
6808 ret = 0;
6809 goto out;
6810 } else if (left_end_reached) {
6811 if (right_level == 0) {
6812 ret = changed_cb(left_path, right_path,
6813 &right_key,
6814 BTRFS_COMPARE_TREE_DELETED,
6815 ctx);
6816 if (ret < 0)
6817 goto out;
6818 }
6819 advance_right = ADVANCE;
6820 continue;
6821 } else if (right_end_reached) {
6822 if (left_level == 0) {
6823 ret = changed_cb(left_path, right_path,
6824 &left_key,
6825 BTRFS_COMPARE_TREE_NEW,
6826 ctx);
6827 if (ret < 0)
6828 goto out;
6829 }
6830 advance_left = ADVANCE;
6831 continue;
6832 }
6833
6834 if (left_level == 0 && right_level == 0) {
6835 cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
6836 if (cmp < 0) {
6837 ret = changed_cb(left_path, right_path,
6838 &left_key,
6839 BTRFS_COMPARE_TREE_NEW,
6840 ctx);
6841 if (ret < 0)
6842 goto out;
6843 advance_left = ADVANCE;
6844 } else if (cmp > 0) {
6845 ret = changed_cb(left_path, right_path,
6846 &right_key,
6847 BTRFS_COMPARE_TREE_DELETED,
6848 ctx);
6849 if (ret < 0)
6850 goto out;
6851 advance_right = ADVANCE;
6852 } else {
6853 enum btrfs_compare_tree_result result;
6854
6855 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
6856 ret = tree_compare_item(left_path, right_path,
6857 tmp_buf);
6858 if (ret)
6859 result = BTRFS_COMPARE_TREE_CHANGED;
6860 else
6861 result = BTRFS_COMPARE_TREE_SAME;
6862 ret = changed_cb(left_path, right_path,
6863 &left_key, result, ctx);
6864 if (ret < 0)
6865 goto out;
6866 advance_left = ADVANCE;
6867 advance_right = ADVANCE;
6868 }
6869 } else if (left_level == right_level) {
6870 cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
6871 if (cmp < 0) {
6872 advance_left = ADVANCE;
6873 } else if (cmp > 0) {
6874 advance_right = ADVANCE;
6875 } else {
6876 left_blockptr = btrfs_node_blockptr(
6877 left_path->nodes[left_level],
6878 left_path->slots[left_level]);
6879 right_blockptr = btrfs_node_blockptr(
6880 right_path->nodes[right_level],
6881 right_path->slots[right_level]);
6882 left_gen = btrfs_node_ptr_generation(
6883 left_path->nodes[left_level],
6884 left_path->slots[left_level]);
6885 right_gen = btrfs_node_ptr_generation(
6886 right_path->nodes[right_level],
6887 right_path->slots[right_level]);
6888 if (left_blockptr == right_blockptr &&
6889 left_gen == right_gen) {
6890 /*
6891					 * As we're on a shared block, don't
6892					 * allow going deeper.
6893 */
6894 advance_left = ADVANCE_ONLY_NEXT;
6895 advance_right = ADVANCE_ONLY_NEXT;
6896 } else {
6897 advance_left = ADVANCE;
6898 advance_right = ADVANCE;
6899 }
6900 }
6901 } else if (left_level < right_level) {
6902 advance_right = ADVANCE;
6903 } else {
6904 advance_left = ADVANCE;
6905 }
6906 }
6907
6908 out:
6909 btrfs_free_path(left_path);
6910 btrfs_free_path(right_path);
6911 kvfree(tmp_buf);
6912 return ret;
6913 }
6914
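/*
 * Top-level driver of the stream generation: emit the stream header and
 * the subvolume/snapshot begin command, then either compare the two
 * snapshots (incremental send) or walk the whole tree (full send).
 */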
6915 static int send_subvol(struct send_ctx *sctx)
6916 {
6917 int ret;
6918
6919 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
6920 ret = send_header(sctx);
6921 if (ret < 0)
6922 goto out;
6923 }
6924
6925 ret = send_subvol_begin(sctx);
6926 if (ret < 0)
6927 goto out;
6928
6929 if (sctx->parent_root) {
6930 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
6931 changed_cb, sctx);
6932 if (ret < 0)
6933 goto out;
6934 ret = finish_inode_if_needed(sctx, 1);
6935 if (ret < 0)
6936 goto out;
6937 } else {
6938 ret = full_send_tree(sctx);
6939 if (ret < 0)
6940 goto out;
6941 }
6942
6943 out:
6944 free_recorded_refs(sctx);
6945 return ret;
6946 }
6947
6948 /*
6949 * If orphan cleanup did remove any orphans from a root, it means the tree
6950 * was modified and therefore the commit root is not the same as the current
6951 * root anymore. This is a problem, because send uses the commit root and
6952 * therefore can see inode items that don't exist in the current root anymore,
6953 * and, for example, make calls to btrfs_iget, which will do tree lookups based
6954 * on the current root and not on the commit root. Those lookups will fail,
6955 * returning a -ESTALE error, and making send fail with that error. So make
6956 * sure a send does not see any orphans we have just removed, and that it will
6957 * see the same inodes regardless of whether a transaction commit happened
6958 * before it started (meaning that the commit root will be the same as the
6959 * current root) or not.
6960 */
6961 static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
6962 {
6963 int i;
6964 struct btrfs_trans_handle *trans = NULL;
6965
6966 again:
6967 if (sctx->parent_root &&
6968 sctx->parent_root->node != sctx->parent_root->commit_root)
6969 goto commit_trans;
6970
6971 for (i = 0; i < sctx->clone_roots_cnt; i++)
6972 if (sctx->clone_roots[i].root->node !=
6973 sctx->clone_roots[i].root->commit_root)
6974 goto commit_trans;
6975
6976 if (trans)
6977 return btrfs_end_transaction(trans);
6978
6979 return 0;
6980
6981 commit_trans:
6982 /* Use any root, all fs roots will get their commit roots updated. */
6983 if (!trans) {
6984 trans = btrfs_join_transaction(sctx->send_root);
6985 if (IS_ERR(trans))
6986 return PTR_ERR(trans);
6987 goto again;
6988 }
6989
6990 return btrfs_commit_transaction(trans);
6991 }
6992
6993 /*
6994 * Make sure any existing delalloc is flushed for any root used by a send
6995 * operation so that we do not miss any data and we do not race with writeback
6996 * finishing and changing a tree while send is using the tree. This could
6997 * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
6998 * a send operation then uses the subvolume.
6999 * After flushing delalloc ensure_commit_roots_uptodate() must be called.
7000 */
7001 static int flush_delalloc_roots(struct send_ctx *sctx)
7002 {
7003 struct btrfs_root *root = sctx->parent_root;
7004 int ret;
7005 int i;
7006
7007 if (root) {
7008 ret = btrfs_start_delalloc_snapshot(root);
7009 if (ret)
7010 return ret;
7011 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
7012 }
7013
7014 for (i = 0; i < sctx->clone_roots_cnt; i++) {
7015 root = sctx->clone_roots[i].root;
7016 ret = btrfs_start_delalloc_snapshot(root);
7017 if (ret)
7018 return ret;
7019 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
7020 }
7021
7022 return 0;
7023 }
7024
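/*
 * Decrement the root's send_in_progress counter, warning if it ever
 * becomes unbalanced (negative).
 */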
7025 static void btrfs_root_dec_send_in_progress(struct btrfs_root *root)
7026 {
7027 spin_lock(&root->root_item_lock);
7028 root->send_in_progress--;
7029 /*
7030 * Not much left to do, we don't know why it's unbalanced and
7031 * can't blindly reset it to 0.
7032 */
7033 if (root->send_in_progress < 0)
7034 btrfs_err(root->fs_info,
7035 "send_in_progress unbalanced %d root %llu",
7036 root->send_in_progress, root->root_key.objectid);
7037 spin_unlock(&root->root_item_lock);
7038 }
7039
7040 static void dedupe_in_progress_warn(const struct btrfs_root *root)
7041 {
7042 btrfs_warn_rl(root->fs_info,
7043 "cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
7044 root->root_key.objectid, root->dedupe_in_progress);
7045 }
7046
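/*
 * Minimal userspace sketch of how this ioctl is typically driven (not
 * part of the kernel; the paths and the lack of error handling are
 * illustrative assumptions only). It assumes <sys/ioctl.h>, <fcntl.h>
 * and <linux/btrfs.h>:
 *
 *	int subvol_fd = open("/mnt/snap", O_RDONLY);	// RO snapshot
 *	int out_fd = open("backup.stream", O_WRONLY | O_CREAT, 0600);
 *	struct btrfs_ioctl_send_args args = { 0 };
 *
 *	args.send_fd = out_fd;	// the serialized stream is written here
 *	if (ioctl(subvol_fd, BTRFS_IOC_SEND, &args) < 0)
 *		perror("BTRFS_IOC_SEND");
 *
 * For an incremental send, args.parent_root is set to the objectid of
 * the parent snapshot's root, and args.clone_sources with
 * args.clone_sources_count describe additional read-only snapshots
 * usable as clone sources.
 */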
7047 long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
7048 {
7049 int ret = 0;
7050 struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
7051 struct btrfs_fs_info *fs_info = send_root->fs_info;
7052 struct btrfs_root *clone_root;
7053 struct btrfs_key key;
7054 struct send_ctx *sctx = NULL;
7055 u32 i;
7056 u64 *clone_sources_tmp = NULL;
7057 int clone_sources_to_rollback = 0;
7058 unsigned alloc_size;
7059 int sort_clone_roots = 0;
7060 int index;
7061
7062 if (!capable(CAP_SYS_ADMIN))
7063 return -EPERM;
7064
7065 /*
7066	 * The subvolume must remain read-only during send, so protect against
7067 * making it RW. This also protects against deletion.
7068 */
7069 spin_lock(&send_root->root_item_lock);
7070 if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
7071 dedupe_in_progress_warn(send_root);
7072 spin_unlock(&send_root->root_item_lock);
7073 return -EAGAIN;
7074 }
7075 send_root->send_in_progress++;
7076 spin_unlock(&send_root->root_item_lock);
7077
7078 /*
7079 * Userspace tools do the checks and warn the user if it's
7080 * not RO.
7081 */
7082 if (!btrfs_root_readonly(send_root)) {
7083 ret = -EPERM;
7084 goto out;
7085 }
7086
7087 /*
7088	 * Check that we don't overflow at later allocations: we request
7089 * clone_sources_count + 1 items, and compare to unsigned long inside
7090 * access_ok.
7091 */
7092 if (arg->clone_sources_count >
7093 ULONG_MAX / sizeof(struct clone_root) - 1) {
7094 ret = -EINVAL;
7095 goto out;
7096 }
7097
7098 if (!access_ok(arg->clone_sources,
7099 sizeof(*arg->clone_sources) *
7100 arg->clone_sources_count)) {
7101 ret = -EFAULT;
7102 goto out;
7103 }
7104
7105 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
7106 ret = -EINVAL;
7107 goto out;
7108 }
7109
7110 sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
7111 if (!sctx) {
7112 ret = -ENOMEM;
7113 goto out;
7114 }
7115
7116 INIT_LIST_HEAD(&sctx->new_refs);
7117 INIT_LIST_HEAD(&sctx->deleted_refs);
7118 INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
7119 INIT_LIST_HEAD(&sctx->name_cache_list);
7120
7121 sctx->flags = arg->flags;
7122
7123 sctx->send_filp = fget(arg->send_fd);
7124 if (!sctx->send_filp) {
7125 ret = -EBADF;
7126 goto out;
7127 }
7128
7129 sctx->send_root = send_root;
7130 /*
7131	 * Unlikely but possible: if the subvolume is marked for deletion but
7132	 * is slow to remove the directory entry, a send can still be started.
7133 */
7134 if (btrfs_root_dead(sctx->send_root)) {
7135 ret = -EPERM;
7136 goto out;
7137 }
7138
7139 sctx->clone_roots_cnt = arg->clone_sources_count;
7140
7141 sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
7142 sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
7143 if (!sctx->send_buf) {
7144 ret = -ENOMEM;
7145 goto out;
7146 }
7147
7148 sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL);
7149 if (!sctx->read_buf) {
7150 ret = -ENOMEM;
7151 goto out;
7152 }
7153
7154 sctx->pending_dir_moves = RB_ROOT;
7155 sctx->waiting_dir_moves = RB_ROOT;
7156 sctx->orphan_dirs = RB_ROOT;
7157
7158 alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
7159
7160 sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL);
7161 if (!sctx->clone_roots) {
7162 ret = -ENOMEM;
7163 goto out;
7164 }
7165
7166 alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
7167
7168 if (arg->clone_sources_count) {
7169 clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
7170 if (!clone_sources_tmp) {
7171 ret = -ENOMEM;
7172 goto out;
7173 }
7174
7175 ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
7176 alloc_size);
7177 if (ret) {
7178 ret = -EFAULT;
7179 goto out;
7180 }
7181
7182 for (i = 0; i < arg->clone_sources_count; i++) {
7183 key.objectid = clone_sources_tmp[i];
7184 key.type = BTRFS_ROOT_ITEM_KEY;
7185 key.offset = (u64)-1;
7186
7187 index = srcu_read_lock(&fs_info->subvol_srcu);
7188
7189 clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
7190 if (IS_ERR(clone_root)) {
7191 srcu_read_unlock(&fs_info->subvol_srcu, index);
7192 ret = PTR_ERR(clone_root);
7193 goto out;
7194 }
7195 spin_lock(&clone_root->root_item_lock);
7196 if (!btrfs_root_readonly(clone_root) ||
7197 btrfs_root_dead(clone_root)) {
7198 spin_unlock(&clone_root->root_item_lock);
7199 srcu_read_unlock(&fs_info->subvol_srcu, index);
7200 ret = -EPERM;
7201 goto out;
7202 }
7203 if (clone_root->dedupe_in_progress) {
7204 dedupe_in_progress_warn(clone_root);
7205 spin_unlock(&clone_root->root_item_lock);
7206 srcu_read_unlock(&fs_info->subvol_srcu, index);
7207 ret = -EAGAIN;
7208 goto out;
7209 }
7210 clone_root->send_in_progress++;
7211 spin_unlock(&clone_root->root_item_lock);
7212 srcu_read_unlock(&fs_info->subvol_srcu, index);
7213
7214 sctx->clone_roots[i].root = clone_root;
7215 clone_sources_to_rollback = i + 1;
7216 }
7217 kvfree(clone_sources_tmp);
7218 clone_sources_tmp = NULL;
7219 }
7220
7221 if (arg->parent_root) {
7222 key.objectid = arg->parent_root;
7223 key.type = BTRFS_ROOT_ITEM_KEY;
7224 key.offset = (u64)-1;
7225
7226 index = srcu_read_lock(&fs_info->subvol_srcu);
7227
7228 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
7229 if (IS_ERR(sctx->parent_root)) {
7230 srcu_read_unlock(&fs_info->subvol_srcu, index);
7231 ret = PTR_ERR(sctx->parent_root);
7232 goto out;
7233 }
7234
7235 spin_lock(&sctx->parent_root->root_item_lock);
7236 sctx->parent_root->send_in_progress++;
7237 if (!btrfs_root_readonly(sctx->parent_root) ||
7238 btrfs_root_dead(sctx->parent_root)) {
7239 spin_unlock(&sctx->parent_root->root_item_lock);
7240 srcu_read_unlock(&fs_info->subvol_srcu, index);
7241 ret = -EPERM;
7242 goto out;
7243 }
7244 if (sctx->parent_root->dedupe_in_progress) {
7245 dedupe_in_progress_warn(sctx->parent_root);
7246 spin_unlock(&sctx->parent_root->root_item_lock);
7247 srcu_read_unlock(&fs_info->subvol_srcu, index);
7248 ret = -EAGAIN;
7249 goto out;
7250 }
7251 spin_unlock(&sctx->parent_root->root_item_lock);
7252
7253 srcu_read_unlock(&fs_info->subvol_srcu, index);
7254 }
7255
7256 /*
7257 * Clones from send_root are allowed, but only if the clone source
7258 * is behind the current send position. This is checked while searching
7259 * for possible clone sources.
7260 */
7261 sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
7262
7263 /* We do a bsearch later */
7264 sort(sctx->clone_roots, sctx->clone_roots_cnt,
7265 sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
7266 NULL);
7267 sort_clone_roots = 1;
7268
7269 ret = flush_delalloc_roots(sctx);
7270 if (ret)
7271 goto out;
7272
7273 ret = ensure_commit_roots_uptodate(sctx);
7274 if (ret)
7275 goto out;
7276
7277 mutex_lock(&fs_info->balance_mutex);
7278 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
7279 mutex_unlock(&fs_info->balance_mutex);
7280 btrfs_warn_rl(fs_info,
7281 "cannot run send because a balance operation is in progress");
7282 ret = -EAGAIN;
7283 goto out;
7284 }
7285 fs_info->send_in_progress++;
7286 mutex_unlock(&fs_info->balance_mutex);
7287
7288 current->journal_info = BTRFS_SEND_TRANS_STUB;
7289 ret = send_subvol(sctx);
7290 current->journal_info = NULL;
7291 mutex_lock(&fs_info->balance_mutex);
7292 fs_info->send_in_progress--;
7293 mutex_unlock(&fs_info->balance_mutex);
7294 if (ret < 0)
7295 goto out;
7296
7297 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
7298 ret = begin_cmd(sctx, BTRFS_SEND_C_END);
7299 if (ret < 0)
7300 goto out;
7301 ret = send_cmd(sctx);
7302 if (ret < 0)
7303 goto out;
7304 }
7305
7306 out:
7307 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
7308 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
7309 struct rb_node *n;
7310 struct pending_dir_move *pm;
7311
7312 n = rb_first(&sctx->pending_dir_moves);
7313 pm = rb_entry(n, struct pending_dir_move, node);
7314 while (!list_empty(&pm->list)) {
7315 struct pending_dir_move *pm2;
7316
7317 pm2 = list_first_entry(&pm->list,
7318 struct pending_dir_move, list);
7319 free_pending_move(sctx, pm2);
7320 }
7321 free_pending_move(sctx, pm);
7322 }
7323
7324 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
7325 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
7326 struct rb_node *n;
7327 struct waiting_dir_move *dm;
7328
7329 n = rb_first(&sctx->waiting_dir_moves);
7330 dm = rb_entry(n, struct waiting_dir_move, node);
7331 rb_erase(&dm->node, &sctx->waiting_dir_moves);
7332 kfree(dm);
7333 }
7334
7335 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
7336 while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
7337 struct rb_node *n;
7338 struct orphan_dir_info *odi;
7339
7340 n = rb_first(&sctx->orphan_dirs);
7341 odi = rb_entry(n, struct orphan_dir_info, node);
7342 free_orphan_dir_info(sctx, odi);
7343 }
7344
7345 if (sort_clone_roots) {
7346 for (i = 0; i < sctx->clone_roots_cnt; i++)
7347 btrfs_root_dec_send_in_progress(
7348 sctx->clone_roots[i].root);
7349 } else {
7350 for (i = 0; sctx && i < clone_sources_to_rollback; i++)
7351 btrfs_root_dec_send_in_progress(
7352 sctx->clone_roots[i].root);
7353
7354 btrfs_root_dec_send_in_progress(send_root);
7355 }
7356 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
7357 btrfs_root_dec_send_in_progress(sctx->parent_root);
7358
7359 kvfree(clone_sources_tmp);
7360
7361 if (sctx) {
7362 if (sctx->send_filp)
7363 fput(sctx->send_filp);
7364
7365 kvfree(sctx->clone_roots);
7366 kvfree(sctx->send_buf);
7367 kvfree(sctx->read_buf);
7368
7369 name_cache_free(sctx);
7370
7371 kfree(sctx);
7372 }
7373
7374 return ret;
7375 }
7376