diff options
Diffstat (limited to 'fs/btrfs/send.c')
-rw-r--r-- | fs/btrfs/send.c | 299 |
1 file changed, 235 insertions, 64 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6ad216e8178e..e258fc484cea 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -238,6 +238,7 @@ struct waiting_dir_move { * after this directory is moved, we can try to rmdir the ino rmdir_ino. */ u64 rmdir_ino; + u64 rmdir_gen; bool orphanized; }; @@ -323,7 +324,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); static struct waiting_dir_move * get_waiting_dir_move(struct send_ctx *sctx, u64 ino); -static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen); static int need_send_hole(struct send_ctx *sctx) { @@ -1257,12 +1258,21 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) */ if (found->root == bctx->sctx->send_root) { /* - * TODO for the moment we don't accept clones from the inode - * that is currently send. We may change this when - * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same - * file. + * If the source inode was not yet processed we can't issue a + * clone operation, as the source extent does not exist yet at + * the destination of the stream. */ - if (ino >= bctx->cur_objectid) + if (ino > bctx->cur_objectid) + return 0; + /* + * We clone from the inode currently being sent as long as the + * source extent is already processed, otherwise we could try + * to clone from an extent that does not exist yet at the + * destination of the stream. 
+ */ + if (ino == bctx->cur_objectid && + offset + bctx->extent_len > + bctx->sctx->cur_inode_next_write_offset) return 0; } @@ -2297,7 +2307,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, fs_path_reset(name); - if (is_waiting_for_rm(sctx, ino)) { + if (is_waiting_for_rm(sctx, ino, gen)) { ret = gen_unique_name(sctx, ino, gen, name); if (ret < 0) goto out; @@ -2856,8 +2866,8 @@ out: return ret; } -static struct orphan_dir_info * -add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx, + u64 dir_ino, u64 dir_gen) { struct rb_node **p = &sctx->orphan_dirs.rb_node; struct rb_node *parent = NULL; @@ -2866,20 +2876,23 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) while (*p) { parent = *p; entry = rb_entry(parent, struct orphan_dir_info, node); - if (dir_ino < entry->ino) { + if (dir_ino < entry->ino) p = &(*p)->rb_left; - } else if (dir_ino > entry->ino) { + else if (dir_ino > entry->ino) p = &(*p)->rb_right; - } else { + else if (dir_gen < entry->gen) + p = &(*p)->rb_left; + else if (dir_gen > entry->gen) + p = &(*p)->rb_right; + else return entry; - } } odi = kmalloc(sizeof(*odi), GFP_KERNEL); if (!odi) return ERR_PTR(-ENOMEM); odi->ino = dir_ino; - odi->gen = 0; + odi->gen = dir_gen; odi->last_dir_index_offset = 0; rb_link_node(&odi->node, parent, p); @@ -2887,8 +2900,8 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) return odi; } -static struct orphan_dir_info * -get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx, + u64 dir_ino, u64 gen) { struct rb_node *n = sctx->orphan_dirs.rb_node; struct orphan_dir_info *entry; @@ -2899,15 +2912,19 @@ get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) n = n->rb_left; else if (dir_ino > entry->ino) n = n->rb_right; + else if (gen < entry->gen) + n = n->rb_left; + else if (gen > entry->gen) + n = n->rb_right; else return entry; 
} return NULL; } -static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen) { - struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); + struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen); return odi != NULL; } @@ -2952,7 +2969,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, key.type = BTRFS_DIR_INDEX_KEY; key.offset = 0; - odi = get_orphan_dir_info(sctx, dir); + odi = get_orphan_dir_info(sctx, dir, dir_gen); if (odi) key.offset = odi->last_dir_index_offset; @@ -2983,7 +3000,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, dm = get_waiting_dir_move(sctx, loc.objectid); if (dm) { - odi = add_orphan_dir_info(sctx, dir); + odi = add_orphan_dir_info(sctx, dir, dir_gen); if (IS_ERR(odi)) { ret = PTR_ERR(odi); goto out; @@ -2991,12 +3008,13 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, odi->gen = dir_gen; odi->last_dir_index_offset = found_key.offset; dm->rmdir_ino = dir; + dm->rmdir_gen = dir_gen; ret = 0; goto out; } if (loc.objectid > send_progress) { - odi = add_orphan_dir_info(sctx, dir); + odi = add_orphan_dir_info(sctx, dir, dir_gen); if (IS_ERR(odi)) { ret = PTR_ERR(odi); goto out; @@ -3036,6 +3054,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) return -ENOMEM; dm->ino = ino; dm->rmdir_ino = 0; + dm->rmdir_gen = 0; dm->orphanized = orphanized; while (*p) { @@ -3181,7 +3200,7 @@ static int path_loop(struct send_ctx *sctx, struct fs_path *name, while (ino != BTRFS_FIRST_FREE_OBJECTID) { fs_path_reset(name); - if (is_waiting_for_rm(sctx, ino)) + if (is_waiting_for_rm(sctx, ino, gen)) break; if (is_waiting_for_move(sctx, ino)) { if (*ancestor_ino == 0) @@ -3221,6 +3240,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) u64 parent_ino, parent_gen; struct waiting_dir_move *dm = NULL; u64 rmdir_ino = 0; + u64 rmdir_gen; u64 
ancestor; bool is_orphan; int ret; @@ -3235,6 +3255,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); rmdir_ino = dm->rmdir_ino; + rmdir_gen = dm->rmdir_gen; is_orphan = dm->orphanized; free_waiting_dir_move(sctx, dm); @@ -3271,6 +3292,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); dm->rmdir_ino = rmdir_ino; + dm->rmdir_gen = rmdir_gen; } goto out; } @@ -3289,7 +3311,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) struct orphan_dir_info *odi; u64 gen; - odi = get_orphan_dir_info(sctx, rmdir_ino); + odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen); if (!odi) { /* already deleted */ goto finish; @@ -3804,6 +3826,72 @@ static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) } /* + * When processing the new references for an inode we may orphanize an existing + * directory inode because its old name conflicts with one of the new references + * of the current inode. Later, when processing another new reference of our + * inode, we might need to orphanize another inode, but the path we have in the + * reference reflects the pre-orphanization name of the directory we previously + * orphanized. For example: + * + * parent snapshot looks like: + * + * . (ino 256) + * |----- f1 (ino 257) + * |----- f2 (ino 258) + * |----- d1/ (ino 259) + * |----- d2/ (ino 260) + * + * send snapshot looks like: + * + * . (ino 256) + * |----- d1 (ino 258) + * |----- f2/ (ino 259) + * |----- f2_link/ (ino 260) + * | |----- f1 (ino 257) + * | + * |----- d2 (ino 258) + * + * When processing inode 257 we compute the name for inode 259 as "d1", and we + * cache it in the name cache. Later when we start processing inode 258, when + * collecting all its new references we set a full path of "d1/d2" for its new + * reference with name "d2". 
When we start processing the new references we + * start by processing the new reference with name "d1", and this results in + * orphanizing inode 259, since its old reference causes a conflict. Then we + * move on to the next new reference, with name "d2", and we find out we must + * orphanize inode 260, as its old reference conflicts with ours - but for the + * orphanization we use a source path corresponding to the path we stored in the + * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the + * receiver fail since the path component "d1/" no longer exists, it was renamed + * to "o259-6-0/" when processing the previous new reference. So in this case we + * must recompute the path in the new reference and use it for the new + * orphanization operation. + */ +static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) +{ + char *name; + int ret; + + name = kmemdup(ref->name, ref->name_len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + fs_path_reset(ref->full_path); + ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path); + if (ret < 0) + goto out; + + ret = fs_path_add(ref->full_path, name, ref->name_len); + if (ret < 0) + goto out; + + /* Update the reference's base name pointer. */ + set_ref_path(ref, ref->full_path); +out: + kfree(name); + return ret; +} + +/* * This does all the move/link/unlink/rmdir magic. */ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) @@ -3871,52 +3959,56 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) goto out; } + /* + * Before doing any rename and link operations, do a first pass on the + * new references to orphanize any unprocessed inodes that may have a + * reference that conflicts with one of the new references of the current + * inode.
This needs to happen first because a new reference may conflict + * with the old reference of a parent directory, so we must make sure + * that the paths used for link and rename commands don't use an + * orphanized name when an ancestor was not yet orphanized. + * + * Example: + * + * Parent snapshot: + * + * . (ino 256) + * |----- testdir/ (ino 259) + * | |----- a (ino 257) + * | + * |----- b (ino 258) + * + * Send snapshot: + * + * . (ino 256) + * |----- testdir_2/ (ino 259) + * | |----- a (ino 260) + * | + * |----- testdir (ino 257) + * |----- b (ino 257) + * |----- b2 (ino 258) + * + * Processing the new reference for inode 257 with name "b" may happen + * before processing the new reference with name "testdir". If so, we + * must make sure that by the time we send a link command to create the + * hard link "b", inode 259 was already orphanized, since the generated + * path in "valid_path" already contains the orphanized name for 259. + * We are processing inode 257, so only later when processing 259 we do + * the rename operation to change its temporary (orphanized) name to + * "testdir_2". + */ list_for_each_entry(cur, &sctx->new_refs, list) { - /* - * We may have refs where the parent directory does not exist - * yet. This happens if the parent directories inum is higher - * than the current inum. To handle this case, we create the - * parent directory out of order. But we need to check if this - * did already happen before due to other refs in the same dir. - */ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; - if (ret == inode_state_will_create) { - ret = 0; - /* - * First check if any of the current inodes refs did - * already create the dir. - */ - list_for_each_entry(cur2, &sctx->new_refs, list) { - if (cur == cur2) - break; - if (cur2->dir == cur->dir) { - ret = 1; - break; - } - } - - /* - * If that did not happen, check if a previous inode - * did already create the dir.
- */ - if (!ret) - ret = did_create_dir(sctx, cur->dir); - if (ret < 0) - goto out; - if (!ret) { - ret = send_create_inode(sctx, cur->dir); - if (ret < 0) - goto out; - } - } + if (ret == inode_state_will_create) + continue; /* - * Check if this new ref would overwrite the first ref of - * another unprocessed inode. If yes, orphanize the - * overwritten inode. If we find an overwritten ref that is - * not the first ref, simply unlink it. + * Check if this new ref would overwrite the first ref of another + * unprocessed inode. If yes, orphanize the overwritten inode. + * If we find an overwritten ref that is not the first ref, + * simply unlink it. */ ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, cur->name, cur->name_len, @@ -3933,6 +4025,12 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) struct name_cache_entry *nce; struct waiting_dir_move *wdm; + if (orphanized_dir) { + ret = refresh_ref_path(sctx, cur); + if (ret < 0) + goto out; + } + ret = orphanize_inode(sctx, ow_inode, ow_gen, cur->full_path); if (ret < 0) @@ -3989,12 +4087,66 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) if (ret < 0) goto out; } else { + /* + * If we previously orphanized a directory that + * collided with a new reference that we already + * processed, recompute the current path because + * that directory may be part of the path. + */ + if (orphanized_dir) { + ret = refresh_ref_path(sctx, cur); + if (ret < 0) + goto out; + } ret = send_unlink(sctx, cur->full_path); if (ret < 0) goto out; } } + } + + list_for_each_entry(cur, &sctx->new_refs, list) { + /* + * We may have refs where the parent directory does not exist + * yet. This happens if the parent directories inum is higher + * than the current inum. To handle this case, we create the + * parent directory out of order. But we need to check if this + * did already happen before due to other refs in the same dir. 
+ */ + ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); + if (ret < 0) + goto out; + if (ret == inode_state_will_create) { + ret = 0; + /* + * First check if any of the current inodes refs did + * already create the dir. + */ + list_for_each_entry(cur2, &sctx->new_refs, list) { + if (cur == cur2) + break; + if (cur2->dir == cur->dir) { + ret = 1; + break; + } + } + + /* + * If that did not happen, check if a previous inode + * did already create the dir. + */ + if (!ret) + ret = did_create_dir(sctx, cur->dir); + if (ret < 0) + goto out; + if (!ret) { + ret = send_create_inode(sctx, cur->dir); + if (ret < 0) + goto out; + } + } + if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { ret = wait_for_dest_dir_move(sctx, cur, is_orphan); if (ret < 0) @@ -4853,6 +5005,10 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) lock_page(page); if (!PageUptodate(page)) { unlock_page(page); + btrfs_err(fs_info, + "send: IO error at offset %llu for inode %llu root %llu", + page_offset(page), sctx->cur_ino, + sctx->send_root->root_key.objectid); put_page(page); ret = -EIO; break; @@ -5378,6 +5534,21 @@ static int clone_range(struct send_ctx *sctx, break; offset += clone_len; clone_root->offset += clone_len; + + /* + * If we are cloning from the file we are currently processing, + * and using the send root as the clone root, we must stop once + * the current clone offset reaches the current eof of the file + * at the receiver, otherwise we would issue an invalid clone + * operation (source range going beyond eof) and cause the + * receiver to fail. So if we reach the current eof, bail out + * and fallback to a regular write. 
+ */ + if (clone_root->root == sctx->send_root && + clone_root->ino == sctx->cur_ino && + clone_root->offset >= sctx->cur_inode_next_write_offset) + break; + data_offset += clone_len; next: path->slots[0]++; @@ -7224,7 +7395,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); - sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL); + sctx->clone_roots = kvzalloc(alloc_size, GFP_KERNEL); if (!sctx->clone_roots) { ret = -ENOMEM; goto out; |