aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/send.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/send.c')
-rw-r--r--fs/btrfs/send.c365
1 files changed, 226 insertions, 139 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d9813a5b075a..340c76a12ce1 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -122,8 +122,6 @@ struct send_ctx {
struct file_ra_state ra;
- char *read_buf;
-
/*
* We process inodes by their increasing order, so if before an
* incremental send we reverse the parent/child relationship of
@@ -278,11 +276,6 @@ enum btrfs_compare_tree_result {
BTRFS_COMPARE_TREE_CHANGED,
BTRFS_COMPARE_TREE_SAME,
};
-typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path,
- struct btrfs_path *right_path,
- struct btrfs_key *key,
- enum btrfs_compare_tree_result result,
- void *ctx);
__cold
static void inconsistent_snapshot_error(struct send_ctx *sctx,
@@ -584,8 +577,8 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
return -EOVERFLOW;
hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
- hdr->tlv_type = cpu_to_le16(attr);
- hdr->tlv_len = cpu_to_le16(len);
+ put_unaligned_le16(attr, &hdr->tlv_type);
+ put_unaligned_le16(len, &hdr->tlv_len);
memcpy(hdr + 1, data, len);
sctx->send_size += total_len;
@@ -695,7 +688,7 @@ static int begin_cmd(struct send_ctx *sctx, int cmd)
sctx->send_size += sizeof(*hdr);
hdr = (struct btrfs_cmd_header *)sctx->send_buf;
- hdr->cmd = cpu_to_le16(cmd);
+ put_unaligned_le16(cmd, &hdr->cmd);
return 0;
}
@@ -707,17 +700,17 @@ static int send_cmd(struct send_ctx *sctx)
u32 crc;
hdr = (struct btrfs_cmd_header *)sctx->send_buf;
- hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
- hdr->crc = 0;
+ put_unaligned_le32(sctx->send_size - sizeof(*hdr), &hdr->len);
+ put_unaligned_le32(0, &hdr->crc);
crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
- hdr->crc = cpu_to_le32(crc);
+ put_unaligned_le32(crc, &hdr->crc);
ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
&sctx->send_off);
sctx->total_send_size += sctx->send_size;
- sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
+ sctx->cmd_send_size[get_unaligned_le16(&hdr->cmd)] += sctx->send_size;
sctx->send_size = 0;
return ret;
@@ -3813,6 +3806,72 @@ static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
}
/*
+ * When processing the new references for an inode we may orphanize an existing
+ * directory inode because its old name conflicts with one of the new references
+ * of the current inode. Later, when processing another new reference of our
+ * inode, we might need to orphanize another inode, but the path we have in the
+ * reference reflects the pre-orphanization name of the directory we previously
+ * orphanized. For example:
+ *
+ * parent snapshot looks like:
+ *
+ * . (ino 256)
+ * |----- f1 (ino 257)
+ * |----- f2 (ino 258)
+ * |----- d1/ (ino 259)
+ * |----- d2/ (ino 260)
+ *
+ * send snapshot looks like:
+ *
+ * . (ino 256)
+ * |----- d1 (ino 258)
+ * |----- f2/ (ino 259)
+ * |----- f2_link/ (ino 260)
+ * | |----- f1 (ino 257)
+ * |
+ * |----- d2 (ino 258)
+ *
+ * When processing inode 257 we compute the name for inode 259 as "d1", and we
+ * cache it in the name cache. Later when we start processing inode 258, when
+ * collecting all its new references we set a full path of "d1/d2" for its new
+ * reference with name "d2". When we start processing the new references we
+ * start by processing the new reference with name "d1", and this results in
+ * orphanizing inode 259, since its old reference causes a conflict. Then we
+ * move on the next new reference, with name "d2", and we find out we must
+ * orphanize inode 260, as its old reference conflicts with ours - but for the
+ * orphanization we use a source path corresponding to the path we stored in the
+ * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the
+ * receiver fail since the path component "d1/" no longer exists, it was renamed
+ * to "o259-6-0/" when processing the previous new reference. So in this case we
+ * must recompute the path in the new reference and use it for the new
+ * orphanization operation.
+ */
+static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
+{
+ char *name;
+ int ret;
+
+ name = kmemdup(ref->name, ref->name_len, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ fs_path_reset(ref->full_path);
+ ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
+ if (ret < 0)
+ goto out;
+
+ ret = fs_path_add(ref->full_path, name, ref->name_len);
+ if (ret < 0)
+ goto out;
+
+ /* Update the reference's base name pointer. */
+ set_ref_path(ref, ref->full_path);
+out:
+ kfree(name);
+ return ret;
+}
+
+/*
* This does all the move/link/unlink/rmdir magic.
*/
static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
@@ -3880,52 +3939,56 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
goto out;
}
+ /*
+ * Before doing any rename and link operations, do a first pass on the
+ * new references to orphanize any unprocessed inodes that may have a
+ * reference that conflicts with one of the new references of the current
+ * inode. This needs to happen first because a new reference may conflict
+ * with the old reference of a parent directory, so we must make sure
+ * that the path used for link and rename commands don't use an
+ * orphanized name when an ancestor was not yet orphanized.
+ *
+ * Example:
+ *
+ * Parent snapshot:
+ *
+ * . (ino 256)
+ * |----- testdir/ (ino 259)
+ * | |----- a (ino 257)
+ * |
+ * |----- b (ino 258)
+ *
+ * Send snapshot:
+ *
+ * . (ino 256)
+ * |----- testdir_2/ (ino 259)
+ * | |----- a (ino 260)
+ * |
+ * |----- testdir (ino 257)
+ * |----- b (ino 257)
+ * |----- b2 (ino 258)
+ *
+ * Processing the new reference for inode 257 with name "b" may happen
+ * before processing the new reference with name "testdir". If so, we
+ * must make sure that by the time we send a link command to create the
+ * hard link "b", inode 259 was already orphanized, since the generated
+ * path in "valid_path" already contains the orphanized name for 259.
+ * We are processing inode 257, so only later when processing 259 we do
+ * the rename operation to change its temporary (orphanized) name to
+ * "testdir_2".
+ */
list_for_each_entry(cur, &sctx->new_refs, list) {
- /*
- * We may have refs where the parent directory does not exist
- * yet. This happens if the parent directories inum is higher
- * than the current inum. To handle this case, we create the
- * parent directory out of order. But we need to check if this
- * did already happen before due to other refs in the same dir.
- */
ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
- if (ret == inode_state_will_create) {
- ret = 0;
- /*
- * First check if any of the current inodes refs did
- * already create the dir.
- */
- list_for_each_entry(cur2, &sctx->new_refs, list) {
- if (cur == cur2)
- break;
- if (cur2->dir == cur->dir) {
- ret = 1;
- break;
- }
- }
-
- /*
- * If that did not happen, check if a previous inode
- * did already create the dir.
- */
- if (!ret)
- ret = did_create_dir(sctx, cur->dir);
- if (ret < 0)
- goto out;
- if (!ret) {
- ret = send_create_inode(sctx, cur->dir);
- if (ret < 0)
- goto out;
- }
- }
+ if (ret == inode_state_will_create)
+ continue;
/*
- * Check if this new ref would overwrite the first ref of
- * another unprocessed inode. If yes, orphanize the
- * overwritten inode. If we find an overwritten ref that is
- * not the first ref, simply unlink it.
+ * Check if this new ref would overwrite the first ref of another
+ * unprocessed inode. If yes, orphanize the overwritten inode.
+ * If we find an overwritten ref that is not the first ref,
+ * simply unlink it.
*/
ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
cur->name, cur->name_len,
@@ -3942,6 +4005,12 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
struct name_cache_entry *nce;
struct waiting_dir_move *wdm;
+ if (orphanized_dir) {
+ ret = refresh_ref_path(sctx, cur);
+ if (ret < 0)
+ goto out;
+ }
+
ret = orphanize_inode(sctx, ow_inode, ow_gen,
cur->full_path);
if (ret < 0)
@@ -4004,6 +4073,49 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
}
}
+ }
+
+ list_for_each_entry(cur, &sctx->new_refs, list) {
+ /*
+ * We may have refs where the parent directory does not exist
+ * yet. This happens if the parent directories inum is higher
+ * than the current inum. To handle this case, we create the
+ * parent directory out of order. But we need to check if this
+ * did already happen before due to other refs in the same dir.
+ */
+ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
+ if (ret < 0)
+ goto out;
+ if (ret == inode_state_will_create) {
+ ret = 0;
+ /*
+ * First check if any of the current inodes refs did
+ * already create the dir.
+ */
+ list_for_each_entry(cur2, &sctx->new_refs, list) {
+ if (cur == cur2)
+ break;
+ if (cur2->dir == cur->dir) {
+ ret = 1;
+ break;
+ }
+ }
+
+ /*
+ * If that did not happen, check if a previous inode
+ * did already create the dir.
+ */
+ if (!ret)
+ ret = did_create_dir(sctx, cur->dir);
+ if (ret < 0)
+ goto out;
+ if (!ret) {
+ ret = send_create_inode(sctx, cur->dir);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
if (ret < 0)
@@ -4799,7 +4911,25 @@ out:
return ret;
}
-static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
+static inline u64 max_send_read_size(const struct send_ctx *sctx)
+{
+ return sctx->send_max_size - SZ_16K;
+}
+
+static int put_data_header(struct send_ctx *sctx, u32 len)
+{
+ struct btrfs_tlv_header *hdr;
+
+ if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
+ return -EOVERFLOW;
+ hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
+ put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
+ put_unaligned_le16(len, &hdr->tlv_len);
+ sctx->send_size += sizeof(*hdr);
+ return 0;
+}
+
+static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
{
struct btrfs_root *root = sctx->send_root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4809,21 +4939,16 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t last_index;
unsigned pg_offset = offset_in_page(offset);
- ssize_t ret = 0;
+ int ret;
+
+ ret = put_data_header(sctx, len);
+ if (ret)
+ return ret;
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
- if (offset + len > i_size_read(inode)) {
- if (offset > i_size_read(inode))
- len = 0;
- else
- len = offset - i_size_read(inode);
- }
- if (len == 0)
- goto out;
-
last_index = (offset + len - 1) >> PAGE_SHIFT;
/* initial readahead */
@@ -4864,16 +4989,16 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
}
addr = kmap(page);
- memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
+ memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset,
+ cur_len);
kunmap(page);
unlock_page(page);
put_page(page);
index++;
pg_offset = 0;
len -= cur_len;
- ret += cur_len;
+ sctx->send_size += cur_len;
}
-out:
iput(inode);
return ret;
}
@@ -4887,7 +5012,6 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
int ret = 0;
struct fs_path *p;
- ssize_t num_read = 0;
p = fs_path_alloc();
if (!p)
@@ -4895,13 +5019,6 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
- num_read = fill_read_buf(sctx, offset, len);
- if (num_read <= 0) {
- if (num_read < 0)
- ret = num_read;
- goto out;
- }
-
ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
if (ret < 0)
goto out;
@@ -4912,16 +5029,16 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
- TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
+ ret = put_file_data(sctx, offset, len);
+ if (ret < 0)
+ goto out;
ret = send_cmd(sctx);
tlv_put_failure:
out:
fs_path_free(p);
- if (ret < 0)
- return ret;
- return num_read;
+ return ret;
}
/*
@@ -5033,8 +5150,8 @@ out:
static int send_hole(struct send_ctx *sctx, u64 end)
{
struct fs_path *p = NULL;
+ u64 read_size = max_send_read_size(sctx);
u64 offset = sctx->cur_inode_last_extent;
- u64 len;
int ret = 0;
/*
@@ -5061,16 +5178,19 @@ static int send_hole(struct send_ctx *sctx, u64 end)
ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
if (ret < 0)
goto tlv_put_failure;
- memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
while (offset < end) {
- len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
+ u64 len = min(end - offset, read_size);
ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
if (ret < 0)
break;
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
- TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
+ ret = put_data_header(sctx, len);
+ if (ret < 0)
+ break;
+ memset(sctx->send_buf + sctx->send_size, 0, len);
+ sctx->send_size += len;
ret = send_cmd(sctx);
if (ret < 0)
break;
@@ -5086,23 +5206,20 @@ static int send_extent_data(struct send_ctx *sctx,
const u64 offset,
const u64 len)
{
+ u64 read_size = max_send_read_size(sctx);
u64 sent = 0;
if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
return send_update_extent(sctx, offset, len);
while (sent < len) {
- u64 size = len - sent;
+ u64 size = min(len - sent, read_size);
int ret;
- if (size > BTRFS_SEND_READ_SIZE)
- size = BTRFS_SEND_READ_SIZE;
ret = send_write(sctx, offset + sent, size);
if (ret < 0)
return ret;
- if (!ret)
- break;
- sent += ret;
+ sent += size;
}
return 0;
}
@@ -5402,51 +5519,29 @@ static int send_write_or_clone(struct send_ctx *sctx,
struct clone_root *clone_root)
{
int ret = 0;
- struct btrfs_file_extent_item *ei;
u64 offset = key->offset;
- u64 len;
- u8 type;
+ u64 end;
u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
- ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_file_extent_item);
- type = btrfs_file_extent_type(path->nodes[0], ei);
- if (type == BTRFS_FILE_EXTENT_INLINE) {
- len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
- /*
- * it is possible the inline item won't cover the whole page,
- * but there may be items after this page. Make
- * sure to send the whole thing
- */
- len = PAGE_ALIGN(len);
- } else {
- len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
- }
-
- if (offset >= sctx->cur_inode_size) {
- ret = 0;
- goto out;
- }
- if (offset + len > sctx->cur_inode_size)
- len = sctx->cur_inode_size - offset;
- if (len == 0) {
- ret = 0;
- goto out;
- }
+ end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
+ if (offset >= end)
+ return 0;
- if (clone_root && IS_ALIGNED(offset + len, bs)) {
+ if (clone_root && IS_ALIGNED(end, bs)) {
+ struct btrfs_file_extent_item *ei;
u64 disk_byte;
u64 data_offset;
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
ret = clone_range(sctx, clone_root, disk_byte, data_offset,
- offset, len);
+ offset, end - offset);
} else {
- ret = send_extent_data(sctx, offset, len);
+ ret = send_extent_data(sctx, offset, end - offset);
}
- sctx->cur_inode_next_write_offset = offset + len;
-out:
+ sctx->cur_inode_next_write_offset = end;
return ret;
}
@@ -6692,8 +6787,7 @@ static int tree_compare_item(struct btrfs_path *left_path,
* If it detects a change, it aborts immediately.
*/
static int btrfs_compare_trees(struct btrfs_root *left_root,
- struct btrfs_root *right_root,
- btrfs_changed_cb_t changed_cb, void *ctx)
+ struct btrfs_root *right_root, void *ctx)
{
struct btrfs_fs_info *fs_info = left_root->fs_info;
int ret;
@@ -6960,8 +7054,7 @@ static int send_subvol(struct send_ctx *sctx)
goto out;
if (sctx->parent_root) {
- ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
- changed_cb, sctx);
+ ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, sctx);
if (ret < 0)
goto out;
ret = finish_inode_if_needed(sctx, 1);
@@ -7087,7 +7180,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
u32 i;
u64 *clone_sources_tmp = NULL;
int clone_sources_to_rollback = 0;
- unsigned alloc_size;
+ size_t alloc_size;
int sort_clone_roots = 0;
if (!capable(CAP_SYS_ADMIN))
@@ -7169,25 +7262,20 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
goto out;
}
- sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL);
- if (!sctx->read_buf) {
- ret = -ENOMEM;
- goto out;
- }
-
sctx->pending_dir_moves = RB_ROOT;
sctx->waiting_dir_moves = RB_ROOT;
sctx->orphan_dirs = RB_ROOT;
- alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
-
- sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL);
+ sctx->clone_roots = kvcalloc(sizeof(*sctx->clone_roots),
+ arg->clone_sources_count + 1,
+ GFP_KERNEL);
if (!sctx->clone_roots) {
ret = -ENOMEM;
goto out;
}
- alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
+ alloc_size = array_size(sizeof(*arg->clone_sources),
+ arg->clone_sources_count);
if (arg->clone_sources_count) {
clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
@@ -7378,7 +7466,6 @@ out:
kvfree(sctx->clone_roots);
kvfree(sctx->send_buf);
- kvfree(sctx->read_buf);
name_cache_free(sctx);