aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/persistent-data
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r--drivers/md/persistent-data/Kconfig2
-rw-r--r--drivers/md/persistent-data/dm-array.c52
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c17
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h25
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c182
-rw-r--r--drivers/md/persistent-data/dm-btree-spine.c44
-rw-r--r--drivers/md/persistent-data/dm-btree.c553
-rw-r--r--drivers/md/persistent-data/dm-btree.h10
-rw-r--r--drivers/md/persistent-data/dm-space-map-common.c541
-rw-r--r--drivers/md/persistent-data/dm-space-map-common.h42
-rw-r--r--drivers/md/persistent-data/dm-space-map-disk.c92
-rw-r--r--drivers/md/persistent-data/dm-space-map-metadata.c105
-rw-r--r--drivers/md/persistent-data/dm-space-map.h18
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c61
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.h22
15 files changed, 1428 insertions, 338 deletions
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index baaec1ae29c1..f4f948b0e173 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -4,7 +4,7 @@ config DM_PERSISTENT_DATA
depends on BLK_DEV_DM
select LIBCRC32C
select DM_BUFIO
- ---help---
+ help
Library providing immutable on-disk data structure support for
device-mapper targets such as the thin provisioning target.
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 185dc60360b5..3a963d783a86 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -108,12 +108,10 @@ static void *element_at(struct dm_array_info *info, struct array_block *ab,
* in an array block.
*/
static void on_entries(struct dm_array_info *info, struct array_block *ab,
- void (*fn)(void *, const void *))
+ void (*fn)(void *, const void *, unsigned))
{
- unsigned i, nr_entries = le32_to_cpu(ab->nr_entries);
-
- for (i = 0; i < nr_entries; i++)
- fn(info->value_type.context, element_at(info, ab, i));
+ unsigned nr_entries = le32_to_cpu(ab->nr_entries);
+ fn(info->value_type.context, element_at(info, ab, 0), nr_entries);
}
/*
@@ -175,19 +173,18 @@ static int alloc_ablock(struct dm_array_info *info, size_t size_of_block,
static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
const void *value, unsigned new_nr)
{
- unsigned i;
- uint32_t nr_entries;
+ uint32_t nr_entries, delta, i;
struct dm_btree_value_type *vt = &info->value_type;
BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
BUG_ON(new_nr < le32_to_cpu(ab->nr_entries));
nr_entries = le32_to_cpu(ab->nr_entries);
- for (i = nr_entries; i < new_nr; i++) {
- if (vt->inc)
- vt->inc(vt->context, value);
+ delta = new_nr - nr_entries;
+ if (vt->inc)
+ vt->inc(vt->context, value, delta);
+ for (i = nr_entries; i < new_nr; i++)
memcpy(element_at(info, ab, i), value, vt->size);
- }
ab->nr_entries = cpu_to_le32(new_nr);
}
@@ -199,17 +196,16 @@ static void fill_ablock(struct dm_array_info *info, struct array_block *ab,
static void trim_ablock(struct dm_array_info *info, struct array_block *ab,
unsigned new_nr)
{
- unsigned i;
- uint32_t nr_entries;
+ uint32_t nr_entries, delta;
struct dm_btree_value_type *vt = &info->value_type;
BUG_ON(new_nr > le32_to_cpu(ab->max_entries));
BUG_ON(new_nr > le32_to_cpu(ab->nr_entries));
nr_entries = le32_to_cpu(ab->nr_entries);
- for (i = nr_entries; i > new_nr; i--)
- if (vt->dec)
- vt->dec(vt->context, element_at(info, ab, i - 1));
+ delta = nr_entries - new_nr; /* entries [new_nr, nr_entries) are being dropped */
+ if (vt->dec)
+ vt->dec(vt->context, element_at(info, ab, new_nr), delta);
ab->nr_entries = cpu_to_le32(new_nr);
}
@@ -573,16 +569,17 @@ static int grow(struct resize *resize)
* These are the value_type functions for the btree elements, which point
* to array blocks.
*/
-static void block_inc(void *context, const void *value)
+static void block_inc(void *context, const void *value, unsigned count)
{
- __le64 block_le;
+ const __le64 *block_le = value;
struct dm_array_info *info = context;
+ unsigned i;
- memcpy(&block_le, value, sizeof(block_le));
- dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le));
+ for (i = 0; i < count; i++, block_le++)
+ dm_tm_inc(info->btree_info.tm, le64_to_cpu(*block_le));
}
-static void block_dec(void *context, const void *value)
+static void __block_dec(void *context, const void *value)
{
int r;
uint64_t b;
@@ -621,6 +618,13 @@ static void block_dec(void *context, const void *value)
dm_tm_dec(info->btree_info.tm, b);
}
+static void block_dec(void *context, const void *value, unsigned count)
+{
+ unsigned i;
+ for (i = 0; i < count; i++, value += sizeof(__le64))
+ __block_dec(context, value);
+}
+
static int block_equal(void *context, const void *value1, const void *value2)
{
return !memcmp(value1, value2, sizeof(__le64));
@@ -711,7 +715,7 @@ static int populate_ablock_with_values(struct dm_array_info *info, struct array_
return r;
if (vt->inc)
- vt->inc(vt->context, element_at(info, ab, i));
+ vt->inc(vt->context, element_at(info, ab, i), 1);
}
ab->nr_entries = cpu_to_le32(new_nr);
@@ -822,9 +826,9 @@ static int array_set_value(struct dm_array_info *info, dm_block_t root,
old_value = element_at(info, ab, entry);
if (vt->dec &&
(!vt->equal || !vt->equal(vt->context, old_value, value))) {
- vt->dec(vt->context, old_value);
+ vt->dec(vt->context, old_value, 1);
if (vt->inc)
- vt->inc(vt->context, value);
+ vt->inc(vt->context, value, 1);
}
memcpy(old_value, value, info->value_type.size);
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 749ec268d957..11935864f50f 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -391,7 +391,8 @@ struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
sizeof(struct buffer_aux),
dm_block_manager_alloc_callback,
- dm_block_manager_write_callback);
+ dm_block_manager_write_callback,
+ 0);
if (IS_ERR(bm->bufio)) {
r = PTR_ERR(bm->bufio);
kfree(bm);
@@ -493,7 +494,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm,
void *p;
int r;
- if (bm->read_only)
+ if (dm_bm_is_read_only(bm))
return -EPERM;
p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
@@ -562,7 +563,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm,
struct buffer_aux *aux;
void *p;
- if (bm->read_only)
+ if (dm_bm_is_read_only(bm))
return -EPERM;
p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
@@ -602,7 +603,7 @@ EXPORT_SYMBOL_GPL(dm_bm_unlock);
int dm_bm_flush(struct dm_block_manager *bm)
{
- if (bm->read_only)
+ if (dm_bm_is_read_only(bm))
return -EPERM;
return dm_bufio_write_dirty_buffers(bm->bufio);
@@ -616,19 +617,21 @@ void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
- return bm->read_only;
+ return (bm ? bm->read_only : true);
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);
void dm_bm_set_read_only(struct dm_block_manager *bm)
{
- bm->read_only = true;
+ if (bm)
+ bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);
void dm_bm_set_read_write(struct dm_block_manager *bm)
{
- bm->read_only = false;
+ if (bm)
+ bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);
diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h
index a240990a7f33..893edb426dba 100644
--- a/drivers/md/persistent-data/dm-btree-internal.h
+++ b/drivers/md/persistent-data/dm-btree-internal.h
@@ -34,12 +34,12 @@ struct node_header {
__le32 max_entries;
__le32 value_size;
__le32 padding;
-} __packed;
+} __attribute__((packed, aligned(8)));
struct btree_node {
struct node_header header;
- __le64 keys[0];
-} __packed;
+ __le64 keys[];
+} __attribute__((packed, aligned(8)));
/*
@@ -68,7 +68,7 @@ struct ro_spine {
};
void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info);
-int exit_ro_spine(struct ro_spine *s);
+void exit_ro_spine(struct ro_spine *s);
int ro_step(struct ro_spine *s, dm_block_t new_child);
void ro_pop(struct ro_spine *s);
struct btree_node *ro_node(struct ro_spine *s);
@@ -83,7 +83,7 @@ struct shadow_spine {
};
void init_shadow_spine(struct shadow_spine *s, struct dm_btree_info *info);
-int exit_shadow_spine(struct shadow_spine *s);
+void exit_shadow_spine(struct shadow_spine *s);
int shadow_step(struct shadow_spine *s, dm_block_t b,
struct dm_btree_value_type *vt);
@@ -100,7 +100,7 @@ struct dm_block *shadow_parent(struct shadow_spine *s);
int shadow_has_parent(struct shadow_spine *s);
-int shadow_root(struct shadow_spine *s);
+dm_block_t shadow_root(struct shadow_spine *s);
/*
* Some inlines.
@@ -144,4 +144,17 @@ extern struct dm_block_validator btree_node_validator;
extern void init_le64_type(struct dm_transaction_manager *tm,
struct dm_btree_value_type *vt);
+/*
+ * This returns a shadowed btree leaf that you may modify.  In practice
+ * this means overwrites only, since an insert could cause a node to
+ * be split. Useful if you need access to the old value to calculate the
+ * new one.
+ *
+ * This only works with single level btrees. The given key must be present in
+ * the tree, otherwise -EINVAL will be returned.
+ */
+int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
+ uint64_t key, int *index,
+ dm_block_t *new_root, struct dm_block **leaf);
+
#endif /* DM_BTREE_INTERNAL_H */
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index eff04fa23dfa..4ead31e0d8ce 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -9,6 +9,9 @@
#include "dm-transaction-manager.h"
#include <linux/export.h>
+#include <linux/device-mapper.h>
+
+#define DM_MSG_PREFIX "btree"
/*
* Removing an entry from a btree
@@ -79,15 +82,23 @@ static void node_shift(struct btree_node *n, int shift)
}
}
-static void node_copy(struct btree_node *left, struct btree_node *right, int shift)
+static int node_copy(struct btree_node *left, struct btree_node *right, int shift)
{
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t value_size = le32_to_cpu(left->header.value_size);
- BUG_ON(value_size != le32_to_cpu(right->header.value_size));
+ if (value_size != le32_to_cpu(right->header.value_size)) {
+ DMERR("mismatched value size");
+ return -EILSEQ;
+ }
if (shift < 0) {
shift = -shift;
- BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries));
+
+ if (nr_left + shift > le32_to_cpu(left->header.max_entries)) {
+ DMERR("bad shift");
+ return -EINVAL;
+ }
+
memcpy(key_ptr(left, nr_left),
key_ptr(right, 0),
shift * sizeof(__le64));
@@ -95,7 +106,11 @@ static void node_copy(struct btree_node *left, struct btree_node *right, int shi
value_ptr(right, 0),
shift * value_size);
} else {
- BUG_ON(shift > le32_to_cpu(right->header.max_entries));
+ if (shift > le32_to_cpu(right->header.max_entries)) {
+ DMERR("bad shift");
+ return -EINVAL;
+ }
+
memcpy(key_ptr(right, 0),
key_ptr(left, nr_left - shift),
shift * sizeof(__le64));
@@ -103,6 +118,7 @@ static void node_copy(struct btree_node *left, struct btree_node *right, int shi
value_ptr(left, nr_left - shift),
shift * value_size);
}
+ return 0;
}
/*
@@ -170,35 +186,54 @@ static void exit_child(struct dm_btree_info *info, struct child *c)
dm_tm_unlock(info->tm, c->block);
}
-static void shift(struct btree_node *left, struct btree_node *right, int count)
+static int shift(struct btree_node *left, struct btree_node *right, int count)
{
+ int r;
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
uint32_t max_entries = le32_to_cpu(left->header.max_entries);
uint32_t r_max_entries = le32_to_cpu(right->header.max_entries);
- BUG_ON(max_entries != r_max_entries);
- BUG_ON(nr_left - count > max_entries);
- BUG_ON(nr_right + count > max_entries);
+ if (max_entries != r_max_entries) {
+ DMERR("node max_entries mismatch");
+ return -EILSEQ;
+ }
+
+ if (nr_left - count > max_entries) {
+ DMERR("node shift out of bounds");
+ return -EINVAL;
+ }
+
+ if (nr_right + count > max_entries) {
+ DMERR("node shift out of bounds");
+ return -EINVAL;
+ }
if (!count)
- return;
+ return 0;
if (count > 0) {
node_shift(right, count);
- node_copy(left, right, count);
+ r = node_copy(left, right, count);
+ if (r)
+ return r;
} else {
- node_copy(left, right, count);
+ r = node_copy(left, right, count);
+ if (r)
+ return r;
node_shift(right, count);
}
left->header.nr_entries = cpu_to_le32(nr_left - count);
right->header.nr_entries = cpu_to_le32(nr_right + count);
+
+ return 0;
}
-static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
- struct child *l, struct child *r)
+static int __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
+ struct child *l, struct child *r)
{
+ int ret;
struct btree_node *left = l->n;
struct btree_node *right = r->n;
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
@@ -229,9 +264,12 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
* Rebalance.
*/
unsigned target_left = (nr_left + nr_right) / 2;
- shift(left, right, nr_left - target_left);
+ ret = shift(left, right, nr_left - target_left);
+ if (ret)
+ return ret;
*key_ptr(parent, r->index) = right->keys[0];
}
+ return 0;
}
static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
@@ -253,12 +291,12 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
return r;
}
- __rebalance2(info, parent, &left, &right);
+ r = __rebalance2(info, parent, &left, &right);
exit_child(info, &left);
exit_child(info, &right);
- return 0;
+ return r;
}
/*
@@ -266,21 +304,30 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
* in right, then rebalance2. This wastes some cpu, but I want something
* simple atm.
*/
-static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
- struct child *l, struct child *c, struct child *r,
- struct btree_node *left, struct btree_node *center, struct btree_node *right,
- uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
+static int delete_center_node(struct dm_btree_info *info, struct btree_node *parent,
+ struct child *l, struct child *c, struct child *r,
+ struct btree_node *left, struct btree_node *center, struct btree_node *right,
+ uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
uint32_t max_entries = le32_to_cpu(left->header.max_entries);
unsigned shift = min(max_entries - nr_left, nr_center);
- BUG_ON(nr_left + shift > max_entries);
+ if (nr_left + shift > max_entries) {
+ DMERR("node shift out of bounds");
+ return -EINVAL;
+ }
+
node_copy(left, center, -shift);
left->header.nr_entries = cpu_to_le32(nr_left + shift);
if (shift != nr_center) {
shift = nr_center - shift;
- BUG_ON((nr_right + shift) > max_entries);
+
+ if ((nr_right + shift) > max_entries) {
+ DMERR("node shift out of bounds");
+ return -EINVAL;
+ }
+
node_shift(right, shift);
node_copy(center, right, shift);
right->header.nr_entries = cpu_to_le32(nr_right + shift);
@@ -291,18 +338,18 @@ static void delete_center_node(struct dm_btree_info *info, struct btree_node *pa
r->index--;
dm_tm_dec(info->tm, dm_block_location(c->block));
- __rebalance2(info, parent, l, r);
+ return __rebalance2(info, parent, l, r);
}
/*
* Redistributes entries among 3 sibling nodes.
*/
-static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
- struct child *l, struct child *c, struct child *r,
- struct btree_node *left, struct btree_node *center, struct btree_node *right,
- uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
+static int redistribute3(struct dm_btree_info *info, struct btree_node *parent,
+ struct child *l, struct child *c, struct child *r,
+ struct btree_node *left, struct btree_node *center, struct btree_node *right,
+ uint32_t nr_left, uint32_t nr_center, uint32_t nr_right)
{
- int s;
+ int s, ret;
uint32_t max_entries = le32_to_cpu(left->header.max_entries);
unsigned total = nr_left + nr_center + nr_right;
unsigned target_right = total / 3;
@@ -317,35 +364,55 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
if (s < 0 && nr_center < -s) {
/* not enough in central node */
- shift(left, center, -nr_center);
+ ret = shift(left, center, -nr_center);
+ if (ret)
+ return ret;
+
s += nr_center;
- shift(left, right, s);
- nr_right += s;
- } else
- shift(left, center, s);
+ ret = shift(left, right, s);
+ if (ret)
+ return ret;
- shift(center, right, target_right - nr_right);
+ nr_right += s;
+ } else {
+ ret = shift(left, center, s);
+ if (ret)
+ return ret;
+ }
+ ret = shift(center, right, target_right - nr_right);
+ if (ret)
+ return ret;
} else {
s = target_right - nr_right;
if (s > 0 && nr_center < s) {
/* not enough in central node */
- shift(center, right, nr_center);
+ ret = shift(center, right, nr_center);
+ if (ret)
+ return ret;
s -= nr_center;
- shift(left, right, s);
+ ret = shift(left, right, s);
+ if (ret)
+ return ret;
nr_left -= s;
- } else
- shift(center, right, s);
+ } else {
+ ret = shift(center, right, s);
+ if (ret)
+ return ret;
+ }
- shift(left, center, nr_left - target_left);
+ ret = shift(left, center, nr_left - target_left);
+ if (ret)
+ return ret;
}
*key_ptr(parent, c->index) = center->keys[0];
*key_ptr(parent, r->index) = right->keys[0];
+ return 0;
}
-static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
- struct child *l, struct child *c, struct child *r)
+static int __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
+ struct child *l, struct child *c, struct child *r)
{
struct btree_node *left = l->n;
struct btree_node *center = c->n;
@@ -357,15 +424,19 @@ static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
unsigned threshold = merge_threshold(left) * 4 + 1;
- BUG_ON(left->header.max_entries != center->header.max_entries);
- BUG_ON(center->header.max_entries != right->header.max_entries);
+ if ((left->header.max_entries != center->header.max_entries) ||
+ (center->header.max_entries != right->header.max_entries)) {
+ DMERR("bad btree metadata, max_entries differ");
+ return -EILSEQ;
+ }
+
+ if ((nr_left + nr_center + nr_right) < threshold) {
+ return delete_center_node(info, parent, l, c, r, left, center, right,
+ nr_left, nr_center, nr_right);
+ }
- if ((nr_left + nr_center + nr_right) < threshold)
- delete_center_node(info, parent, l, c, r, left, center, right,
- nr_left, nr_center, nr_right);
- else
- redistribute3(info, parent, l, c, r, left, center, right,
- nr_left, nr_center, nr_right);
+ return redistribute3(info, parent, l, c, r, left, center, right,
+ nr_left, nr_center, nr_right);
}
static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
@@ -395,13 +466,13 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
return r;
}
- __rebalance3(info, parent, &left, &center, &right);
+ r = __rebalance3(info, parent, &left, &center, &right);
exit_child(info, &left);
exit_child(info, &center);
exit_child(info, &right);
- return 0;
+ return r;
}
static int rebalance_children(struct shadow_spine *s,
@@ -423,9 +494,9 @@ static int rebalance_children(struct shadow_spine *s,
memcpy(n, dm_block_data(child),
dm_bm_block_size(dm_tm_get_bm(info->tm)));
- dm_tm_unlock(info->tm, child);
dm_tm_dec(info->tm, dm_block_location(child));
+ dm_tm_unlock(info->tm, child);
return 0;
}
@@ -544,12 +615,13 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
if (info->value_type.dec)
info->value_type.dec(info->value_type.context,
- value_ptr(n, index));
+ value_ptr(n, index), 1);
delete_at(n, index);
}
- *new_root = shadow_root(&spine);
+ if (!r)
+ *new_root = shadow_root(&spine);
exit_shadow_spine(&spine);
return r;
@@ -653,7 +725,7 @@ static int remove_one(struct dm_btree_info *info, dm_block_t root,
if (k >= keys[last_level] && k < end_key) {
if (info->value_type.dec)
info->value_type.dec(info->value_type.context,
- value_ptr(n, index));
+ value_ptr(n, index), 1);
delete_at(n, index);
keys[last_level] = k + 1ull;
diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c
index b27b8091a1ca..e653458888a7 100644
--- a/drivers/md/persistent-data/dm-btree-spine.c
+++ b/drivers/md/persistent-data/dm-btree-spine.c
@@ -15,10 +15,6 @@
#define BTREE_CSUM_XOR 121107
-static int node_check(struct dm_block_validator *v,
- struct dm_block *b,
- size_t block_size);
-
static void node_prepare_for_write(struct dm_block_validator *v,
struct dm_block *b,
size_t block_size)
@@ -30,8 +26,6 @@ static void node_prepare_for_write(struct dm_block_validator *v,
h->csum = cpu_to_le32(dm_bm_checksum(&h->flags,
block_size - sizeof(__le32),
BTREE_CSUM_XOR));
-
- BUG_ON(node_check(v, b, 4096));
}
static int node_check(struct dm_block_validator *v,
@@ -42,7 +36,7 @@ static int node_check(struct dm_block_validator *v,
struct node_header *h = &n->header;
size_t value_size;
__le32 csum_disk;
- uint32_t flags;
+ uint32_t flags, nr_entries, max_entries;
if (dm_block_location(b) != le64_to_cpu(h->blocknr)) {
DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu",
@@ -59,15 +53,17 @@ static int node_check(struct dm_block_validator *v,
return -EILSEQ;
}
+ nr_entries = le32_to_cpu(h->nr_entries);
+ max_entries = le32_to_cpu(h->max_entries);
value_size = le32_to_cpu(h->value_size);
if (sizeof(struct node_header) +
- (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) {
+ (sizeof(__le64) + value_size) * max_entries > block_size) {
DMERR_LIMIT("node_check failed: max_entries too large");
return -EILSEQ;
}
- if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) {
+ if (nr_entries > max_entries) {
DMERR_LIMIT("node_check failed: too many entries");
return -EILSEQ;
}
@@ -132,15 +128,13 @@ void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info)
s->nodes[1] = NULL;
}
-int exit_ro_spine(struct ro_spine *s)
+void exit_ro_spine(struct ro_spine *s)
{
- int r = 0, i;
+ int i;
for (i = 0; i < s->count; i++) {
unlock_block(s->info, s->nodes[i]);
}
-
- return r;
}
int ro_step(struct ro_spine *s, dm_block_t new_child)
@@ -185,15 +179,13 @@ void init_shadow_spine(struct shadow_spine *s, struct dm_btree_info *info)
s->count = 0;
}
-int exit_shadow_spine(struct shadow_spine *s)
+void exit_shadow_spine(struct shadow_spine *s)
{
- int r = 0, i;
+ int i;
for (i = 0; i < s->count; i++) {
unlock_block(s->info, s->nodes[i]);
}
-
- return r;
}
int shadow_step(struct shadow_spine *s, dm_block_t b,
@@ -237,27 +229,19 @@ int shadow_has_parent(struct shadow_spine *s)
return s->count >= 2;
}
-int shadow_root(struct shadow_spine *s)
+dm_block_t shadow_root(struct shadow_spine *s)
{
return s->root;
}
-static void le64_inc(void *context, const void *value_le)
+static void le64_inc(void *context, const void *value_le, unsigned count)
{
- struct dm_transaction_manager *tm = context;
- __le64 v_le;
-
- memcpy(&v_le, value_le, sizeof(v_le));
- dm_tm_inc(tm, le64_to_cpu(v_le));
+ dm_tm_with_runs(context, value_le, count, dm_tm_inc_range);
}
-static void le64_dec(void *context, const void *value_le)
+static void le64_dec(void *context, const void *value_le, unsigned count)
{
- struct dm_transaction_manager *tm = context;
- __le64 v_le;
-
- memcpy(&v_le, value_le, sizeof(v_le));
- dm_tm_dec(tm, le64_to_cpu(v_le));
+ dm_tm_with_runs(context, value_le, count, dm_tm_dec_range);
}
static int le64_equal(void *context, const void *value1_le, const void *value2_le)
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 8aae0624a297..5ce64e93aae7 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -71,26 +71,26 @@ static int upper_bound(struct btree_node *n, uint64_t key)
void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
struct dm_btree_value_type *vt)
{
- unsigned i;
uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);
if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
- for (i = 0; i < nr_entries; i++)
- dm_tm_inc(tm, value64(n, i));
+ dm_tm_with_runs(tm, value_ptr(n, 0), nr_entries, dm_tm_inc_range);
+
else if (vt->inc)
- for (i = 0; i < nr_entries; i++)
- vt->inc(vt->context, value_ptr(n, i));
+ vt->inc(vt->context, value_ptr(n, 0), nr_entries);
}
static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
- uint64_t key, void *value)
- __dm_written_to_disk(value)
+ uint64_t key, void *value)
+ __dm_written_to_disk(value)
{
uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
+ uint32_t max_entries = le32_to_cpu(node->header.max_entries);
__le64 key_le = cpu_to_le64(key);
if (index > nr_entries ||
- index >= le32_to_cpu(node->header.max_entries)) {
+ index >= max_entries ||
+ nr_entries >= max_entries) {
DMERR("too many entries in btree node for insert");
__dm_unbless_for_disk(value);
return -ENOMEM;
@@ -318,13 +318,9 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
goto out;
} else {
- if (info->value_type.dec) {
- unsigned i;
-
- for (i = 0; i < f->nr_children; i++)
- info->value_type.dec(info->value_type.context,
- value_ptr(f->n, i));
- }
+ if (info->value_type.dec)
+ info->value_type.dec(info->value_type.context,
+ value_ptr(f->n, 0), f->nr_children);
pop_frame(s);
}
}
@@ -366,7 +362,8 @@ static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
} while (!(flags & LEAF_NODE));
*result_key = le64_to_cpu(ro_node(s)->keys[i]);
- memcpy(v, value_ptr(ro_node(s), i), value_size);
+ if (v)
+ memcpy(v, value_ptr(ro_node(s), i), value_size);
return 0;
}
@@ -499,6 +496,122 @@ out:
EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
+/*----------------------------------------------------------------*/
+
+/*
+ * Copies entries from one region of a btree node to another. The regions
+ * must not overlap.
+ */
+static void copy_entries(struct btree_node *dest, unsigned dest_offset,
+ struct btree_node *src, unsigned src_offset,
+ unsigned count)
+{
+ size_t value_size = le32_to_cpu(dest->header.value_size);
+ memcpy(dest->keys + dest_offset, src->keys + src_offset, count * sizeof(uint64_t));
+ memcpy(value_ptr(dest, dest_offset), value_ptr(src, src_offset), count * value_size);
+}
+
+/*
+ * Moves entries from one region of a btree node to another. The regions
+ * may overlap.
+ */
+static void move_entries(struct btree_node *dest, unsigned dest_offset,
+ struct btree_node *src, unsigned src_offset,
+ unsigned count)
+{
+ size_t value_size = le32_to_cpu(dest->header.value_size);
+ memmove(dest->keys + dest_offset, src->keys + src_offset, count * sizeof(uint64_t));
+ memmove(value_ptr(dest, dest_offset), value_ptr(src, src_offset), count * value_size);
+}
+
+/*
+ * Erases the first 'count' entries of a btree node, shifting following
+ * entries down into their place.
+ */
+static void shift_down(struct btree_node *n, unsigned count)
+{
+ move_entries(n, 0, n, count, le32_to_cpu(n->header.nr_entries) - count);
+}
+
+/*
+ * Moves entries in a btree node up 'count' places, making space for
+ * new entries at the start of the node.
+ */
+static void shift_up(struct btree_node *n, unsigned count)
+{
+ move_entries(n, count, n, 0, le32_to_cpu(n->header.nr_entries));
+}
+
+/*
+ * Redistributes entries between two btree nodes to make them
+ * have similar numbers of entries.
+ */
+static void redistribute2(struct btree_node *left, struct btree_node *right)
+{
+ unsigned nr_left = le32_to_cpu(left->header.nr_entries);
+ unsigned nr_right = le32_to_cpu(right->header.nr_entries);
+ unsigned total = nr_left + nr_right;
+ unsigned target_left = total / 2;
+ unsigned target_right = total - target_left;
+
+ if (nr_left < target_left) {
+ unsigned delta = target_left - nr_left;
+ copy_entries(left, nr_left, right, 0, delta);
+ shift_down(right, delta);
+ } else if (nr_left > target_left) {
+ unsigned delta = nr_left - target_left;
+ if (nr_right)
+ shift_up(right, delta);
+ copy_entries(right, 0, left, target_left, delta);
+ }
+
+ left->header.nr_entries = cpu_to_le32(target_left);
+ right->header.nr_entries = cpu_to_le32(target_right);
+}
+
+/*
+ * Redistribute entries between three nodes. Assumes the central
+ * node is empty.
+ */
+static void redistribute3(struct btree_node *left, struct btree_node *center,
+ struct btree_node *right)
+{
+ unsigned nr_left = le32_to_cpu(left->header.nr_entries);
+ unsigned nr_center = le32_to_cpu(center->header.nr_entries);
+ unsigned nr_right = le32_to_cpu(right->header.nr_entries);
+ unsigned total, target_left, target_center, target_right;
+
+ BUG_ON(nr_center);
+
+ total = nr_left + nr_right;
+ target_left = total / 3;
+ target_center = (total - target_left) / 2;
+ target_right = (total - target_left - target_center);
+
+ if (nr_left < target_left) {
+ unsigned left_short = target_left - nr_left;
+ copy_entries(left, nr_left, right, 0, left_short);
+ copy_entries(center, 0, right, left_short, target_center);
+ shift_down(right, nr_right - target_right);
+
+ } else if (nr_left < (target_left + target_center)) {
+ unsigned left_to_center = nr_left - target_left;
+ copy_entries(center, 0, left, target_left, left_to_center);
+ copy_entries(center, left_to_center, right, 0, target_center - left_to_center);
+ shift_down(right, nr_right - target_right);
+
+ } else {
+ unsigned right_short = target_right - nr_right;
+ shift_up(right, right_short);
+ copy_entries(right, 0, left, nr_left - right_short, right_short);
+ copy_entries(center, 0, left, target_left, nr_left - target_left);
+ }
+
+ left->header.nr_entries = cpu_to_le32(target_left);
+ center->header.nr_entries = cpu_to_le32(target_center);
+ right->header.nr_entries = cpu_to_le32(target_right);
+}
+
/*
* Splits a node by creating a sibling node and shifting half the nodes
* contents across. Assumes there is a parent node, and it has room for
@@ -529,12 +642,10 @@ EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
*
* Where A* is a shadow of A.
*/
-static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
- uint64_t key)
+static int split_one_into_two(struct shadow_spine *s, unsigned parent_index,
+ struct dm_btree_value_type *vt, uint64_t key)
{
int r;
- size_t size;
- unsigned nr_left, nr_right;
struct dm_block *left, *right, *parent;
struct btree_node *ln, *rn, *pn;
__le64 location;
@@ -548,36 +659,18 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
ln = dm_block_data(left);
rn = dm_block_data(right);
- nr_left = le32_to_cpu(ln->header.nr_entries) / 2;
- nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left;
-
- ln->header.nr_entries = cpu_to_le32(nr_left);
-
rn->header.flags = ln->header.flags;
- rn->header.nr_entries = cpu_to_le32(nr_right);
+ rn->header.nr_entries = cpu_to_le32(0);
rn->header.max_entries = ln->header.max_entries;
rn->header.value_size = ln->header.value_size;
- memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0]));
-
- size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ?
- sizeof(uint64_t) : s->info->value_type.size;
- memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left),
- size * nr_right);
+ redistribute2(ln, rn);
- /*
- * Patch up the parent
- */
+ /* patch up the parent */
parent = shadow_parent(s);
-
pn = dm_block_data(parent);
- location = cpu_to_le64(dm_block_location(left));
- __dm_bless_for_disk(&location);
- memcpy_disk(value_ptr(pn, parent_index),
- &location, sizeof(__le64));
location = cpu_to_le64(dm_block_location(right));
__dm_bless_for_disk(&location);
-
r = insert_at(sizeof(__le64), pn, parent_index + 1,
le64_to_cpu(rn->keys[0]), &location);
if (r) {
@@ -585,6 +678,7 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
return r;
}
+ /* patch up the spine */
if (key < le64_to_cpu(rn->keys[0])) {
unlock_block(s->info, right);
s->nodes[1] = left;
@@ -597,6 +691,121 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
}
/*
+ * We often need to modify a sibling node. This function shadows a particular
+ * child of the given parent node, making sure to update the parent to point
+ * to the new shadow.
+ */
+static int shadow_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
+ struct btree_node *parent, unsigned index,
+ struct dm_block **result)
+{
+ int r, inc;
+ dm_block_t root;
+ struct btree_node *node;
+
+ root = value64(parent, index);
+
+ r = dm_tm_shadow_block(info->tm, root, &btree_node_validator,
+ result, &inc);
+ if (r)
+ return r;
+
+ node = dm_block_data(*result);
+
+ if (inc)
+ inc_children(info->tm, node, vt);
+
+ *((__le64 *) value_ptr(parent, index)) =
+ cpu_to_le64(dm_block_location(*result));
+
+ return 0;
+}
+
+/*
+ * Splits two nodes into three. This is more work, but results in fuller
+ * nodes, so saves metadata space.
+ */
+static int split_two_into_three(struct shadow_spine *s, unsigned parent_index,
+ struct dm_btree_value_type *vt, uint64_t key)
+{
+ int r;
+ unsigned middle_index;
+ struct dm_block *left, *middle, *right, *parent;
+ struct btree_node *ln, *rn, *mn, *pn;
+ __le64 location;
+
+ parent = shadow_parent(s);
+ pn = dm_block_data(parent);
+
+ if (parent_index == 0) {
+ middle_index = 1;
+ left = shadow_current(s);
+ r = shadow_child(s->info, vt, pn, parent_index + 1, &right);
+ if (r)
+ return r;
+ } else {
+ middle_index = parent_index;
+ right = shadow_current(s);
+ r = shadow_child(s->info, vt, pn, parent_index - 1, &left);
+ if (r)
+ return r;
+ }
+
+ r = new_block(s->info, &middle);
+ if (r < 0)
+ return r;
+
+ ln = dm_block_data(left);
+ mn = dm_block_data(middle);
+ rn = dm_block_data(right);
+
+ mn->header.nr_entries = cpu_to_le32(0);
+ mn->header.flags = ln->header.flags;
+ mn->header.max_entries = ln->header.max_entries;
+ mn->header.value_size = ln->header.value_size;
+
+ redistribute3(ln, mn, rn);
+
+ /* patch up the parent */
+ pn->keys[middle_index] = rn->keys[0];
+ location = cpu_to_le64(dm_block_location(middle));
+ __dm_bless_for_disk(&location);
+ r = insert_at(sizeof(__le64), pn, middle_index,
+ le64_to_cpu(mn->keys[0]), &location);
+ if (r) {
+ if (shadow_current(s) != left)
+ unlock_block(s->info, left);
+
+ unlock_block(s->info, middle);
+
+ if (shadow_current(s) != right)
+ unlock_block(s->info, right);
+
+ return r;
+ }
+
+
+ /* patch up the spine */
+ if (key < le64_to_cpu(mn->keys[0])) {
+ unlock_block(s->info, middle);
+ unlock_block(s->info, right);
+ s->nodes[1] = left;
+ } else if (key < le64_to_cpu(rn->keys[0])) {
+ unlock_block(s->info, left);
+ unlock_block(s->info, right);
+ s->nodes[1] = middle;
+ } else {
+ unlock_block(s->info, left);
+ unlock_block(s->info, middle);
+ s->nodes[1] = right;
+ }
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
* Splits a node by creating two new children beneath the given node.
*
* Before:
@@ -689,6 +898,186 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
return 0;
}
+/*----------------------------------------------------------------*/
+
+/*
+ * Redistributes a node's entries with its left sibling.
+ */
+static int rebalance_left(struct shadow_spine *s, struct dm_btree_value_type *vt,
+ unsigned parent_index, uint64_t key)
+{
+ int r;
+ struct dm_block *sib;
+ struct btree_node *left, *right, *parent = dm_block_data(shadow_parent(s));
+
+ r = shadow_child(s->info, vt, parent, parent_index - 1, &sib);
+ if (r)
+ return r;
+
+ left = dm_block_data(sib);
+ right = dm_block_data(shadow_current(s));
+ redistribute2(left, right);
+ *key_ptr(parent, parent_index) = right->keys[0];
+
+ if (key < le64_to_cpu(right->keys[0])) {
+ unlock_block(s->info, s->nodes[1]);
+ s->nodes[1] = sib;
+ } else {
+ unlock_block(s->info, sib);
+ }
+
+ return 0;
+}
+
+/*
+ * Redistributes a node's entries with its right sibling.
+ */
+static int rebalance_right(struct shadow_spine *s, struct dm_btree_value_type *vt,
+ unsigned parent_index, uint64_t key)
+{
+ int r;
+ struct dm_block *sib;
+ struct btree_node *left, *right, *parent = dm_block_data(shadow_parent(s));
+
+ r = shadow_child(s->info, vt, parent, parent_index + 1, &sib);
+ if (r)
+ return r;
+
+ left = dm_block_data(shadow_current(s));
+ right = dm_block_data(sib);
+ redistribute2(left, right);
+ *key_ptr(parent, parent_index + 1) = right->keys[0];
+
+ if (key < le64_to_cpu(right->keys[0])) {
+ unlock_block(s->info, sib);
+ } else {
+ unlock_block(s->info, s->nodes[1]);
+ s->nodes[1] = sib;
+ }
+
+ return 0;
+}
+
+/*
+ * Returns the number of spare entries in a node.
+ */
+static int get_node_free_space(struct dm_btree_info *info, dm_block_t b, unsigned *space)
+{
+ int r;
+ unsigned nr_entries;
+ struct dm_block *block;
+ struct btree_node *node;
+
+ r = bn_read_lock(info, b, &block);
+ if (r)
+ return r;
+
+ node = dm_block_data(block);
+ nr_entries = le32_to_cpu(node->header.nr_entries);
+ *space = le32_to_cpu(node->header.max_entries) - nr_entries;
+
+ unlock_block(info, block);
+ return 0;
+}
+
+/*
+ * Make space in a node, either by moving some entries to a sibling,
+ * or creating a new sibling node. SPACE_THRESHOLD defines the minimum
+ * number of free entries that must be in the sibling to make the move
+ * worthwhile. If the siblings are shared (e.g. part of a snapshot),
+ * then they are not touched, since this would break sharing and so
+ * consume more space than we save.
+ */
+#define SPACE_THRESHOLD 8
+static int rebalance_or_split(struct shadow_spine *s, struct dm_btree_value_type *vt,
+ unsigned parent_index, uint64_t key)
+{
+ int r;
+ struct btree_node *parent = dm_block_data(shadow_parent(s));
+ unsigned nr_parent = le32_to_cpu(parent->header.nr_entries);
+ unsigned free_space;
+ int left_shared = 0, right_shared = 0;
+
+ /* Should we move entries to the left sibling? */
+ if (parent_index > 0) {
+ dm_block_t left_b = value64(parent, parent_index - 1);
+ r = dm_tm_block_is_shared(s->info->tm, left_b, &left_shared);
+ if (r)
+ return r;
+
+ if (!left_shared) {
+ r = get_node_free_space(s->info, left_b, &free_space);
+ if (r)
+ return r;
+
+ if (free_space >= SPACE_THRESHOLD)
+ return rebalance_left(s, vt, parent_index, key);
+ }
+ }
+
+ /* Should we move entries to the right sibling? */
+ if (parent_index < (nr_parent - 1)) {
+ dm_block_t right_b = value64(parent, parent_index + 1);
+ r = dm_tm_block_is_shared(s->info->tm, right_b, &right_shared);
+ if (r)
+ return r;
+
+ if (!right_shared) {
+ r = get_node_free_space(s->info, right_b, &free_space);
+ if (r)
+ return r;
+
+ if (free_space >= SPACE_THRESHOLD)
+ return rebalance_right(s, vt, parent_index, key);
+ }
+ }
+
+ /*
+ * We need to split the node, normally we split two nodes
+ * into three. But when inserting a sequence that is either
+ * monotonically increasing or decreasing it's better to split
+ * a single node into two.
+ */
+ if (left_shared || right_shared || (nr_parent <= 2) ||
+ (parent_index == 0) || (parent_index + 1 == nr_parent)) {
+ return split_one_into_two(s, parent_index, vt, key);
+ } else {
+ return split_two_into_three(s, parent_index, vt, key);
+ }
+}
+
+/*
+ * Does the node contain a particular key?
+ */
+static bool contains_key(struct btree_node *node, uint64_t key)
+{
+ int i = lower_bound(node, key);
+
+ if (i >= 0 && le64_to_cpu(node->keys[i]) == key)
+ return true;
+
+ return false;
+}
+
+/*
+ * In general we preemptively make sure there's a free entry in every
+ * node on the spine when doing an insert. But we can avoid that with
+ * leaf nodes if we know it's an overwrite.
+ */
+static bool has_space_for_insert(struct btree_node *node, uint64_t key)
+{
+ if (node->header.nr_entries == node->header.max_entries) {
+ if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
+ /* we don't need space if it's an overwrite */
+ return contains_key(node, key);
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
struct dm_btree_value_type *vt,
uint64_t key, unsigned *index)
@@ -718,17 +1107,18 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
node = dm_block_data(shadow_current(s));
- if (node->header.nr_entries == node->header.max_entries) {
+ if (!has_space_for_insert(node, key)) {
if (top)
r = btree_split_beneath(s, key);
else
- r = btree_split_sibling(s, i, key);
+ r = rebalance_or_split(s, vt, i, key);
if (r < 0)
return r;
- }
- node = dm_block_data(shadow_current(s));
+ /* making space can cause the current node to change */
+ node = dm_block_data(shadow_current(s));
+ }
i = lower_bound(node, key);
@@ -752,6 +1142,77 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
return 0;
}
+static int __btree_get_overwrite_leaf(struct shadow_spine *s, dm_block_t root,
+ uint64_t key, int *index)
+{
+ int r, i = -1;
+ struct btree_node *node;
+
+ *index = 0;
+ for (;;) {
+ r = shadow_step(s, root, &s->info->value_type);
+ if (r < 0)
+ return r;
+
+ node = dm_block_data(shadow_current(s));
+
+ /*
+ * We have to patch up the parent node, ugly, but I don't
+ * see a way to do this automatically as part of the spine
+ * op.
+ */
+ if (shadow_has_parent(s) && i >= 0) {
+ __le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));
+
+ __dm_bless_for_disk(&location);
+ memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
+ &location, sizeof(__le64));
+ }
+
+ node = dm_block_data(shadow_current(s));
+ i = lower_bound(node, key);
+
+ BUG_ON(i < 0);
+ BUG_ON(i >= le32_to_cpu(node->header.nr_entries));
+
+ if (le32_to_cpu(node->header.flags) & LEAF_NODE) {
+ if (key != le64_to_cpu(node->keys[i]))
+ return -EINVAL;
+ break;
+ }
+
+ root = value64(node, i);
+ }
+
+ *index = i;
+ return 0;
+}
+
+int btree_get_overwrite_leaf(struct dm_btree_info *info, dm_block_t root,
+ uint64_t key, int *index,
+ dm_block_t *new_root, struct dm_block **leaf)
+{
+ int r;
+ struct shadow_spine spine;
+
+ BUG_ON(info->levels > 1);
+ init_shadow_spine(&spine, info);
+ r = __btree_get_overwrite_leaf(&spine, root, key, index);
+ if (!r) {
+ *new_root = shadow_root(&spine);
+ *leaf = shadow_current(&spine);
+
+ /*
+ * Decrement the count so exit_shadow_spine() doesn't
+ * unlock the leaf.
+ */
+ spine.count--;
+ }
+ exit_shadow_spine(&spine);
+
+ return r;
+}
+
static bool need_insert(struct btree_node *node, uint64_t *keys,
unsigned level, unsigned index)
{
@@ -828,7 +1289,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
value_ptr(n, index),
value))) {
info->value_type.dec(info->value_type.context,
- value_ptr(n, index));
+ value_ptr(n, index), 1);
}
memcpy_disk(value_ptr(n, index),
value, info->value_type.size);
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 3dc5bb1a4748..d2ae5aa4d00b 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -51,21 +51,21 @@ struct dm_btree_value_type {
*/
/*
- * The btree is making a duplicate of the value, for instance
+ * The btree is making a duplicate of a run of values, for instance
* because previously-shared btree nodes have now diverged.
* @value argument is the new copy that the copy function may modify.
* (Probably it just wants to increment a reference count
* somewhere.) This method is _not_ called for insertion of a new
* value: It is assumed the ref count is already 1.
*/
- void (*inc)(void *context, const void *value);
+ void (*inc)(void *context, const void *value, unsigned count);
/*
- * This value is being deleted. The btree takes care of freeing
+ * These values are being deleted. The btree takes care of freeing
* the memory pointed to by @value. Often the del function just
- * needs to decrement a reference count somewhere.
+ * needs to decrement reference counts somewhere.
*/
- void (*dec)(void *context, const void *value);
+ void (*dec)(void *context, const void *value, unsigned count);
/*
* A test for equality between two values. When a value is
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index d8b4125e338c..bfbfa750e016 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -6,6 +6,8 @@
#include "dm-space-map-common.h"
#include "dm-transaction-manager.h"
+#include "dm-btree-internal.h"
+#include "dm-persistent-data-internal.h"
#include <linux/bitops.h>
#include <linux/device-mapper.h>
@@ -281,6 +283,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
struct disk_index_entry ie_disk;
struct dm_block *blk;
+ if (b >= ll->nr_blocks) {
+ DMERR_LIMIT("metadata block out of bounds");
+ return -EINVAL;
+ }
+
b = do_div(index, ll->entries_per_block);
r = ll->load_ie(ll, index, &ie_disk);
if (r < 0)
@@ -339,6 +346,8 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
*/
begin = do_div(index_begin, ll->entries_per_block);
end = do_div(end, ll->entries_per_block);
+ if (end == 0)
+ end = ll->entries_per_block;
for (i = index_begin; i < index_end; i++, begin = 0) {
struct dm_block *blk;
@@ -407,12 +416,13 @@ int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
return r;
}
-static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
- int (*mutator)(void *context, uint32_t old, uint32_t *new),
- void *context, enum allocation_event *ev)
+/*----------------------------------------------------------------*/
+
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
+ uint32_t ref_count, int32_t *nr_allocations)
{
int r;
- uint32_t bit, old, ref_count;
+ uint32_t bit, old;
struct dm_block *nb;
dm_block_t index = b;
struct disk_index_entry ie_disk;
@@ -431,10 +441,9 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
return r;
}
ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
-
bm_le = dm_bitmap_data(nb);
- old = sm_lookup_bitmap(bm_le, bit);
+ old = sm_lookup_bitmap(bm_le, bit);
if (old > 2) {
r = sm_ll_lookup_big_ref_count(ll, b, &old);
if (r < 0) {
@@ -443,7 +452,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
}
}
- r = mutator(context, old, &ref_count);
if (r) {
dm_tm_unlock(ll->tm, nb);
return r;
@@ -451,7 +459,6 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
if (ref_count <= 2) {
sm_set_bitmap(bm_le, bit, ref_count);
-
dm_tm_unlock(ll->tm, nb);
if (old > 2) {
@@ -478,62 +485,459 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
}
if (ref_count && !old) {
- *ev = SM_ALLOC;
+ *nr_allocations = 1;
ll->nr_allocated++;
le32_add_cpu(&ie_disk.nr_free, -1);
if (le32_to_cpu(ie_disk.none_free_before) == bit)
ie_disk.none_free_before = cpu_to_le32(bit + 1);
} else if (old && !ref_count) {
- *ev = SM_FREE;
+ *nr_allocations = -1;
ll->nr_allocated--;
le32_add_cpu(&ie_disk.nr_free, 1);
ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
} else
- *ev = SM_NONE;
+ *nr_allocations = 0;
return ll->save_ie(ll, index, &ie_disk);
}
-static int set_ref_count(void *context, uint32_t old, uint32_t *new)
+/*----------------------------------------------------------------*/
+
+/*
+ * Holds useful intermediate results for the range based inc and dec
+ * operations.
+ */
+struct inc_context {
+ struct disk_index_entry ie_disk;
+ struct dm_block *bitmap_block;
+ void *bitmap;
+
+ struct dm_block *overflow_leaf;
+};
+
+static inline void init_inc_context(struct inc_context *ic)
+{
+ ic->bitmap_block = NULL;
+ ic->bitmap = NULL;
+ ic->overflow_leaf = NULL;
+}
+
+static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
+{
+ if (ic->bitmap_block)
+ dm_tm_unlock(ll->tm, ic->bitmap_block);
+ if (ic->overflow_leaf)
+ dm_tm_unlock(ll->tm, ic->overflow_leaf);
+}
+
+static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
+{
+ exit_inc_context(ll, ic);
+ init_inc_context(ic);
+}
+
+/*
+ * Confirms a btree node contains a particular key at an index.
+ */
+static bool contains_key(struct btree_node *n, uint64_t key, int index)
{
- *new = *((uint32_t *) context);
+ return index >= 0 &&
+ index < le32_to_cpu(n->header.nr_entries) &&
+ le64_to_cpu(n->keys[index]) == key;
+}
+
+static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
+{
+ int r;
+ int index;
+ struct btree_node *n;
+ __le32 *v_ptr;
+ uint32_t rc;
+
+ /*
+ * bitmap_block needs to be unlocked because getting the
+ * overflow_leaf may need to allocate, and thus use the space map.
+ */
+ reset_inc_context(ll, ic);
+
+ r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
+ b, &index, &ll->ref_count_root, &ic->overflow_leaf);
+ if (r < 0)
+ return r;
+
+ n = dm_block_data(ic->overflow_leaf);
+
+ if (!contains_key(n, b, index)) {
+ DMERR("overflow btree is missing an entry");
+ return -EINVAL;
+ }
+
+ v_ptr = value_ptr(n, index);
+ rc = le32_to_cpu(*v_ptr) + 1;
+ *v_ptr = cpu_to_le32(rc);
+
return 0;
}
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
- uint32_t ref_count, enum allocation_event *ev)
+static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
+{
+ int index;
+ struct btree_node *n;
+ __le32 *v_ptr;
+ uint32_t rc;
+
+ /*
+ * Do we already have the correct overflow leaf?
+ */
+ if (ic->overflow_leaf) {
+ n = dm_block_data(ic->overflow_leaf);
+ index = lower_bound(n, b);
+ if (contains_key(n, b, index)) {
+ v_ptr = value_ptr(n, index);
+ rc = le32_to_cpu(*v_ptr) + 1;
+ *v_ptr = cpu_to_le32(rc);
+
+ return 0;
+ }
+ }
+
+ return __sm_ll_inc_overflow(ll, b, ic);
+}
+
+static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
{
- return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
+ int r, inc;
+ r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
+ &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
+ if (r < 0) {
+ DMERR("dm_tm_shadow_block() failed");
+ return r;
+ }
+ ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
+ ic->bitmap = dm_bitmap_data(ic->bitmap_block);
+ return 0;
}
-static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
+/*
+ * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
+ * we can reopen the bitmap with a simple write lock, rather than re-calling
+ * dm_tm_shadow_block().
+ */
+static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
{
- *new = old + 1;
+ if (!ic->bitmap_block) {
+ int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
+ &dm_sm_bitmap_validator, &ic->bitmap_block);
+ if (r) {
+ DMERR("unable to re-get write lock for bitmap");
+ return r;
+ }
+ ic->bitmap = dm_bitmap_data(ic->bitmap_block);
+ }
+
return 0;
}
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+/*
+ * Loops round incrementing entries in a single bitmap.
+ */
+static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
+ uint32_t bit, uint32_t bit_end,
+ int32_t *nr_allocations, dm_block_t *new_b,
+ struct inc_context *ic)
{
- return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
+ int r;
+ __le32 le_rc;
+ uint32_t old;
+
+ for (; bit != bit_end; bit++, b++) {
+ /*
+ * We only need to drop the bitmap if we need to find a new btree
+ * leaf for the overflow. So if it was dropped last iteration,
+ * we now re-get it.
+ */
+ r = ensure_bitmap(ll, ic);
+ if (r)
+ return r;
+
+ old = sm_lookup_bitmap(ic->bitmap, bit);
+ switch (old) {
+ case 0:
+ /* inc bitmap, adjust nr_allocated */
+ sm_set_bitmap(ic->bitmap, bit, 1);
+ (*nr_allocations)++;
+ ll->nr_allocated++;
+ le32_add_cpu(&ic->ie_disk.nr_free, -1);
+ if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
+ ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
+ break;
+
+ case 1:
+ /* inc bitmap */
+ sm_set_bitmap(ic->bitmap, bit, 2);
+ break;
+
+ case 2:
+ /* inc bitmap and insert into overflow */
+ sm_set_bitmap(ic->bitmap, bit, 3);
+ reset_inc_context(ll, ic);
+
+ le_rc = cpu_to_le32(3);
+ __dm_bless_for_disk(&le_rc);
+ r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
+ &b, &le_rc, &ll->ref_count_root);
+ if (r < 0) {
+ DMERR("ref count insert failed");
+ return r;
+ }
+ break;
+
+ default:
+ /*
+ * inc within the overflow tree only.
+ */
+ r = sm_ll_inc_overflow(ll, b, ic);
+ if (r < 0)
+ return r;
+ }
+ }
+
+ *new_b = b;
+ return 0;
+}
+
+/*
+ * Finds a bitmap that contains entries in the block range, and increments
+ * them.
+ */
+static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+ int32_t *nr_allocations, dm_block_t *new_b)
+{
+ int r;
+ struct inc_context ic;
+ uint32_t bit, bit_end;
+ dm_block_t index = b;
+
+ init_inc_context(&ic);
+
+ bit = do_div(index, ll->entries_per_block);
+ r = ll->load_ie(ll, index, &ic.ie_disk);
+ if (r < 0)
+ return r;
+
+ r = shadow_bitmap(ll, &ic);
+ if (r)
+ return r;
+
+ bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
+ r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
+
+ exit_inc_context(ll, &ic);
+
+ if (r)
+ return r;
+
+ return ll->save_ie(ll, index, &ic.ie_disk);
}
-static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+ int32_t *nr_allocations)
{
- if (!old) {
- DMERR_LIMIT("unable to decrement a reference count below 0");
+ *nr_allocations = 0;
+ while (b != e) {
+ int r = __sm_ll_inc(ll, b, e, nr_allocations, &b);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
+static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
+ struct inc_context *ic)
+{
+ reset_inc_context(ll, ic);
+ return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
+ &b, &ll->ref_count_root);
+}
+
+static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
+ struct inc_context *ic, uint32_t *old_rc)
+{
+ int r;
+ int index = -1;
+ struct btree_node *n;
+ __le32 *v_ptr;
+ uint32_t rc;
+
+ reset_inc_context(ll, ic);
+ r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
+ b, &index, &ll->ref_count_root, &ic->overflow_leaf);
+ if (r < 0)
+ return r;
+
+ n = dm_block_data(ic->overflow_leaf);
+
+ if (!contains_key(n, b, index)) {
+ DMERR("overflow btree is missing an entry");
return -EINVAL;
}
- *new = old - 1;
+ v_ptr = value_ptr(n, index);
+ rc = le32_to_cpu(*v_ptr);
+ *old_rc = rc;
+
+ if (rc == 3) {
+ return __sm_ll_del_overflow(ll, b, ic);
+ } else {
+ rc--;
+ *v_ptr = cpu_to_le32(rc);
+ return 0;
+ }
+}
+
+static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
+ struct inc_context *ic, uint32_t *old_rc)
+{
+ /*
+ * Do we already have the correct overflow leaf?
+ */
+ if (ic->overflow_leaf) {
+ int index;
+ struct btree_node *n;
+ __le32 *v_ptr;
+ uint32_t rc;
+
+ n = dm_block_data(ic->overflow_leaf);
+ index = lower_bound(n, b);
+ if (contains_key(n, b, index)) {
+ v_ptr = value_ptr(n, index);
+ rc = le32_to_cpu(*v_ptr);
+ *old_rc = rc;
+
+ if (rc > 3) {
+ rc--;
+ *v_ptr = cpu_to_le32(rc);
+ return 0;
+ } else {
+ return __sm_ll_del_overflow(ll, b, ic);
+ }
+
+ }
+ }
+
+ return __sm_ll_dec_overflow(ll, b, ic, old_rc);
+}
+
+/*
+ * Loops round decrementing entries in a single bitmap.
+ */
+static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
+ uint32_t bit, uint32_t bit_end,
+ struct inc_context *ic,
+ int32_t *nr_allocations, dm_block_t *new_b)
+{
+ int r;
+ uint32_t old;
+
+ for (; bit != bit_end; bit++, b++) {
+ /*
+ * We only need to drop the bitmap if we need to find a new btree
+ * leaf for the overflow. So if it was dropped last iteration,
+ * we now re-get it.
+ */
+ r = ensure_bitmap(ll, ic);
+ if (r)
+ return r;
+
+ old = sm_lookup_bitmap(ic->bitmap, bit);
+ switch (old) {
+ case 0:
+ DMERR("unable to decrement block");
+ return -EINVAL;
+
+ case 1:
+ /* dec bitmap */
+ sm_set_bitmap(ic->bitmap, bit, 0);
+ (*nr_allocations)--;
+ ll->nr_allocated--;
+ le32_add_cpu(&ic->ie_disk.nr_free, 1);
+ ic->ie_disk.none_free_before =
+ cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
+ break;
+
+ case 2:
+ /* dec bitmap */
+ sm_set_bitmap(ic->bitmap, bit, 1);
+ break;
+
+ case 3:
+ r = sm_ll_dec_overflow(ll, b, ic, &old);
+ if (r < 0)
+ return r;
+
+ if (old == 3) {
+ r = ensure_bitmap(ll, ic);
+ if (r)
+ return r;
+
+ sm_set_bitmap(ic->bitmap, bit, 2);
+ }
+ break;
+ }
+ }
+
+ *new_b = b;
return 0;
}
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+ int32_t *nr_allocations, dm_block_t *new_b)
{
- return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev);
+ int r;
+ uint32_t bit, bit_end;
+ struct inc_context ic;
+ dm_block_t index = b;
+
+ init_inc_context(&ic);
+
+ bit = do_div(index, ll->entries_per_block);
+ r = ll->load_ie(ll, index, &ic.ie_disk);
+ if (r < 0)
+ return r;
+
+ r = shadow_bitmap(ll, &ic);
+ if (r)
+ return r;
+
+ bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
+ r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
+ exit_inc_context(ll, &ic);
+
+ if (r)
+ return r;
+
+ return ll->save_ie(ll, index, &ic.ie_disk);
+}
+
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
+ int32_t *nr_allocations)
+{
+ *nr_allocations = 0;
+ while (b != e) {
+ int r = __sm_ll_dec(ll, b, e, nr_allocations, &b);
+ if (r)
+ return r;
+ }
+
+ return 0;
}
+/*----------------------------------------------------------------*/
+
int sm_ll_commit(struct ll_disk *ll)
{
int r = 0;
@@ -685,28 +1089,92 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
/*----------------------------------------------------------------*/
+static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
+{
+ iec->dirty = false;
+ __dm_bless_for_disk(iec->ie);
+ return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
+ &iec->index, &iec->ie, &ll->bitmap_root);
+}
+
+static inline unsigned hash_index(dm_block_t index)
+{
+ return dm_hash_block(index, IE_CACHE_MASK);
+}
+
static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
struct disk_index_entry *ie)
{
- return dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
+ int r;
+ unsigned h = hash_index(index);
+ struct ie_cache *iec = ll->ie_cache + h;
+
+ if (iec->valid) {
+ if (iec->index == index) {
+ memcpy(ie, &iec->ie, sizeof(*ie));
+ return 0;
+ }
+
+ if (iec->dirty) {
+ r = ie_cache_writeback(ll, iec);
+ if (r)
+ return r;
+ }
+ }
+
+ r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
+ if (!r) {
+ iec->valid = true;
+ iec->dirty = false;
+ iec->index = index;
+ memcpy(&iec->ie, ie, sizeof(*ie));
+ }
+
+ return r;
}
static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
struct disk_index_entry *ie)
{
- __dm_bless_for_disk(ie);
- return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
- &index, ie, &ll->bitmap_root);
+ int r;
+ unsigned h = hash_index(index);
+ struct ie_cache *iec = ll->ie_cache + h;
+
+ ll->bitmap_index_changed = true;
+ if (iec->valid) {
+ if (iec->index == index) {
+ memcpy(&iec->ie, ie, sizeof(*ie));
+ iec->dirty = true;
+ return 0;
+ }
+
+ if (iec->dirty) {
+ r = ie_cache_writeback(ll, iec);
+ if (r)
+ return r;
+ }
+ }
+
+ iec->valid = true;
+ iec->dirty = true;
+ iec->index = index;
+ memcpy(&iec->ie, ie, sizeof(*ie));
+ return 0;
}
static int disk_ll_init_index(struct ll_disk *ll)
{
+ unsigned i;
+ for (i = 0; i < IE_CACHE_SIZE; i++) {
+ struct ie_cache *iec = ll->ie_cache + i;
+ iec->valid = false;
+ iec->dirty = false;
+ }
return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
}
static int disk_ll_open(struct ll_disk *ll)
{
- /* nothing to do */
return 0;
}
@@ -717,7 +1185,16 @@ static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
static int disk_ll_commit(struct ll_disk *ll)
{
- return 0;
+ int r = 0;
+ unsigned i;
+
+ for (i = 0; i < IE_CACHE_SIZE; i++) {
+ struct ie_cache *iec = ll->ie_cache + i;
+ if (iec->valid && iec->dirty)
+ r = ie_cache_writeback(ll, iec);
+ }
+
+ return r;
}
int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h
index 8de63ce39bdd..706ceb85d680 100644
--- a/drivers/md/persistent-data/dm-space-map-common.h
+++ b/drivers/md/persistent-data/dm-space-map-common.h
@@ -33,7 +33,7 @@ struct disk_index_entry {
__le64 blocknr;
__le32 nr_free;
__le32 none_free_before;
-} __packed;
+} __attribute__ ((packed, aligned(8)));
#define MAX_METADATA_BITMAPS 255
@@ -43,7 +43,7 @@ struct disk_metadata_index {
__le64 blocknr;
struct disk_index_entry index[MAX_METADATA_BITMAPS];
-} __packed;
+} __attribute__ ((packed, aligned(8)));
struct ll_disk;
@@ -54,6 +54,20 @@ typedef int (*open_index_fn)(struct ll_disk *ll);
typedef dm_block_t (*max_index_entries_fn)(struct ll_disk *ll);
typedef int (*commit_fn)(struct ll_disk *ll);
+/*
+ * A lot of time can be wasted reading and writing the same
+ * index entry. So we cache a few entries.
+ */
+#define IE_CACHE_SIZE 64
+#define IE_CACHE_MASK (IE_CACHE_SIZE - 1)
+
+struct ie_cache {
+ bool valid;
+ bool dirty;
+ dm_block_t index;
+ struct disk_index_entry ie;
+};
+
struct ll_disk {
struct dm_transaction_manager *tm;
struct dm_btree_info bitmap_info;
@@ -79,6 +93,8 @@ struct ll_disk {
max_index_entries_fn max_entries;
commit_fn commit;
bool bitmap_index_changed:1;
+
+ struct ie_cache ie_cache[IE_CACHE_SIZE];
};
struct disk_sm_root {
@@ -86,7 +102,7 @@ struct disk_sm_root {
__le64 nr_allocated;
__le64 bitmap_root;
__le64 ref_count_root;
-} __packed;
+} __attribute__ ((packed, aligned(8)));
#define ENTRIES_PER_BYTE 4
@@ -94,13 +110,7 @@ struct disk_bitmap_header {
__le32 csum;
__le32 not_used;
__le64 blocknr;
-} __packed;
-
-enum allocation_event {
- SM_NONE,
- SM_ALLOC,
- SM_FREE,
-};
+} __attribute__ ((packed, aligned(8)));
/*----------------------------------------------------------------*/
@@ -111,9 +121,15 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
dm_block_t end, dm_block_t *result);
int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
dm_block_t begin, dm_block_t end, dm_block_t *result);
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev);
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev);
+
+/*
+ * The next three functions return (via nr_allocations) the net number of
+ * allocations that were made. This number may be negative if there were
+ * more frees than allocs.
+ */
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, int32_t *nr_allocations);
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations);
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, int32_t *nr_allocations);
int sm_ll_commit(struct ll_disk *ll);
int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm);
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index bf4c5e2ccb6f..d0a8d5e73c28 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -87,76 +87,39 @@ static int sm_disk_set_count(struct dm_space_map *sm, dm_block_t b,
uint32_t count)
{
int r;
- uint32_t old_count;
- enum allocation_event ev;
+ int32_t nr_allocations;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
- r = sm_ll_insert(&smd->ll, b, count, &ev);
+ r = sm_ll_insert(&smd->ll, b, count, &nr_allocations);
if (!r) {
- switch (ev) {
- case SM_NONE:
- break;
-
- case SM_ALLOC:
- /*
- * This _must_ be free in the prior transaction
- * otherwise we've lost atomicity.
- */
- smd->nr_allocated_this_transaction++;
- break;
-
- case SM_FREE:
- /*
- * It's only free if it's also free in the last
- * transaction.
- */
- r = sm_ll_lookup(&smd->old_ll, b, &old_count);
- if (r)
- return r;
-
- if (!old_count)
- smd->nr_allocated_this_transaction--;
- break;
- }
+ smd->nr_allocated_this_transaction += nr_allocations;
}
return r;
}
-static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_disk_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
int r;
- enum allocation_event ev;
+ int32_t nr_allocations;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
- r = sm_ll_inc(&smd->ll, b, &ev);
- if (!r && (ev == SM_ALLOC))
- /*
- * This _must_ be free in the prior transaction
- * otherwise we've lost atomicity.
- */
- smd->nr_allocated_this_transaction++;
+ r = sm_ll_inc(&smd->ll, b, e, &nr_allocations);
+ if (!r)
+ smd->nr_allocated_this_transaction += nr_allocations;
return r;
}
-static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_disk_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
int r;
- uint32_t old_count;
- enum allocation_event ev;
+ int32_t nr_allocations;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
- r = sm_ll_dec(&smd->ll, b, &ev);
- if (!r && (ev == SM_FREE)) {
- /*
- * It's only free if it's also free in the last
- * transaction.
- */
- r = sm_ll_lookup(&smd->old_ll, b, &old_count);
- if (!r && !old_count)
- smd->nr_allocated_this_transaction--;
- }
+ r = sm_ll_dec(&smd->ll, b, e, &nr_allocations);
+ if (!r)
+ smd->nr_allocated_this_transaction += nr_allocations;
return r;
}
@@ -164,21 +127,28 @@ static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
{
int r;
- enum allocation_event ev;
+ int32_t nr_allocations;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
/*
* Any block we allocate has to be free in both the old and current ll.
*/
r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, smd->begin, smd->ll.nr_blocks, b);
+ if (r == -ENOSPC) {
+ /*
+ * There's no free block between smd->begin and the end of the range tracked by
+ * this space map (smd->ll.nr_blocks). Search before smd->begin in case something has been freed.
+ */
+ r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, 0, smd->begin, b);
+ }
+
if (r)
return r;
smd->begin = *b + 1;
- r = sm_ll_inc(&smd->ll, *b, &ev);
+ r = sm_ll_inc(&smd->ll, *b, *b + 1, &nr_allocations);
if (!r) {
- BUG_ON(ev != SM_ALLOC);
- smd->nr_allocated_this_transaction++;
+ smd->nr_allocated_this_transaction += nr_allocations;
}
return r;
@@ -187,25 +157,15 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
static int sm_disk_commit(struct dm_space_map *sm)
{
int r;
- dm_block_t nr_free;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
- r = sm_disk_get_nr_free(sm, &nr_free);
- if (r)
- return r;
-
r = sm_ll_commit(&smd->ll);
if (r)
return r;
memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll));
- smd->begin = 0;
smd->nr_allocated_this_transaction = 0;
- r = sm_disk_get_nr_free(sm, &nr_free);
- if (r)
- return r;
-
return 0;
}
@@ -244,8 +204,8 @@ static struct dm_space_map ops = {
.get_count = sm_disk_get_count,
.count_is_more_than_one = sm_disk_count_is_more_than_one,
.set_count = sm_disk_set_count,
- .inc_block = sm_disk_inc_block,
- .dec_block = sm_disk_dec_block,
+ .inc_blocks = sm_disk_inc_blocks,
+ .dec_blocks = sm_disk_dec_blocks,
.new_block = sm_disk_new_block,
.commit = sm_disk_commit,
.root_size = sm_disk_root_size,
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 9e3c64ec2026..392ae26134a4 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -89,7 +89,8 @@ enum block_op_type {
struct block_op {
enum block_op_type type;
- dm_block_t block;
+ dm_block_t b;
+ dm_block_t e;
};
struct bop_ring_buffer {
@@ -116,7 +117,7 @@ static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
}
static int brb_push(struct bop_ring_buffer *brb,
- enum block_op_type type, dm_block_t b)
+ enum block_op_type type, dm_block_t b, dm_block_t e)
{
struct block_op *bop;
unsigned next = brb_next(brb, brb->end);
@@ -130,7 +131,8 @@ static int brb_push(struct bop_ring_buffer *brb,
bop = brb->bops + brb->end;
bop->type = type;
- bop->block = b;
+ bop->b = b;
+ bop->e = e;
brb->end = next;
@@ -145,9 +147,7 @@ static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
return -ENODATA;
bop = brb->bops + brb->begin;
- result->type = bop->type;
- result->block = bop->block;
-
+ memcpy(result, bop, sizeof(*result));
return 0;
}
@@ -178,10 +178,9 @@ struct sm_metadata {
struct threshold threshold;
};
-static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
+static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
{
- int r = brb_push(&smm->uncommitted, type, b);
-
+ int r = brb_push(&smm->uncommitted, type, b, e);
if (r) {
DMERR("too many recursive allocations");
return -ENOMEM;
@@ -193,15 +192,15 @@ static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t
static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
int r = 0;
- enum allocation_event ev;
+ int32_t nr_allocations;
switch (op->type) {
case BOP_INC:
- r = sm_ll_inc(&smm->ll, op->block, &ev);
+ r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations);
break;
case BOP_DEC:
- r = sm_ll_dec(&smm->ll, op->block, &ev);
+ r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations);
break;
}
@@ -314,7 +313,7 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
i = brb_next(&smm->uncommitted, i)) {
struct block_op *op = smm->uncommitted.bops + i;
- if (op->block != b)
+ if (b < op->b || b >= op->e)
continue;
switch (op->type) {
@@ -355,7 +354,7 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
struct block_op *op = smm->uncommitted.bops + i;
- if (op->block != b)
+ if (b < op->b || b >= op->e)
continue;
switch (op->type) {
@@ -393,7 +392,7 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
uint32_t count)
{
int r, r2;
- enum allocation_event ev;
+ int32_t nr_allocations;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
if (smm->recursion_count) {
@@ -402,40 +401,42 @@ static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
}
in(smm);
- r = sm_ll_insert(&smm->ll, b, count, &ev);
+ r = sm_ll_insert(&smm->ll, b, count, &nr_allocations);
r2 = out(smm);
return combine_errors(r, r2);
}
-static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
int r, r2 = 0;
- enum allocation_event ev;
+ int32_t nr_allocations;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
- if (recursing(smm))
- r = add_bop(smm, BOP_INC, b);
- else {
+ if (recursing(smm)) {
+ r = add_bop(smm, BOP_INC, b, e);
+ if (r)
+ return r;
+ } else {
in(smm);
- r = sm_ll_inc(&smm->ll, b, &ev);
+ r = sm_ll_inc(&smm->ll, b, e, &nr_allocations);
r2 = out(smm);
}
return combine_errors(r, r2);
}
-static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
int r, r2 = 0;
- enum allocation_event ev;
+ int32_t nr_allocations;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
if (recursing(smm))
- r = add_bop(smm, BOP_DEC, b);
+ r = add_bop(smm, BOP_DEC, b, e);
else {
in(smm);
- r = sm_ll_dec(&smm->ll, b, &ev);
+ r = sm_ll_dec(&smm->ll, b, e, &nr_allocations);
r2 = out(smm);
}
@@ -445,23 +446,31 @@ static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
int r, r2 = 0;
- enum allocation_event ev;
+ int32_t nr_allocations;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
/*
* Any block we allocate has to be free in both the old and current ll.
*/
r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b);
+ if (r == -ENOSPC) {
+ /*
+ * There's no free block between smm->begin and the end of the metadata device.
+ * We search before smm->begin in case something has been freed.
+ */
+ r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b);
+ }
+
if (r)
return r;
smm->begin = *b + 1;
if (recursing(smm))
- r = add_bop(smm, BOP_INC, *b);
+ r = add_bop(smm, BOP_INC, *b, *b + 1);
else {
in(smm);
- r = sm_ll_inc(&smm->ll, *b, &ev);
+ r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations);
r2 = out(smm);
}
@@ -503,7 +512,6 @@ static int sm_metadata_commit(struct dm_space_map *sm)
return r;
memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
- smm->begin = 0;
smm->allocated_this_transaction = 0;
return 0;
@@ -556,8 +564,8 @@ static const struct dm_space_map ops = {
.get_count = sm_metadata_get_count,
.count_is_more_than_one = sm_metadata_count_is_more_than_one,
.set_count = sm_metadata_set_count,
- .inc_block = sm_metadata_inc_block,
- .dec_block = sm_metadata_dec_block,
+ .inc_blocks = sm_metadata_inc_blocks,
+ .dec_blocks = sm_metadata_dec_blocks,
.new_block = sm_metadata_new_block,
.commit = sm_metadata_commit,
.root_size = sm_metadata_root_size,
@@ -641,18 +649,28 @@ static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
return 0;
}
-static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
+ int r;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
- return add_bop(smm, BOP_INC, b);
+ r = add_bop(smm, BOP_INC, b, e);
+ if (r)
+ return r;
+
+ return 0;
}
-static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
+static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
+ int r;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
- return add_bop(smm, BOP_DEC, b);
+ r = add_bop(smm, BOP_DEC, b, e);
+ if (r)
+ return r;
+
+ return 0;
}
static int sm_bootstrap_commit(struct dm_space_map *sm)
@@ -683,8 +701,8 @@ static const struct dm_space_map bootstrap_ops = {
.get_count = sm_bootstrap_get_count,
.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
.set_count = sm_bootstrap_set_count,
- .inc_block = sm_bootstrap_inc_block,
- .dec_block = sm_bootstrap_dec_block,
+ .inc_blocks = sm_bootstrap_inc_blocks,
+ .dec_blocks = sm_bootstrap_dec_blocks,
.new_block = sm_bootstrap_new_block,
.commit = sm_bootstrap_commit,
.root_size = sm_bootstrap_root_size,
@@ -696,7 +714,7 @@ static const struct dm_space_map bootstrap_ops = {
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
- int r, i;
+ int r;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
dm_block_t old_len = smm->ll.nr_blocks;
@@ -718,9 +736,7 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
* allocate any new blocks.
*/
do {
- for (i = old_len; !r && i < smm->begin; i++)
- r = add_bop(smm, BOP_INC, i);
-
+ r = add_bop(smm, BOP_INC, old_len, smm->begin);
if (r)
goto out;
@@ -767,7 +783,6 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
dm_block_t superblock)
{
int r;
- dm_block_t i;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
smm->begin = superblock + 1;
@@ -792,9 +807,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
* Now we need to update the newly created data structures with the
* allocated blocks that they were built from.
*/
- for (i = superblock; !r && i < smm->begin; i++)
- r = add_bop(smm, BOP_INC, i);
-
+ r = add_bop(smm, BOP_INC, superblock, smm->begin);
if (r)
return r;
diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h
index 3e6d1153b7c4..a015cd11f6e9 100644
--- a/drivers/md/persistent-data/dm-space-map.h
+++ b/drivers/md/persistent-data/dm-space-map.h
@@ -46,8 +46,8 @@ struct dm_space_map {
int (*commit)(struct dm_space_map *sm);
- int (*inc_block)(struct dm_space_map *sm, dm_block_t b);
- int (*dec_block)(struct dm_space_map *sm, dm_block_t b);
+ int (*inc_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e);
+ int (*dec_blocks)(struct dm_space_map *sm, dm_block_t b, dm_block_t e);
/*
* new_block will increment the returned block.
@@ -117,14 +117,24 @@ static inline int dm_sm_commit(struct dm_space_map *sm)
return sm->commit(sm);
}
+static inline int dm_sm_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
+{
+ return sm->inc_blocks(sm, b, e);
+}
+
static inline int dm_sm_inc_block(struct dm_space_map *sm, dm_block_t b)
{
- return sm->inc_block(sm, b);
+ return dm_sm_inc_blocks(sm, b, b + 1);
+}
+
+static inline int dm_sm_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
+{
+ return sm->dec_blocks(sm, b, e);
}
static inline int dm_sm_dec_block(struct dm_space_map *sm, dm_block_t b)
{
- return sm->dec_block(sm, b);
+ return dm_sm_dec_blocks(sm, b, b + 1);
}
static inline int dm_sm_new_block(struct dm_space_map *sm, dm_block_t *b)
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index abe2c5dd0993..16643fc974e8 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -359,6 +359,17 @@ void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b)
}
EXPORT_SYMBOL_GPL(dm_tm_inc);
+void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e)
+{
+ /*
+ * The non-blocking clone doesn't support this.
+ */
+ BUG_ON(tm->is_clone);
+
+ dm_sm_inc_blocks(tm->sm, b, e);
+}
+EXPORT_SYMBOL_GPL(dm_tm_inc_range);
+
void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
{
/*
@@ -370,6 +381,47 @@ void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b)
}
EXPORT_SYMBOL_GPL(dm_tm_dec);
+void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e)
+{
+ /*
+ * The non-blocking clone doesn't support this.
+ */
+ BUG_ON(tm->is_clone);
+
+ dm_sm_dec_blocks(tm->sm, b, e);
+}
+EXPORT_SYMBOL_GPL(dm_tm_dec_range);
+
+void dm_tm_with_runs(struct dm_transaction_manager *tm,
+ const __le64 *value_le, unsigned count, dm_tm_run_fn fn)
+{
+ uint64_t b, begin, end;
+ bool in_run = false;
+ unsigned i;
+
+ for (i = 0; i < count; i++, value_le++) {
+ b = le64_to_cpu(*value_le);
+
+ if (in_run) {
+ if (b == end)
+ end++;
+ else {
+ fn(tm, begin, end);
+ begin = b;
+ end = b + 1;
+ }
+ } else {
+ in_run = true;
+ begin = b;
+ end = b + 1;
+ }
+ }
+
+ if (in_run)
+ fn(tm, begin, end);
+}
+EXPORT_SYMBOL_GPL(dm_tm_with_runs);
+
int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
uint32_t *result)
{
@@ -379,6 +431,15 @@ int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
return dm_sm_get_count(tm->sm, b, result);
}
+int dm_tm_block_is_shared(struct dm_transaction_manager *tm, dm_block_t b,
+ int *result)
+{
+ if (tm->is_clone)
+ return -EWOULDBLOCK;
+
+ return dm_sm_count_is_more_than_one(tm->sm, b, result);
+}
+
struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm)
{
return tm->bm;
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index f3a18be68f30..906c02ed0365 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -100,11 +100,27 @@ void dm_tm_unlock(struct dm_transaction_manager *tm, struct dm_block *b);
* Functions for altering the reference count of a block directly.
*/
void dm_tm_inc(struct dm_transaction_manager *tm, dm_block_t b);
-
+void dm_tm_inc_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e);
void dm_tm_dec(struct dm_transaction_manager *tm, dm_block_t b);
+void dm_tm_dec_range(struct dm_transaction_manager *tm, dm_block_t b, dm_block_t e);
+
+/*
+ * Builds up runs of adjacent blocks, and then calls the given fn
+ * (typically dm_tm_inc_range/dm_tm_dec_range). Very useful when you have
+ * to perform the same tm operation on all values in a btree leaf.
+ */
+typedef void (*dm_tm_run_fn)(struct dm_transaction_manager *, dm_block_t, dm_block_t);
+void dm_tm_with_runs(struct dm_transaction_manager *tm,
+ const __le64 *value_le, unsigned count, dm_tm_run_fn fn);
-int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b,
- uint32_t *result);
+int dm_tm_ref(struct dm_transaction_manager *tm, dm_block_t b, uint32_t *result);
+
+/*
+ * Finds out if a given block is shared (i.e. has a reference count higher
+ * than one).
+ */
+int dm_tm_block_is_shared(struct dm_transaction_manager *tm, dm_block_t b,
+ int *result);
struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm);