aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r--drivers/block/drbd/drbd_actlog.c23
-rw-r--r--drivers/block/drbd/drbd_int.h92
-rw-r--r--drivers/block/drbd/drbd_main.c28
-rw-r--r--drivers/block/drbd/drbd_nl.c485
-rw-r--r--drivers/block/drbd/drbd_nla.c1
-rw-r--r--drivers/block/drbd/drbd_proc.c2
-rw-r--r--drivers/block/drbd/drbd_protocol.h12
-rw-r--r--drivers/block/drbd/drbd_receiver.c196
-rw-r--r--drivers/block/drbd/drbd_req.c74
-rw-r--r--drivers/block/drbd/drbd_req.h6
-rw-r--r--drivers/block/drbd/drbd_state.c38
-rw-r--r--drivers/block/drbd/drbd_worker.c107
-rw-r--r--drivers/block/drbd/drbd_wrappers.h54
13 files changed, 701 insertions, 417 deletions
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 90ae4ba8f9ee..05a1780ffa85 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -29,7 +29,6 @@
#include <linux/drbd_limits.h>
#include <linux/dynamic_debug.h>
#include "drbd_int.h"
-#include "drbd_wrappers.h"
enum al_transaction_types {
@@ -204,7 +203,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
BUG_ON(!bdev->md_bdev);
- drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
+ dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
current->comm, current->pid, __func__,
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
(void*)_RET_IP_ );
@@ -276,7 +275,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval
return _al_get(device, first, true);
}
-static
bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
{
/* for bios crossing activity log extent boundaries,
@@ -846,7 +844,7 @@ void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
int wake_up = 0;
unsigned long flags;
- if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+ if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
(unsigned long long)sector, size);
return;
@@ -920,7 +918,7 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
if (size == 0)
return 0;
- if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+ if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
drbd_err(device, "sector: %llus, size: %d\n",
(unsigned long long)sector, size);
return 0;
@@ -1023,8 +1021,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector)
unsigned int enr = BM_SECT_TO_EXT(sector);
struct bm_extent *bm_ext;
int i, sig;
- int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
- 200 times -> 20 seconds. */
+ bool sa;
retry:
sig = wait_event_interruptible(device->al_wait,
@@ -1035,12 +1032,15 @@ retry:
if (test_bit(BME_LOCKED, &bm_ext->flags))
return 0;
+ /* step aside only while we are above c-min-rate; unless disabled. */
+ sa = drbd_rs_c_min_rate_throttle(device);
+
for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
sig = wait_event_interruptible(device->al_wait,
!_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) ||
- test_bit(BME_PRIORITY, &bm_ext->flags));
+ (sa && test_bit(BME_PRIORITY, &bm_ext->flags)));
- if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
+ if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) {
spin_lock_irq(&device->al_lock);
if (lc_put(device->resync, &bm_ext->lce) == 0) {
bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
@@ -1052,9 +1052,6 @@ retry:
return -EINTR;
if (schedule_timeout_interruptible(HZ/10))
return -EINTR;
- if (sa && --sa == 0)
- drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec."
- "Resync stalled?\n");
goto retry;
}
}
@@ -1288,7 +1285,7 @@ void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
sector_t esector, nr_sectors;
int wake_up = 0;
- if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
+ if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
(unsigned long long)sector, size);
return;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e7093d4291f1..a76ceb344d64 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -382,6 +382,12 @@ enum {
__EE_CALL_AL_COMPLETE_IO,
__EE_MAY_SET_IN_SYNC,
+ /* is this a TRIM aka REQ_DISCARD? */
+ __EE_IS_TRIM,
+ /* our lower level cannot handle trim,
+ * and we want to fall back to zeroout instead */
+ __EE_IS_TRIM_USE_ZEROOUT,
+
/* In case a barrier failed,
* we need to resubmit without the barrier flag. */
__EE_RESUBMITTED,
@@ -405,7 +411,9 @@ enum {
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
-#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
+#define EE_IS_TRIM (1<<__EE_IS_TRIM)
+#define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT)
+#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS)
@@ -579,6 +587,7 @@ struct drbd_resource {
struct list_head resources;
struct res_opts res_opts;
struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */
+ struct mutex adm_mutex; /* mutex to serialize administrative requests */
spinlock_t req_lock;
unsigned susp:1; /* IO suspended by user */
@@ -609,6 +618,7 @@ struct drbd_connection {
struct drbd_socket data; /* data/barrier/cstate/parameter packets */
struct drbd_socket meta; /* ping/ack (metadata) packets */
int agreed_pro_version; /* actually used protocol version */
+ u32 agreed_features;
unsigned long last_received; /* in jiffies, either socket */
unsigned int ko_count;
@@ -814,6 +824,28 @@ struct drbd_device {
struct submit_worker submit;
};
+struct drbd_config_context {
+ /* assigned from drbd_genlmsghdr */
+ unsigned int minor;
+ /* assigned from request attributes, if present */
+ unsigned int volume;
+#define VOLUME_UNSPECIFIED (-1U)
+ /* pointer into the request skb,
+ * limited lifetime! */
+ char *resource_name;
+ struct nlattr *my_addr;
+ struct nlattr *peer_addr;
+
+ /* reply buffer */
+ struct sk_buff *reply_skb;
+ /* pointer into reply buffer */
+ struct drbd_genlmsghdr *reply_dh;
+ /* resolved from attributes, if possible */
+ struct drbd_device *device;
+ struct drbd_resource *resource;
+ struct drbd_connection *connection;
+};
+
static inline struct drbd_device *minor_to_device(unsigned int minor)
{
return (struct drbd_device *)idr_find(&drbd_devices, minor);
@@ -821,7 +853,7 @@ static inline struct drbd_device *minor_to_device(unsigned int minor)
static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device)
{
- return list_first_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
+ return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
}
#define for_each_resource(resource, _resources) \
@@ -1139,6 +1171,12 @@ struct bm_extent {
#define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */
#define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */
+/* For now, don't allow more than one activity log extent worth of data
+ * to be discarded in one go. We may need to rework drbd_al_begin_io()
+ * to allow for even larger discard ranges */
+#define DRBD_MAX_DISCARD_SIZE AL_EXTENT_SIZE
+#define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9)
+
extern int drbd_bm_init(struct drbd_device *device);
extern int drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits);
extern void drbd_bm_cleanup(struct drbd_device *device);
@@ -1229,9 +1267,9 @@ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
extern rwlock_t global_state_lock;
extern int conn_lowest_minor(struct drbd_connection *connection);
-enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr);
+extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
extern void drbd_destroy_device(struct kref *kref);
-extern void drbd_delete_device(struct drbd_device *mdev);
+extern void drbd_delete_device(struct drbd_device *device);
extern struct drbd_resource *drbd_create_resource(const char *name);
extern void drbd_free_resource(struct drbd_resource *resource);
@@ -1257,7 +1295,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t);
/* drbd_nl.c */
-extern int drbd_msg_put_info(const char *info);
+extern int drbd_msg_put_info(struct sk_buff *skb, const char *info);
extern void drbd_suspend_io(struct drbd_device *device);
extern void drbd_resume_io(struct drbd_device *device);
extern char *ppsize(char *buf, unsigned long long size);
@@ -1283,6 +1321,10 @@ extern void conn_try_outdate_peer_async(struct drbd_connection *connection);
extern int drbd_khelper(struct drbd_device *device, char *cmd);
/* drbd_worker.c */
+/* bi_end_io handlers */
+extern void drbd_md_io_complete(struct bio *bio, int error);
+extern void drbd_peer_request_endio(struct bio *bio, int error);
+extern void drbd_request_endio(struct bio *bio, int error);
extern int drbd_worker(struct drbd_thread *thi);
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor);
void drbd_resync_after_changed(struct drbd_device *device);
@@ -1332,16 +1374,20 @@ extern int w_start_resync(struct drbd_work *, int);
extern void resync_timer_fn(unsigned long data);
extern void start_resync_timer_fn(unsigned long data);
+extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
+
/* drbd_receiver.c */
extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_asender(struct drbd_thread *thi);
-extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
extern int drbd_submit_peer_request(struct drbd_device *,
struct drbd_peer_request *, const unsigned,
const int);
extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
sector_t, unsigned int,
+ bool,
gfp_t) __must_hold(local);
extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *,
int);
@@ -1401,6 +1447,37 @@ static inline void drbd_tcp_quickack(struct socket *sock)
(char*)&val, sizeof(val));
}
+/* sets the number of 512 byte sectors of our virtual device */
+static inline void drbd_set_my_capacity(struct drbd_device *device,
+ sector_t size)
+{
+ /* set_capacity(device->this_bdev->bd_disk, size); */
+ set_capacity(device->vdisk, size);
+ device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+/*
+ * used to submit our private bio
+ */
+static inline void drbd_generic_make_request(struct drbd_device *device,
+ int fault_type, struct bio *bio)
+{
+ __release(local);
+ if (!bio->bi_bdev) {
+ printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
+ "bio->bi_bdev == NULL\n",
+ device_to_minor(device));
+ dump_stack();
+ bio_endio(bio, -ENODEV);
+ return;
+ }
+
+ if (drbd_insert_fault(device, fault_type))
+ bio_endio(bio, -EIO);
+ else
+ generic_make_request(bio);
+}
+
void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);
/* drbd_proc.c */
@@ -1410,6 +1487,7 @@ extern const char *drbd_conn_str(enum drbd_conns s);
extern const char *drbd_role_str(enum drbd_role s);
/* drbd_actlog.c */
+extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
@@ -2144,7 +2222,7 @@ static inline void drbd_md_flush(struct drbd_device *device)
static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
{
- return list_first_entry(&resource->connections,
+ return list_first_entry_or_null(&resource->connections,
struct drbd_connection, connections);
}
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 331e5cc1227d..960645c26e6f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1607,8 +1607,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long b
return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
}
-/* Used to send write requests
- * R_PRIMARY -> Peer (P_DATA)
+/* Used to send write or TRIM aka REQ_DISCARD requests
+ * R_PRIMARY -> Peer (P_DATA, P_TRIM)
*/
int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req)
{
@@ -1640,6 +1640,16 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
dp_flags |= DP_SEND_WRITE_ACK;
}
p->dp_flags = cpu_to_be32(dp_flags);
+
+ if (dp_flags & DP_DISCARD) {
+ struct p_trim *t = (struct p_trim*)p;
+ t->size = cpu_to_be32(req->i.size);
+ err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
+ goto out;
+ }
+
+ /* our digest is still only over the payload.
+ * TRIM does not carry any payload. */
if (dgs)
drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1);
err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size);
@@ -1675,6 +1685,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
... Be noisy about digest too large ...
} */
}
+out:
mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */
return err;
@@ -2570,6 +2581,7 @@ struct drbd_resource *drbd_create_resource(const char *name)
INIT_LIST_HEAD(&resource->connections);
list_add_tail_rcu(&resource->resources, &drbd_resources);
mutex_init(&resource->conf_update);
+ mutex_init(&resource->adm_mutex);
spin_lock_init(&resource->req_lock);
return resource;
@@ -2687,14 +2699,16 @@ static int init_submitter(struct drbd_device *device)
return 0;
}
-enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr)
+enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor)
{
+ struct drbd_resource *resource = adm_ctx->resource;
struct drbd_connection *connection;
struct drbd_device *device;
struct drbd_peer_device *peer_device, *tmp_peer_device;
struct gendisk *disk;
struct request_queue *q;
int id;
+ int vnr = adm_ctx->volume;
enum drbd_ret_code err = ERR_NOMEM;
device = minor_to_device(minor);
@@ -2763,7 +2777,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
if (id < 0) {
if (id == -ENOSPC) {
err = ERR_MINOR_EXISTS;
- drbd_msg_put_info("requested minor exists already");
+ drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
}
goto out_no_minor_idr;
}
@@ -2773,7 +2787,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
if (id < 0) {
if (id == -ENOSPC) {
err = ERR_MINOR_EXISTS;
- drbd_msg_put_info("requested minor exists already");
+ drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already");
}
goto out_idr_remove_minor;
}
@@ -2794,7 +2808,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
if (id < 0) {
if (id == -ENOSPC) {
err = ERR_INVALID_REQUEST;
- drbd_msg_put_info("requested volume exists already");
+ drbd_msg_put_info(adm_ctx->reply_skb, "requested volume exists already");
}
goto out_idr_remove_from_resource;
}
@@ -2803,7 +2817,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i
if (init_submitter(device)) {
err = ERR_NOMEM;
- drbd_msg_put_info("unable to create submit workqueue");
+ drbd_msg_put_info(adm_ctx->reply_skb, "unable to create submit workqueue");
goto out_idr_remove_vol;
}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 526414bc2cab..1b35c45c92b7 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -34,7 +34,6 @@
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
-#include "drbd_wrappers.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>
@@ -82,32 +81,6 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
/* used blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
-/* Configuration is strictly serialized, because generic netlink message
- * processing is strictly serialized by the genl_lock().
- * Which means we can use one static global drbd_config_context struct.
- */
-static struct drbd_config_context {
- /* assigned from drbd_genlmsghdr */
- unsigned int minor;
- /* assigned from request attributes, if present */
- unsigned int volume;
-#define VOLUME_UNSPECIFIED (-1U)
- /* pointer into the request skb,
- * limited lifetime! */
- char *resource_name;
- struct nlattr *my_addr;
- struct nlattr *peer_addr;
-
- /* reply buffer */
- struct sk_buff *reply_skb;
- /* pointer into reply buffer */
- struct drbd_genlmsghdr *reply_dh;
- /* resolved from attributes, if possible */
- struct drbd_device *device;
- struct drbd_resource *resource;
- struct drbd_connection *connection;
-} adm_ctx;
-
static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
{
genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
@@ -117,9 +90,8 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
* reason it could fail was no space in skb, and there are 4k available. */
-int drbd_msg_put_info(const char *info)
+int drbd_msg_put_info(struct sk_buff *skb, const char *info)
{
- struct sk_buff *skb = adm_ctx.reply_skb;
struct nlattr *nla;
int err = -EMSGSIZE;
@@ -143,42 +115,46 @@ int drbd_msg_put_info(const char *info)
* and per-family private info->pointers.
* But we need to stay compatible with older kernels.
* If it returns successfully, adm_ctx members are valid.
+ *
+ * At this point, we still rely on the global genl_lock().
+ * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
+ * to add additional synchronization against object destruction/modification.
*/
#define DRBD_ADM_NEED_MINOR 1
#define DRBD_ADM_NEED_RESOURCE 2
#define DRBD_ADM_NEED_CONNECTION 4
-static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
- unsigned flags)
+static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
+ struct sk_buff *skb, struct genl_info *info, unsigned flags)
{
struct drbd_genlmsghdr *d_in = info->userhdr;
const u8 cmd = info->genlhdr->cmd;
int err;
- memset(&adm_ctx, 0, sizeof(adm_ctx));
+ memset(adm_ctx, 0, sizeof(*adm_ctx));
/* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
return -EPERM;
- adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!adm_ctx.reply_skb) {
+ adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!adm_ctx->reply_skb) {
err = -ENOMEM;
goto fail;
}
- adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
+ adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
info, &drbd_genl_family, 0, cmd);
/* put of a few bytes into a fresh skb of >= 4k will always succeed.
* but anyways */
- if (!adm_ctx.reply_dh) {
+ if (!adm_ctx->reply_dh) {
err = -ENOMEM;
goto fail;
}
- adm_ctx.reply_dh->minor = d_in->minor;
- adm_ctx.reply_dh->ret_code = NO_ERROR;
+ adm_ctx->reply_dh->minor = d_in->minor;
+ adm_ctx->reply_dh->ret_code = NO_ERROR;
- adm_ctx.volume = VOLUME_UNSPECIFIED;
+ adm_ctx->volume = VOLUME_UNSPECIFIED;
if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
struct nlattr *nla;
/* parse and validate only */
@@ -188,111 +164,131 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
/* It was present, and valid,
* copy it over to the reply skb. */
- err = nla_put_nohdr(adm_ctx.reply_skb,
+ err = nla_put_nohdr(adm_ctx->reply_skb,
info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
info->attrs[DRBD_NLA_CFG_CONTEXT]);
if (err)
goto fail;
- /* and assign stuff to the global adm_ctx */
+ /* and assign stuff to the adm_ctx */
nla = nested_attr_tb[__nla_type(T_ctx_volume)];
if (nla)
- adm_ctx.volume = nla_get_u32(nla);
+ adm_ctx->volume = nla_get_u32(nla);
nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
if (nla)
- adm_ctx.resource_name = nla_data(nla);
- adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
- adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
- if ((adm_ctx.my_addr &&
- nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) ||
- (adm_ctx.peer_addr &&
- nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) {
+ adm_ctx->resource_name = nla_data(nla);
+ adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
+ adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
+ if ((adm_ctx->my_addr &&
+ nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
+ (adm_ctx->peer_addr &&
+ nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
err = -EINVAL;
goto fail;
}
}
- adm_ctx.minor = d_in->minor;
- adm_ctx.device = minor_to_device(d_in->minor);
- if (adm_ctx.resource_name) {
- adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name);
+ adm_ctx->minor = d_in->minor;
+ adm_ctx->device = minor_to_device(d_in->minor);
+
+ /* We are protected by the global genl_lock().
+ * But we may explicitly drop it/retake it in drbd_adm_set_role(),
+ * so make sure this object stays around. */
+ if (adm_ctx->device)
+ kref_get(&adm_ctx->device->kref);
+
+ if (adm_ctx->resource_name) {
+ adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
}
- if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) {
- drbd_msg_put_info("unknown minor");
+ if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
+ drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
return ERR_MINOR_INVALID;
}
- if (!adm_ctx.resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
- drbd_msg_put_info("unknown resource");
- if (adm_ctx.resource_name)
+ if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
+ drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
+ if (adm_ctx->resource_name)
return ERR_RES_NOT_KNOWN;
return ERR_INVALID_REQUEST;
}
if (flags & DRBD_ADM_NEED_CONNECTION) {
- if (adm_ctx.resource) {
- drbd_msg_put_info("no resource name expected");
+ if (adm_ctx->resource) {
+ drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
return ERR_INVALID_REQUEST;
}
- if (adm_ctx.device) {
- drbd_msg_put_info("no minor number expected");
+ if (adm_ctx->device) {
+ drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
return ERR_INVALID_REQUEST;
}
- if (adm_ctx.my_addr && adm_ctx.peer_addr)
- adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
- nla_len(adm_ctx.my_addr),
- nla_data(adm_ctx.peer_addr),
- nla_len(adm_ctx.peer_addr));
- if (!adm_ctx.connection) {
- drbd_msg_put_info("unknown connection");
+ if (adm_ctx->my_addr && adm_ctx->peer_addr)
+ adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
+ nla_len(adm_ctx->my_addr),
+ nla_data(adm_ctx->peer_addr),
+ nla_len(adm_ctx->peer_addr));
+ if (!adm_ctx->connection) {
+ drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
return ERR_INVALID_REQUEST;
}
}
/* some more paranoia, if the request was over-determined */
- if (adm_ctx.device && adm_ctx.resource &&
- adm_ctx.device->resource != adm_ctx.resource) {
+ if (adm_ctx->device && adm_ctx->resource &&
+ adm_ctx->device->resource != adm_ctx->resource) {
pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
- adm_ctx.minor, adm_ctx.resource->name,
- adm_ctx.device->resource->name);
- drbd_msg_put_info("minor exists in different resource");
+ adm_ctx->minor, adm_ctx->resource->name,
+ adm_ctx->device->resource->name);
+ drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
return ERR_INVALID_REQUEST;
}
- if (adm_ctx.device &&
- adm_ctx.volume != VOLUME_UNSPECIFIED &&
- adm_ctx.volume != adm_ctx.device->vnr) {
+ if (adm_ctx->device &&
+ adm_ctx->volume != VOLUME_UNSPECIFIED &&
+ adm_ctx->volume != adm_ctx->device->vnr) {
pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
- adm_ctx.minor, adm_ctx.volume,
- adm_ctx.device->vnr,
- adm_ctx.device->resource->name);
- drbd_msg_put_info("minor exists as different volume");
+ adm_ctx->minor, adm_ctx->volume,
+ adm_ctx->device->vnr,
+ adm_ctx->device->resource->name);
+ drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
return ERR_INVALID_REQUEST;
}
+ /* still, provide adm_ctx->resource always, if possible. */
+ if (!adm_ctx->resource) {
+ adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
+ : adm_ctx->connection ? adm_ctx->connection->resource : NULL;
+ if (adm_ctx->resource)
+ kref_get(&adm_ctx->resource->kref);
+ }
+
return NO_ERROR;
fail:
- nlmsg_free(adm_ctx.reply_skb);
- adm_ctx.reply_skb = NULL;
+ nlmsg_free(adm_ctx->reply_skb);
+ adm_ctx->reply_skb = NULL;
return err;
}
-static int drbd_adm_finish(struct genl_info *info, int retcode)
+static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
+ struct genl_info *info, int retcode)
{
- if (adm_ctx.connection) {
- kref_put(&adm_ctx.connection->kref, drbd_destroy_connection);
- adm_ctx.connection = NULL;
+ if (adm_ctx->device) {
+ kref_put(&adm_ctx->device->kref, drbd_destroy_device);
+ adm_ctx->device = NULL;
}
- if (adm_ctx.resource) {
- kref_put(&adm_ctx.resource->kref, drbd_destroy_resource);
- adm_ctx.resource = NULL;
+ if (adm_ctx->connection) {
+ kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
+ adm_ctx->connection = NULL;
+ }
+ if (adm_ctx->resource) {
+ kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
+ adm_ctx->resource = NULL;
}
- if (!adm_ctx.reply_skb)
+ if (!adm_ctx->reply_skb)
return -ENOMEM;
- adm_ctx.reply_dh->ret_code = retcode;
- drbd_adm_send_reply(adm_ctx.reply_skb, info);
+ adm_ctx->reply_dh->ret_code = retcode;
+ drbd_adm_send_reply(adm_ctx->reply_skb, info);
return 0;
}
@@ -426,6 +422,14 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connec
}
rcu_read_unlock();
+ if (fp == FP_NOT_AVAIL) {
+ /* IO Suspending works on the whole resource.
+ Do it only for one device. */
+ vnr = 0;
+ peer_device = idr_get_next(&connection->peer_devices, &vnr);
+ drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
+ }
+
return fp;
}
@@ -438,12 +442,13 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
char *ex_to_string;
int r;
+ spin_lock_irq(&connection->resource->req_lock);
if (connection->cstate >= C_WF_REPORT_PARAMS) {
drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
+ spin_unlock_irq(&connection->resource->req_lock);
return false;
}
- spin_lock_irq(&connection->resource->req_lock);
connect_cnt = connection->connect_cnt;
spin_unlock_irq(&connection->resource->req_lock);
@@ -654,11 +659,11 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
put_ldev(device);
}
} else {
- mutex_lock(&device->resource->conf_update);
+ /* Called from drbd_adm_set_role only.
+ * We are still holding the conf_update mutex. */
nc = first_peer_device(device)->connection->net_conf;
if (nc)
nc->discard_my_data = 0; /* without copy; single bit op is atomic */
- mutex_unlock(&device->resource->conf_update);
set_disk_ro(device->vdisk, false);
if (get_ldev(device)) {
@@ -700,11 +705,12 @@ static const char *from_attrs_err_to_txt(int err)
int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct set_role_parms parms;
int err;
enum drbd_ret_code retcode;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -715,17 +721,22 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
err = set_role_parms_from_attrs(&parms, info);
if (err) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto out;
}
}
+ genl_unlock();
+ mutex_lock(&adm_ctx.resource->adm_mutex);
if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
else
retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
+
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
+ genl_lock();
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -1104,15 +1115,18 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
struct request_queue * const q = device->rq_queue;
unsigned int max_hw_sectors = max_bio_size >> 9;
unsigned int max_segments = 0;
+ struct request_queue *b = NULL;
if (get_ldev_if_state(device, D_ATTACHING)) {
- struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
+ b = device->ldev->backing_bdev->bd_disk->queue;
max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
rcu_read_lock();
max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
rcu_read_unlock();
- put_ldev(device);
+
+ blk_set_stacking_limits(&q->limits);
+ blk_queue_max_write_same_sectors(q, 0);
}
blk_queue_logical_block_size(q, 512);
@@ -1121,8 +1135,25 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
- if (get_ldev_if_state(device, D_ATTACHING)) {
- struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
+ if (b) {
+ struct drbd_connection *connection = first_peer_device(device)->connection;
+
+ if (blk_queue_discard(b) &&
+ (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
+ /* For now, don't allow more than one activity log extent worth of data
+ * to be discarded in one go. We may need to rework drbd_al_begin_io()
+ * to allow for even larger discard ranges */
+ q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
+
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ /* REALLY? Is stacking secdiscard "legal"? */
+ if (blk_queue_secdiscard(b))
+ queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
+ } else {
+ q->limits.max_discard_sectors = 0;
+ queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
+ queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
+ }
blk_queue_stack_limits(q, b);
@@ -1164,8 +1195,14 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
else
peer = DRBD_MAX_BIO_SIZE;
- }
+ /* We may later detach and re-attach on a disconnected Primary.
+ * Avoid this setting to jump back in that case.
+ * We want to store what we know the peer DRBD can handle,
+ * not what the peer IO backend can handle. */
+ if (peer > device->peer_max_bio_size)
+ device->peer_max_bio_size = peer;
+ }
new = min(local, peer);
if (device->state.role == R_PRIMARY && new < now)
@@ -1258,19 +1295,21 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
struct drbd_device *device;
struct disk_conf *new_disk_conf, *old_disk_conf;
struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
int err, fifo_size;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
- goto out;
+ goto finish;
device = adm_ctx.device;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
/* we also need a disk
* to change the options on */
@@ -1294,7 +1333,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
err = disk_conf_from_attrs_for_change(new_disk_conf, info);
if (err && err != -ENOMSG) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto fail_unlock;
}
@@ -1385,12 +1424,15 @@ fail_unlock:
success:
put_ldev(device);
out:
- drbd_adm_finish(info, retcode);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
+ finish:
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_device *device;
int err;
enum drbd_ret_code retcode;
@@ -1406,13 +1448,14 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
enum drbd_state_rv rv;
struct net_conf *nc;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto finish;
device = adm_ctx.device;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
conn_reconfig_start(first_peer_device(device)->connection);
/* if you want to reconfigure, please tear down first */
@@ -1455,7 +1498,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
err = disk_conf_from_attrs(new_disk_conf, info);
if (err) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto fail;
}
@@ -1619,7 +1662,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
}
if (device->state.conn < C_CONNECTED &&
- device->state.role == R_PRIMARY &&
+ device->state.role == R_PRIMARY && device->ed_uuid &&
(device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
(unsigned long long)device->ed_uuid);
@@ -1797,7 +1840,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
put_ldev(device);
conn_reconfig_done(first_peer_device(device)->connection);
- drbd_adm_finish(info, retcode);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
force_diskless_dec:
@@ -1819,9 +1863,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
kfree(new_disk_conf);
lc_destroy(resync_lru);
kfree(new_plan);
-
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
finish:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -1860,11 +1904,12 @@ out:
* Only then we have finally detached. */
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
struct detach_parms parms = { };
int err;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -1874,14 +1919,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
err = detach_parms_from_attrs(&parms, info);
if (err) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto out;
}
}
+ mutex_lock(&adm_ctx.resource->adm_mutex);
retcode = adm_detach(adm_ctx.device, parms.force_detach);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -2055,6 +2102,7 @@ static void free_crypto(struct crypto *crypto)
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
struct drbd_connection *connection;
struct net_conf *old_net_conf, *new_net_conf = NULL;
@@ -2063,13 +2111,14 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
int rsr; /* re-sync running */
struct crypto crypto = { };
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
- goto out;
+ goto finish;
connection = adm_ctx.connection;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
if (!new_net_conf) {
@@ -2084,7 +2133,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
old_net_conf = connection->net_conf;
if (!old_net_conf) {
- drbd_msg_put_info("net conf missing, try connect");
+ drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
retcode = ERR_INVALID_REQUEST;
goto fail;
}
@@ -2096,7 +2145,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
err = net_conf_from_attrs_for_change(new_net_conf, info);
if (err && err != -ENOMSG) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto fail;
}
@@ -2167,12 +2216,15 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
done:
conn_reconfig_done(connection);
out:
- drbd_adm_finish(info, retcode);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
+ finish:
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_peer_device *peer_device;
struct net_conf *old_net_conf, *new_net_conf = NULL;
struct crypto crypto = { };
@@ -2182,14 +2234,14 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
int i;
int err;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
- drbd_msg_put_info("connection endpoint(s) missing");
+ drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
retcode = ERR_INVALID_REQUEST;
goto out;
}
@@ -2215,6 +2267,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
}
}
+ mutex_lock(&adm_ctx.resource->adm_mutex);
connection = first_connection(adm_ctx.resource);
conn_reconfig_start(connection);
@@ -2235,7 +2288,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
err = net_conf_from_attrs(new_net_conf, info);
if (err && err != -ENOMSG) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto fail;
}
@@ -2284,7 +2337,8 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
conn_reconfig_done(connection);
- drbd_adm_finish(info, retcode);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
fail:
@@ -2292,8 +2346,9 @@ fail:
kfree(new_net_conf);
conn_reconfig_done(connection);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -2356,13 +2411,14 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct disconnect_parms parms;
struct drbd_connection *connection;
enum drbd_state_rv rv;
enum drbd_ret_code retcode;
int err;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -2374,18 +2430,20 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
err = disconnect_parms_from_attrs(&parms, info);
if (err) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto fail;
}
}
+ mutex_lock(&adm_ctx.resource->adm_mutex);
rv = conn_try_disconnect(connection, parms.force_disconnect);
if (rv < SS_SUCCESS)
retcode = rv; /* FIXME: Type mismatch. */
else
retcode = NO_ERROR;
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
fail:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -2407,6 +2465,7 @@ void resync_after_online_grow(struct drbd_device *device)
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
struct resize_parms rs;
struct drbd_device *device;
@@ -2417,12 +2476,13 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
sector_t u_size;
int err;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
- goto fail;
+ goto finish;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
if (!get_ldev(device)) {
retcode = ERR_NO_DISK;
@@ -2436,7 +2496,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
err = resize_parms_from_attrs(&rs, info);
if (err) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto fail_ldev;
}
}
@@ -2482,7 +2542,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
goto fail_ldev;
}
- if (device->state.conn != C_CONNECTED) {
+ if (device->state.conn != C_CONNECTED && !rs.resize_force) {
retcode = ERR_MD_LAYOUT_CONNECTED;
goto fail_ldev;
}
@@ -2528,7 +2588,9 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
}
fail:
- drbd_adm_finish(info, retcode);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
+ finish:
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
fail_ldev:
@@ -2538,11 +2600,12 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
struct res_opts res_opts;
int err;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -2555,33 +2618,37 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
err = res_opts_from_attrs(&res_opts, info);
if (err && err != -ENOMSG) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto fail;
}
+ mutex_lock(&adm_ctx.resource->adm_mutex);
err = set_resource_options(adm_ctx.resource, &res_opts);
if (err) {
retcode = ERR_INVALID_REQUEST;
if (err == -ENOMEM)
retcode = ERR_NOMEM;
}
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
fail:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_device *device;
int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
/* If there is still bitmap IO pending, probably because of a previous
@@ -2605,26 +2672,29 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
} else
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
drbd_resume_io(device);
-
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
union drbd_state mask, union drbd_state val)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
retcode = drbd_request_state(adm_ctx.device, mask, val);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -2639,15 +2709,17 @@ static int drbd_bmio_set_susp_al(struct drbd_device *device)
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
int retcode; /* drbd_ret_code, drbd_state_rv */
struct drbd_device *device;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
/* If there is still bitmap IO pending, probably because of a previous
@@ -2674,40 +2746,45 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
} else
retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
drbd_resume_io(device);
-
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
retcode = ERR_PAUSE_IS_SET;
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
union drbd_dev_state s;
enum drbd_ret_code retcode;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
s = adm_ctx.device->state;
if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
@@ -2717,9 +2794,9 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
retcode = ERR_PAUSE_IS_CLEAR;
}
}
-
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -2730,15 +2807,17 @@ int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_device *device;
int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
if (test_bit(NEW_CUR_UUID, &device->flags)) {
drbd_uuid_new_current(device);
@@ -2753,9 +2832,9 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
}
drbd_resume_io(device);
-
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -2931,10 +3010,11 @@ nla_put_failure:
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
int err;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -2946,7 +3026,7 @@ int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
return err;
}
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -3133,11 +3213,12 @@ dump:
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
struct timeout_parms tp;
int err;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -3154,17 +3235,18 @@ int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
return err;
}
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_device *device;
enum drbd_ret_code retcode;
struct start_ov_parms parms;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -3179,10 +3261,12 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
int err = start_ov_parms_from_attrs(&parms, info);
if (err) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto out;
}
}
+ mutex_lock(&adm_ctx.resource->adm_mutex);
+
/* w_make_ov_request expects position to be aligned */
device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
device->ov_stop_sector = parms.ov_stop_sector;
@@ -3193,21 +3277,24 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
drbd_resume_io(device);
+
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_device *device;
enum drbd_ret_code retcode;
int skip_initial_sync = 0;
int err;
struct new_c_uuid_parms args;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -3219,11 +3306,12 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
err = new_c_uuid_parms_from_attrs(&args, info);
if (err) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto out_nolock;
}
}
+ mutex_lock(&adm_ctx.resource->adm_mutex);
mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
if (!get_ldev(device)) {
@@ -3268,22 +3356,24 @@ out_dec:
put_ldev(device);
out:
mutex_unlock(device->state_mutex);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out_nolock:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
static enum drbd_ret_code
-drbd_check_resource_name(const char *name)
+drbd_check_resource_name(struct drbd_config_context *adm_ctx)
{
+ const char *name = adm_ctx->resource_name;
if (!name || !name[0]) {
- drbd_msg_put_info("resource name missing");
+ drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
return ERR_MANDATORY_TAG;
}
/* if we want to use these in sysfs/configfs/debugfs some day,
* we must not allow slashes */
if (strchr(name, '/')) {
- drbd_msg_put_info("invalid resource name");
+ drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
return ERR_INVALID_REQUEST;
}
return NO_ERROR;
@@ -3291,11 +3381,12 @@ drbd_check_resource_name(const char *name)
int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
struct res_opts res_opts;
int err;
- retcode = drbd_adm_prepare(skb, info, 0);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
@@ -3305,48 +3396,50 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
err = res_opts_from_attrs(&res_opts, info);
if (err && err != -ENOMSG) {
retcode = ERR_MANDATORY_TAG;
- drbd_msg_put_info(from_attrs_err_to_txt(err));
+ drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
goto out;
}
- retcode = drbd_check_resource_name(adm_ctx.resource_name);
+ retcode = drbd_check_resource_name(&adm_ctx);
if (retcode != NO_ERROR)
goto out;
if (adm_ctx.resource) {
if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
retcode = ERR_INVALID_REQUEST;
- drbd_msg_put_info("resource exists");
+ drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
}
/* else: still NO_ERROR */
goto out;
}
+ /* not yet safe for genl_family.parallel_ops */
if (!conn_create(adm_ctx.resource_name, &res_opts))
retcode = ERR_NOMEM;
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_genlmsghdr *dh = info->userhdr;
enum drbd_ret_code retcode;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
if (dh->minor > MINORMASK) {
- drbd_msg_put_info("requested minor out of range");
+ drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
retcode = ERR_INVALID_REQUEST;
goto out;
}
if (adm_ctx.volume > DRBD_VOLUME_MAX) {
- drbd_msg_put_info("requested volume id out of range");
+ drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
retcode = ERR_INVALID_REQUEST;
goto out;
}
@@ -3360,9 +3453,11 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- retcode = drbd_create_device(adm_ctx.resource, dh->minor, adm_ctx.volume);
+ mutex_lock(&adm_ctx.resource->adm_mutex);
+ retcode = drbd_create_device(&adm_ctx, dh->minor);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
@@ -3383,35 +3478,40 @@ static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
goto out;
+ mutex_lock(&adm_ctx.resource->adm_mutex);
retcode = adm_del_minor(adm_ctx.device);
+ mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
- drbd_adm_finish(info, retcode);
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_resource *resource;
struct drbd_connection *connection;
struct drbd_device *device;
int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
unsigned i;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
- goto out;
+ goto finish;
resource = adm_ctx.resource;
+ mutex_lock(&resource->adm_mutex);
/* demote */
for_each_connection(connection, resource) {
struct drbd_peer_device *peer_device;
@@ -3419,14 +3519,14 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
idr_for_each_entry(&connection->peer_devices, peer_device, i) {
retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
if (retcode < SS_SUCCESS) {
- drbd_msg_put_info("failed to demote");
+ drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
goto out;
}
}
retcode = conn_try_disconnect(connection, 0);
if (retcode < SS_SUCCESS) {
- drbd_msg_put_info("failed to disconnect");
+ drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
goto out;
}
}
@@ -3435,7 +3535,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
idr_for_each_entry(&resource->devices, device, i) {
retcode = adm_detach(device, 0);
if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
- drbd_msg_put_info("failed to detach");
+ drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
goto out;
}
}
@@ -3453,7 +3553,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
retcode = adm_del_minor(device);
if (retcode != NO_ERROR) {
/* "can not happen" */
- drbd_msg_put_info("failed to delete volume");
+ drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
goto out;
}
}
@@ -3462,25 +3562,28 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
synchronize_rcu();
drbd_free_resource(resource);
retcode = NO_ERROR;
-
out:
- drbd_adm_finish(info, retcode);
+ mutex_unlock(&resource->adm_mutex);
+finish:
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_config_context adm_ctx;
struct drbd_resource *resource;
struct drbd_connection *connection;
enum drbd_ret_code retcode;
- retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
+ retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
if (!adm_ctx.reply_skb)
return retcode;
if (retcode != NO_ERROR)
- goto out;
+ goto finish;
resource = adm_ctx.resource;
+ mutex_lock(&resource->adm_mutex);
for_each_connection(connection, resource) {
if (connection->cstate > C_STANDALONE) {
retcode = ERR_NET_CONFIGURED;
@@ -3499,7 +3602,9 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
drbd_free_resource(resource);
retcode = NO_ERROR;
out:
- drbd_adm_finish(info, retcode);
+ mutex_unlock(&resource->adm_mutex);
+finish:
+ drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c
index fa672b6df8d6..b2d4791498a6 100644
--- a/drivers/block/drbd/drbd_nla.c
+++ b/drivers/block/drbd/drbd_nla.c
@@ -1,4 +1,3 @@
-#include "drbd_wrappers.h"
#include <linux/kernel.h>
#include <net/netlink.h>
#include <linux/drbd_genl_api.h>
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 2f26e8ffa45b..89736bdbbc70 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -116,7 +116,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
/* ------------------------ ~18s average ------------------------ */
i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS;
dt = (jiffies - device->rs_mark_time[i]) / HZ;
- if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
+ if (dt > 180)
stalled = 1;
if (!dt)
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h
index 3c04ec0ea333..2da9104a3851 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -54,6 +54,11 @@ enum drbd_packet {
P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */
P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */
P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */
+ /* 0x2e to 0x30 reserved, used in drbd 9 */
+
+ /* REQ_DISCARD. We used "discard" in different contexts before,
+ * which is why I chose TRIM here, to disambiguate. */
+ P_TRIM = 0x31,
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
P_MAX_OPT_CMD = 0x101,
@@ -119,6 +124,11 @@ struct p_data {
u32 dp_flags;
} __packed;
+struct p_trim {
+ struct p_data p_data;
+ u32 size; /* == bio->bi_size */
+} __packed;
+
/*
* commands which share a struct:
* p_block_ack:
@@ -150,6 +160,8 @@ struct p_block_req {
* ReportParams
*/
+#define FF_TRIM 1
+
struct p_connection_features {
u32 protocol_min;
u32 feature_flags;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 68e3992e8838..b6c8aaf4931b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -46,9 +46,10 @@
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
-
#include "drbd_vli.h"
+#define PRO_FEATURES (FF_TRIM)
+
struct packet_info {
enum drbd_packet cmd;
unsigned int size;
@@ -65,7 +66,7 @@ enum finish_epoch {
static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
-
+static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);
@@ -234,9 +235,17 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
* @retry: whether to retry, if not enough pages are available right now
*
* Tries to allocate number pages, first from our own page pool, then from
- * the kernel, unless this allocation would exceed the max_buffers setting.
+ * the kernel.
* Possibly retry until DRBD frees sufficient pages somewhere else.
*
+ * If this allocation would exceed the max_buffers setting, we throttle
+ * allocation (schedule_timeout) to give the system some room to breathe.
+ *
+ * We do not use max-buffers as hard limit, because it could lead to
+ * congestion and further to a distributed deadlock during online-verify or
+ * (checksum based) resync, if the max-buffers, socket buffer sizes and
+ * resync-rate settings are mis-configured.
+ *
* Returns a page chain linked via page->private.
*/
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
@@ -246,10 +255,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
struct page *page = NULL;
struct net_conf *nc;
DEFINE_WAIT(wait);
- int mxb;
+ unsigned int mxb;
- /* Yes, we may run up to @number over max_buffers. If we
- * follow it strictly, the admin will get it wrong anyways. */
rcu_read_lock();
nc = rcu_dereference(peer_device->connection->net_conf);
mxb = nc ? nc->max_buffers : 1000000;
@@ -277,7 +284,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int
break;
}
- schedule();
+ if (schedule_timeout(HZ/10) == 0)
+ mxb = UINT_MAX;
}
finish_wait(&drbd_pp_wait, &wait);
@@ -331,7 +339,7 @@ You must not have the req_lock:
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
- unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
+ unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
struct drbd_device *device = peer_device->device;
struct drbd_peer_request *peer_req;
@@ -348,7 +356,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
return NULL;
}
- if (data_size) {
+ if (has_payload && data_size) {
page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
if (!page)
goto fail;
@@ -1026,24 +1034,27 @@ randomize:
if (drbd_send_protocol(connection) == -EOPNOTSUPP)
return -1;
+ /* Prevent a race between resync-handshake and
+ * being promoted to Primary.
+ *
+ * Grab and release the state mutex, so we know that any current
+ * drbd_set_role() is finished, and any incoming drbd_set_role
+ * will see the STATE_SENT flag, and wait for it to be cleared.
+ */
+ idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+ mutex_lock(peer_device->device->state_mutex);
+
set_bit(STATE_SENT, &connection->flags);
+ idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+ mutex_unlock(peer_device->device->state_mutex);
+
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
kref_get(&device->kref);
rcu_read_unlock();
- /* Prevent a race between resync-handshake and
- * being promoted to Primary.
- *
- * Grab and release the state mutex, so we know that any current
- * drbd_set_role() is finished, and any incoming drbd_set_role
- * will see the STATE_SENT flag, and wait for it to be cleared.
- */
- mutex_lock(device->state_mutex);
- mutex_unlock(device->state_mutex);
-
if (discard_my_data)
set_bit(DISCARD_MY_DATA, &device->flags);
else
@@ -1315,6 +1326,20 @@ int drbd_submit_peer_request(struct drbd_device *device,
unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
int err = -ENOMEM;
+ if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
+ /* wait for all pending IO completions, before we start
+ * zeroing things out. */
+ conn_wait_active_ee_empty(first_peer_device(device)->connection);
+ if (blkdev_issue_zeroout(device->ldev->backing_bdev,
+ sector, ds >> 9, GFP_NOIO))
+ peer_req->flags |= EE_WAS_ERROR;
+ drbd_endio_write_sec_final(peer_req);
+ return 0;
+ }
+
+ if (peer_req->flags & EE_IS_TRIM)
+ nr_pages = 0; /* discards don't have any payload. */
+
/* In most cases, we will only need one bio. But in case the lower
* level restrictions happen to be different at this offset on this
* side than those of the sending peer, we may need to submit the
@@ -1326,7 +1351,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
next_bio:
bio = bio_alloc(GFP_NOIO, nr_pages);
if (!bio) {
- drbd_err(device, "submit_ee: Allocation of a bio failed\n");
+ drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
goto fail;
}
/* > peer_req->i.sector, unless this is the first bio */
@@ -1340,6 +1365,11 @@ next_bio:
bios = bio;
++n_bios;
+ if (rw & REQ_DISCARD) {
+ bio->bi_iter.bi_size = ds;
+ goto submit;
+ }
+
page_chain_for_each(page) {
unsigned len = min_t(unsigned, ds, PAGE_SIZE);
if (!bio_add_page(bio, page, len, 0)) {
@@ -1360,8 +1390,9 @@ next_bio:
sector += len >> 9;
--nr_pages;
}
- D_ASSERT(device, page == NULL);
D_ASSERT(device, ds == 0);
+submit:
+ D_ASSERT(device, page == NULL);
atomic_set(&peer_req->pending_bios, n_bios);
do {
@@ -1490,19 +1521,21 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
* and from receive_Data */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
- int data_size) __must_hold(local)
+ struct packet_info *pi) __must_hold(local)
{
struct drbd_device *device = peer_device->device;
const sector_t capacity = drbd_get_capacity(device->this_bdev);
struct drbd_peer_request *peer_req;
struct page *page;
int dgs, ds, err;
+ int data_size = pi->size;
void *dig_in = peer_device->connection->int_dig_in;
void *dig_vv = peer_device->connection->int_dig_vv;
unsigned long *data;
+ struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
dgs = 0;
- if (peer_device->connection->peer_integrity_tfm) {
+ if (!trim && peer_device->connection->peer_integrity_tfm) {
dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
/*
* FIXME: Receive the incoming digest into the receive buffer
@@ -1514,9 +1547,15 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
data_size -= dgs;
}
+ if (trim) {
+ D_ASSERT(peer_device, data_size == 0);
+ data_size = be32_to_cpu(trim->size);
+ }
+
if (!expect(IS_ALIGNED(data_size, 512)))
return NULL;
- if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
+ /* prepare for larger trim requests. */
+ if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
return NULL;
/* even though we trust out peer,
@@ -1532,11 +1571,11 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
* "criss-cross" setup, that might cause write-out on some other DRBD,
* which in turn might block on the other node at this very place. */
- peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
+ peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
if (!peer_req)
return NULL;
- if (!data_size)
+ if (trim)
return peer_req;
ds = data_size;
@@ -1676,12 +1715,12 @@ static int e_end_resync_block(struct drbd_work *w, int unused)
}
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
- int data_size) __releases(local)
+ struct packet_info *pi) __releases(local)
{
struct drbd_device *device = peer_device->device;
struct drbd_peer_request *peer_req;
- peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
+ peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
if (!peer_req)
goto fail;
@@ -1697,7 +1736,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
list_add(&peer_req->w.list, &device->sync_ee);
spin_unlock_irq(&device->resource->req_lock);
- atomic_add(data_size >> 9, &device->rs_sect_ev);
+ atomic_add(pi->size >> 9, &device->rs_sect_ev);
if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
return 0;
@@ -1785,7 +1824,7 @@ static int receive_RSDataReply(struct drbd_connection *connection, struct packet
/* data is submitted to disk within recv_resync_read.
* corresponding put_ldev done below on error,
* or in drbd_peer_request_endio. */
- err = recv_resync_read(peer_device, sector, pi->size);
+ err = recv_resync_read(peer_device, sector, pi);
} else {
if (__ratelimit(&drbd_ratelimit_state))
drbd_err(device, "Can not write resync data to local disk.\n");
@@ -2196,7 +2235,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
*/
sector = be64_to_cpu(p->sector);
- peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
+ peer_req = read_in_block(peer_device, p->block_id, sector, pi);
if (!peer_req) {
put_ldev(device);
return -EIO;
@@ -2206,7 +2245,15 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
dp_flags = be32_to_cpu(p->dp_flags);
rw |= wire_flags_to_bio(dp_flags);
- if (peer_req->pages == NULL) {
+ if (pi->cmd == P_TRIM) {
+ struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+ peer_req->flags |= EE_IS_TRIM;
+ if (!blk_queue_discard(q))
+ peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
+ D_ASSERT(peer_device, peer_req->i.size > 0);
+ D_ASSERT(peer_device, rw & REQ_DISCARD);
+ D_ASSERT(peer_device, peer_req->pages == NULL);
+ } else if (peer_req->pages == NULL) {
D_ASSERT(device, peer_req->i.size == 0);
D_ASSERT(device, dp_flags & DP_FLUSH);
}
@@ -2242,7 +2289,12 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
update_peer_seq(peer_device, peer_seq);
spin_lock_irq(&device->resource->req_lock);
}
- list_add(&peer_req->w.list, &device->active_ee);
+ /* if we use the zeroout fallback code, we process synchronously
+ * and we wait for all pending requests, respectively wait for
+ * active_ee to become empty in drbd_submit_peer_request();
+ * better not add ourselves here. */
+ if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
+ list_add(&peer_req->w.list, &device->active_ee);
spin_unlock_irq(&device->resource->req_lock);
if (device->state.conn == C_SYNC_TARGET)
@@ -2313,39 +2365,45 @@ out_interrupted:
* The current sync rate used here uses only the most recent two step marks,
* to have a short time average so we can react faster.
*/
-int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
{
- struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
- unsigned long db, dt, dbdt;
struct lc_element *tmp;
- int curr_events;
- int throttle = 0;
- unsigned int c_min_rate;
-
- rcu_read_lock();
- c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
- rcu_read_unlock();
+ bool throttle = true;
- /* feature disabled? */
- if (c_min_rate == 0)
- return 0;
+ if (!drbd_rs_c_min_rate_throttle(device))
+ return false;
spin_lock_irq(&device->al_lock);
tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
if (tmp) {
struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
- if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
- spin_unlock_irq(&device->al_lock);
- return 0;
- }
+ if (test_bit(BME_PRIORITY, &bm_ext->flags))
+ throttle = false;
/* Do not slow down if app IO is already waiting for this extent */
}
spin_unlock_irq(&device->al_lock);
+ return throttle;
+}
+
+bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
+{
+ struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
+ unsigned long db, dt, dbdt;
+ unsigned int c_min_rate;
+ int curr_events;
+
+ rcu_read_lock();
+ c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
+ rcu_read_unlock();
+
+ /* feature disabled? */
+ if (c_min_rate == 0)
+ return false;
+
curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
(int)part_stat_read(&disk->part0, sectors[1]) -
atomic_read(&device->rs_sect_ev);
-
if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
unsigned long rs_left;
int i;
@@ -2368,12 +2426,11 @@ int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
dbdt = Bit2KB(db/dt);
if (dbdt > c_min_rate)
- throttle = 1;
+ return true;
}
- return throttle;
+ return false;
}
-
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
struct drbd_peer_device *peer_device;
@@ -2436,7 +2493,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
* "criss-cross" setup, that might cause write-out on some other DRBD,
* which in turn might block on the other node at this very place. */
- peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
+ peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
+ true /* has real payload */, GFP_NOIO);
if (!peer_req) {
put_ldev(device);
return -ENOMEM;
@@ -3648,6 +3706,13 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
put_ldev(device);
}
+ device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
+ drbd_reconsider_max_bio_size(device);
+ /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
+ In case we cleared the QUEUE_FLAG_DISCARD from our queue in
+ drbd_reconsider_max_bio_size(), we can be sure that after
+ drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
+
ddsf = be16_to_cpu(p->dds_flags);
if (get_ldev(device)) {
dd = drbd_determine_dev_size(device, ddsf, NULL);
@@ -3660,9 +3725,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
drbd_set_my_capacity(device, p_size);
}
- device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
- drbd_reconsider_max_bio_size(device);
-
if (get_ldev(device)) {
if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
@@ -4423,6 +4485,7 @@ static struct data_cmd drbd_cmd_handler[] = {
[P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
+ [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
};
static void drbdd(struct drbd_connection *connection)
@@ -4630,6 +4693,7 @@ static int drbd_send_features(struct drbd_connection *connection)
memset(p, 0, sizeof(*p));
p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
+ p->feature_flags = cpu_to_be32(PRO_FEATURES);
return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
}
@@ -4683,10 +4747,14 @@ static int drbd_do_features(struct drbd_connection *connection)
goto incompat;
connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
+ connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
drbd_info(connection, "Handshake successful: "
"Agreed network protocol version %d\n", connection->agreed_pro_version);
+ drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
+ connection->agreed_features & FF_TRIM ? " " : " not ");
+
return 1;
incompat:
@@ -4778,6 +4846,12 @@ static int drbd_do_auth(struct drbd_connection *connection)
goto fail;
}
+ if (pi.size < CHALLENGE_LEN) {
+ drbd_err(connection, "AuthChallenge payload too small.\n");
+ rv = -1;
+ goto fail;
+ }
+
peers_ch = kmalloc(pi.size, GFP_NOIO);
if (peers_ch == NULL) {
drbd_err(connection, "kmalloc of peers_ch failed\n");
@@ -4791,6 +4865,12 @@ static int drbd_do_auth(struct drbd_connection *connection)
goto fail;
}
+ if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
+ drbd_err(connection, "Peer presented the same challenge!\n");
+ rv = -1;
+ goto fail;
+ }
+
resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
response = kmalloc(resp_size, GFP_NOIO);
if (response == NULL) {
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3779c8d2875b..09803d0d5207 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -522,6 +522,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
break;
+ case DISCARD_COMPLETED_NOTSUPP:
+ case DISCARD_COMPLETED_WITH_ERROR:
+ /* I'd rather not detach from local disk just because it
+ * failed a REQ_DISCARD. */
+ mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED);
+ break;
+
case QUEUE_FOR_NET_READ:
/* READ or READA, and
* no local disk,
@@ -1235,6 +1242,7 @@ void do_submit(struct work_struct *ws)
if (list_empty(&incoming))
break;
+skip_fast_path:
wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
/* Maybe more was queued, while we prepared the transaction?
* Try to stuff them into this transaction as well.
@@ -1273,6 +1281,25 @@ void do_submit(struct work_struct *ws)
list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req);
}
+
+ /* If all currently hot activity log extents are kept busy by
+ * incoming requests, we still must not totally starve new
+ * requests to cold extents. In that case, prepare one request
+ * in blocking mode. */
+ list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
+ list_del_init(&req->tl_requests);
+ req->rq_state |= RQ_IN_ACT_LOG;
+ if (!drbd_al_begin_io_prepare(device, &req->i)) {
+ /* Corresponding extent was hot after all? */
+ drbd_send_and_submit(device, req);
+ } else {
+ /* Found a request to a cold extent.
+ * Put on "pending" list,
+ * and try to cumulate with more. */
+ list_add(&req->tl_requests, &pending);
+ goto skip_fast_path;
+ }
+ }
}
}
@@ -1326,23 +1353,35 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
return limit;
}
-static struct drbd_request *find_oldest_request(struct drbd_connection *connection)
+static void find_oldest_requests(
+ struct drbd_connection *connection,
+ struct drbd_device *device,
+ struct drbd_request **oldest_req_waiting_for_peer,
+ struct drbd_request **oldest_req_waiting_for_disk)
{
- /* Walk the transfer log,
- * and find the oldest not yet completed request */
struct drbd_request *r;
+ *oldest_req_waiting_for_peer = NULL;
+ *oldest_req_waiting_for_disk = NULL;
list_for_each_entry(r, &connection->transfer_log, tl_requests) {
- if (atomic_read(&r->completion_ref))
- return r;
+ const unsigned s = r->rq_state;
+ if (!*oldest_req_waiting_for_peer
+ && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
+ *oldest_req_waiting_for_peer = r;
+
+ if (!*oldest_req_waiting_for_disk
+ && (s & RQ_LOCAL_PENDING) && r->device == device)
+ *oldest_req_waiting_for_disk = r;
+
+ if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
+ break;
}
- return NULL;
}
void request_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
struct drbd_connection *connection = first_peer_device(device)->connection;
- struct drbd_request *req; /* oldest request */
+ struct drbd_request *req_disk, *req_peer; /* oldest request */
struct net_conf *nc;
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
unsigned long now;
@@ -1366,8 +1405,8 @@ void request_timer_fn(unsigned long data)
now = jiffies;
spin_lock_irq(&device->resource->req_lock);
- req = find_oldest_request(connection);
- if (!req) {
+ find_oldest_requests(connection, device, &req_peer, &req_disk);
+ if (req_peer == NULL && req_disk == NULL) {
spin_unlock_irq(&device->resource->req_lock);
mod_timer(&device->request_timer, now + et);
return;
@@ -1389,19 +1428,26 @@ void request_timer_fn(unsigned long data)
* ~198 days with 250 HZ, we have a window where the timeout would need
* to expire twice (worst case) to become effective. Good enough.
*/
- if (ent && req->rq_state & RQ_NET_PENDING &&
- time_after(now, req->start_time + ent) &&
+ if (ent && req_peer &&
+ time_after(now, req_peer->start_time + ent) &&
!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
}
- if (dt && req->rq_state & RQ_LOCAL_PENDING && req->device == device &&
- time_after(now, req->start_time + dt) &&
+ if (dt && req_disk &&
+ time_after(now, req_disk->start_time + dt) &&
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
}
- nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
+
+ /* Reschedule timer for the nearest not already expired timeout.
+ * Fallback to now + min(effective network timeout, disk timeout). */
+ ent = (ent && req_peer && time_before(now, req_peer->start_time + ent))
+ ? req_peer->start_time + ent : now + et;
+ dt = (dt && req_disk && time_before(now, req_disk->start_time + dt))
+ ? req_disk->start_time + dt : now + et;
+ nt = time_before(ent, dt) ? ent : dt;
spin_unlock_irq(&connection->resource->req_lock);
mod_timer(&device->request_timer, nt);
}
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index c684c963538e..8566cd5866b4 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -30,7 +30,6 @@
#include <linux/slab.h>
#include <linux/drbd.h>
#include "drbd_int.h"
-#include "drbd_wrappers.h"
/* The request callbacks will be called in irq context by the IDE drivers,
and in Softirqs/Tasklets/BH context by the SCSI drivers,
@@ -111,11 +110,14 @@ enum drbd_req_event {
BARRIER_ACKED, /* in protocol A and B */
DATA_RECEIVED, /* (remote read) */
+ COMPLETED_OK,
READ_COMPLETED_WITH_ERROR,
READ_AHEAD_COMPLETED_WITH_ERROR,
WRITE_COMPLETED_WITH_ERROR,
+ DISCARD_COMPLETED_NOTSUPP,
+ DISCARD_COMPLETED_WITH_ERROR,
+
ABORT_DISK_IO,
- COMPLETED_OK,
RESEND,
FAIL_FROZEN_DISK_IO,
RESTART_FROZEN_DISK_IO,
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 1a84345a3868..a5d8aae00e04 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -54,8 +54,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
-static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
- enum sanitize_state_warnings *warn);
+static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
+ union drbd_state ns, enum sanitize_state_warnings *warn);
static inline bool is_susp(union drbd_state s)
{
@@ -287,7 +287,7 @@ _req_st_cond(struct drbd_device *device, union drbd_state mask,
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
- ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+ ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
rv = is_valid_transition(os, ns);
if (rv >= SS_SUCCESS)
rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
@@ -333,7 +333,7 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
spin_lock_irqsave(&device->resource->req_lock, flags);
os = drbd_read_state(device);
- ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+ ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
rv = is_valid_transition(os, ns);
if (rv < SS_SUCCESS) {
spin_unlock_irqrestore(&device->resource->req_lock, flags);
@@ -740,8 +740,8 @@ static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_st
* When we loose connection, we have to set the state of the peers disk (pdsk)
* to D_UNKNOWN. This rule and many more along those lines are in this function.
*/
-static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns,
- enum sanitize_state_warnings *warn)
+static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
+ union drbd_state ns, enum sanitize_state_warnings *warn)
{
enum drbd_fencing_p fp;
enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
@@ -882,11 +882,13 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st
}
if (fp == FP_STONITH &&
- (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
+ (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
+ !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO &&
- (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+ (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
+ !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */
if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
@@ -958,7 +960,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
os = drbd_read_state(device);
- ns = sanitize_state(device, ns, &ssw);
+ ns = sanitize_state(device, os, ns, &ssw);
if (ns.i == os.i)
return SS_NOTHING_TO_DO;
@@ -1656,7 +1658,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
os = drbd_read_state(device);
- ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL);
+ ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
ns.disk = os.disk;
@@ -1718,7 +1720,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
number_of_volumes++;
os = drbd_read_state(device);
ns = apply_mask_val(os, mask, val);
- ns = sanitize_state(device, ns, NULL);
+ ns = sanitize_state(device, os, ns, NULL);
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
ns.disk = os.disk;
@@ -1763,19 +1765,19 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union
static enum drbd_state_rv
_conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val)
{
- enum drbd_state_rv rv;
+ enum drbd_state_rv err, rv = SS_UNKNOWN_ERROR; /* continue waiting */;
if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags))
- return SS_CW_SUCCESS;
+ rv = SS_CW_SUCCESS;
if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags))
- return SS_CW_FAILED_BY_PEER;
+ rv = SS_CW_FAILED_BY_PEER;
- rv = conn_is_valid_transition(connection, mask, val, 0);
- if (rv == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
- rv = SS_UNKNOWN_ERROR; /* continue waiting */
+ err = conn_is_valid_transition(connection, mask, val, 0);
+ if (err == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
+ return rv;
- return rv;
+ return err;
}
enum drbd_state_rv
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 2c4ce42c3657..d8f57b6305cd 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -118,7 +118,7 @@ static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __rele
/* writes on behalf of the partner, or resync writes,
* "submitted" by the receiver, final stage. */
-static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
+void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
unsigned long flags = 0;
struct drbd_peer_device *peer_device = peer_req->peer_device;
@@ -150,7 +150,9 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel
do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
- if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
+ /* FIXME do we want to detach for failed REQ_DISCARD?
+ * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
+ if (peer_req->flags & EE_WAS_ERROR)
__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
spin_unlock_irqrestore(&device->resource->req_lock, flags);
@@ -176,10 +178,12 @@ void drbd_peer_request_endio(struct bio *bio, int error)
struct drbd_device *device = peer_req->peer_device->device;
int uptodate = bio_flagged(bio, BIO_UPTODATE);
int is_write = bio_data_dir(bio) == WRITE;
+ int is_discard = !!(bio->bi_rw & REQ_DISCARD);
if (error && __ratelimit(&drbd_ratelimit_state))
drbd_warn(device, "%s: error=%d s=%llus\n",
- is_write ? "write" : "read", error,
+ is_write ? (is_discard ? "discard" : "write")
+ : "read", error,
(unsigned long long)peer_req->i.sector);
if (!error && !uptodate) {
if (__ratelimit(&drbd_ratelimit_state))
@@ -263,7 +267,12 @@ void drbd_request_endio(struct bio *bio, int error)
/* to avoid recursion in __req_mod */
if (unlikely(error)) {
- what = (bio_data_dir(bio) == WRITE)
+ if (bio->bi_rw & REQ_DISCARD)
+ what = (error == -EOPNOTSUPP)
+ ? DISCARD_COMPLETED_NOTSUPP
+ : DISCARD_COMPLETED_WITH_ERROR;
+ else
+ what = (bio_data_dir(bio) == WRITE)
? WRITE_COMPLETED_WITH_ERROR
: (bio_rw(bio) == READ)
? READ_COMPLETED_WITH_ERROR
@@ -395,7 +404,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
/* GFP_TRY, because if there is no memory available right now, this may
* be rescheduled for later. It is "only" background resync, after all. */
peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
- size, GFP_TRY);
+ size, true /* has real payload */, GFP_TRY);
if (!peer_req)
goto defer;
@@ -492,10 +501,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size)
return fb;
}
-static int drbd_rs_controller(struct drbd_device *device)
+static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
struct disk_conf *dc;
- unsigned int sect_in; /* Number of sectors that came in since the last turn */
unsigned int want; /* The number of sectors we want in the proxy */
int req_sect; /* Number of sectors to request in this turn */
int correction; /* Number of sectors more we need in the proxy*/
@@ -505,9 +513,6 @@ static int drbd_rs_controller(struct drbd_device *device)
int max_sect;
struct fifo_buffer *plan;
- sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */
- device->rs_in_flight -= sect_in;
-
dc = rcu_dereference(device->ldev->disk_conf);
plan = rcu_dereference(device->rs_plan_s);
@@ -550,11 +555,16 @@ static int drbd_rs_controller(struct drbd_device *device)
static int drbd_rs_number_requests(struct drbd_device *device)
{
- int number;
+ unsigned int sect_in; /* Number of sectors that came in since the last turn */
+ int number, mxb;
+
+ sect_in = atomic_xchg(&device->rs_sect_in, 0);
+ device->rs_in_flight -= sect_in;
rcu_read_lock();
+ mxb = drbd_get_max_buffers(device) / 2;
if (rcu_dereference(device->rs_plan_s)->size) {
- number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9);
+ number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
} else {
device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
@@ -562,8 +572,14 @@ static int drbd_rs_number_requests(struct drbd_device *device)
}
rcu_read_unlock();
- /* ignore the amount of pending requests, the resync controller should
- * throttle down to incoming reply rate soon enough anyways. */
+ /* Don't have more than "max-buffers"/2 in-flight.
+ * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
+ * potentially causing a distributed deadlock on congestion during
+ * online-verify or (checksum-based) resync, if max-buffers,
+ * socket buffer sizes and resync rate settings are mis-configured. */
+ if (mxb - device->rs_in_flight < number)
+ number = mxb - device->rs_in_flight;
+
return number;
}
@@ -597,7 +613,7 @@ static int make_resync_request(struct drbd_device *device, int cancel)
max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
number = drbd_rs_number_requests(device);
- if (number == 0)
+ if (number <= 0)
goto requeue;
for (i = 0; i < number; i++) {
@@ -647,7 +663,7 @@ next_sector:
*/
align = 1;
rollback_i = i;
- for (;;) {
+ while (i < number) {
if (size + BM_BLOCK_SIZE > max_bio_size)
break;
@@ -1670,11 +1686,15 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
}
clear_bit(B_RS_H_DONE, &device->flags);
- write_lock_irq(&global_state_lock);
+ /* req_lock: serialize with drbd_send_and_submit() and others
+ * global_state_lock: for stable sync-after dependencies */
+ spin_lock_irq(&device->resource->req_lock);
+ write_lock(&global_state_lock);
/* Did some connection breakage or IO error race with us? */
if (device->state.conn < C_CONNECTED
|| !get_ldev_if_state(device, D_NEGOTIATING)) {
- write_unlock_irq(&global_state_lock);
+ write_unlock(&global_state_lock);
+ spin_unlock_irq(&device->resource->req_lock);
mutex_unlock(device->state_mutex);
return;
}
@@ -1714,7 +1734,8 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
}
_drbd_pause_after(device);
}
- write_unlock_irq(&global_state_lock);
+ write_unlock(&global_state_lock);
+ spin_unlock_irq(&device->resource->req_lock);
if (r == SS_SUCCESS) {
/* reset rs_last_bcast when a resync or verify is started,
@@ -1778,34 +1799,6 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
mutex_unlock(device->state_mutex);
}
-/* If the resource already closed the current epoch, but we did not
- * (because we have not yet seen new requests), we should send the
- * corresponding barrier now. Must be checked within the same spinlock
- * that is used to check for new requests. */
-static bool need_to_send_barrier(struct drbd_connection *connection)
-{
- if (!connection->send.seen_any_write_yet)
- return false;
-
- /* Skip barriers that do not contain any writes.
- * This may happen during AHEAD mode. */
- if (!connection->send.current_epoch_writes)
- return false;
-
- /* ->req_lock is held when requests are queued on
- * connection->sender_work, and put into ->transfer_log.
- * It is also held when ->current_tle_nr is increased.
- * So either there are already new requests queued,
- * and corresponding barriers will be send there.
- * Or nothing new is queued yet, so the difference will be 1.
- */
- if (atomic_read(&connection->current_tle_nr) !=
- connection->send.current_epoch_nr + 1)
- return false;
-
- return true;
-}
-
static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
spin_lock_irq(&queue->q_lock);
@@ -1864,12 +1857,22 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
spin_unlock_irq(&connection->resource->req_lock);
break;
}
- send_barrier = need_to_send_barrier(connection);
+
+ /* We found nothing new to do, no to-be-communicated request,
+ * no other work item. We may still need to close the last
+ * epoch. Next incoming request epoch will be connection ->
+ * current transfer log epoch number. If that is different
+ * from the epoch of the last request we communicated, it is
+ * safe to send the epoch separating barrier now.
+ */
+ send_barrier =
+ atomic_read(&connection->current_tle_nr) !=
+ connection->send.current_epoch_nr;
spin_unlock_irq(&connection->resource->req_lock);
- if (send_barrier) {
- drbd_send_barrier(connection);
- connection->send.current_epoch_nr++;
- }
+
+ if (send_barrier)
+ maybe_send_barrier(connection,
+ connection->send.current_epoch_nr + 1);
schedule();
/* may be woken up for other things but new work, too,
* e.g. if the current epoch got closed.
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h
deleted file mode 100644
index 3db9ebaf64f6..000000000000
--- a/drivers/block/drbd/drbd_wrappers.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef _DRBD_WRAPPERS_H
-#define _DRBD_WRAPPERS_H
-
-#include <linux/ctype.h>
-#include <linux/mm.h>
-#include "drbd_int.h"
-
-/* see get_sb_bdev and bd_claim */
-extern char *drbd_sec_holder;
-
-/* sets the number of 512 byte sectors of our virtual device */
-static inline void drbd_set_my_capacity(struct drbd_device *device,
- sector_t size)
-{
- /* set_capacity(device->this_bdev->bd_disk, size); */
- set_capacity(device->vdisk, size);
- device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
-}
-
-#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE)
-
-/* bi_end_io handlers */
-extern void drbd_md_io_complete(struct bio *bio, int error);
-extern void drbd_peer_request_endio(struct bio *bio, int error);
-extern void drbd_request_endio(struct bio *bio, int error);
-
-/*
- * used to submit our private bio
- */
-static inline void drbd_generic_make_request(struct drbd_device *device,
- int fault_type, struct bio *bio)
-{
- __release(local);
- if (!bio->bi_bdev) {
- printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
- "bio->bi_bdev == NULL\n",
- device_to_minor(device));
- dump_stack();
- bio_endio(bio, -ENODEV);
- return;
- }
-
- if (drbd_insert_fault(device, fault_type))
- bio_endio(bio, -EIO);
- else
- generic_make_request(bio);
-}
-
-#ifndef __CHECKER__
-# undef __cond_lock
-# define __cond_lock(x,c) (c)
-#endif
-
-#endif