From 265be2d09853d425ad14a61cda0ca63345613d0c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 31 May 2010 10:14:17 +0200 Subject: drbd: Finished the "on-no-data-accessible suspend-io;" functionality When no data is accessible (no connection to the peer, nor a local disk) allow the user to select to freeze all IO operations instead of getting IO errors. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/drbd.h') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 479ee3a1d901..7be069fcca57 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -91,6 +91,11 @@ enum drbd_after_sb_p { ASB_VIOLENTLY }; +enum drbd_on_no_data { + OND_IO_ERROR, + OND_SUSPEND_IO +}; + /* KEEP the order, do not delete or insert. Only append. */ enum drbd_ret_codes { ERR_CODE_BASE = 100, -- cgit v1.2.3-59-g8ed1b From 47ff2d0a8e7ce87fed180729e8341f650bf585c8 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 18 Jun 2010 13:56:57 +0200 Subject: drbd: Do not allow a fencing-policy of resource-and-stonith with protocol A Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 20 +++++++++++++++++++- drivers/block/drbd/drbd_req.c | 2 +- include/linux/drbd.h | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux/drbd.h') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 563a6ade0179..5288bd72cd27 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -806,6 +806,15 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp goto fail; } + if (get_net_conf(mdev)) { + int prot = mdev->net_conf->wire_protocol; + put_net_conf(mdev); + if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) { + retcode = ERR_STONITH_AND_PROT_A; + goto fail; + } + } + nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); if (IS_ERR(nbc->lo_file)) { dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, @@ -1238,7 +1247,16 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, && (new_conf->wire_protocol != DRBD_PROT_C)) { retcode = ERR_NOT_PROTO_C; goto fail; - }; + } + + if (get_ldev(mdev)) { + enum drbd_fencing_p fp = mdev->ldev->dc.fencing; + put_ldev(mdev); + if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { + retcode = ERR_STONITH_AND_PROT_A; + goto fail; + } + } if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { retcode = ERR_DISCARD; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8259d4f77285..fbe027886bad 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -660,7 +660,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case resend: /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK - before the connection loss; only P_BARRIER_ACK was missing. + before the connection loss (B&C only); only P_BARRIER_ACK was missing. Trowing them out of the TL here by pretending we got a BARRIER_ACK TODO: Either resync them, or ensure peer was not rebooted. */ if (!(req->rq_state & RQ_NET_OK)) { diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 7be069fcca57..0b2bfb58d9c5 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -145,6 +145,7 @@ enum drbd_ret_codes { ERR_CONNECTED = 151, /* DRBD 8.3 only */ ERR_PERM = 152, ERR_NEED_APV_93 = 153, + ERR_STONITH_AND_PROT_A = 154, /* insert new ones above this line */ AFTER_LAST_ERR_CODE -- cgit v1.2.3-59-g8ed1b From 0b70a13dac014ec9274640b9e945bde493ba365e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 20 Aug 2010 13:36:10 +0200 Subject: drbd: Sending of big packets, for payloads from 64KByte to 4GByte Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 74 +++++++++++++++++------------- drivers/block/drbd/drbd_main.c | 79 +++++++++++++++++++------------- drivers/block/drbd/drbd_receiver.c | 94 +++++++++++++++++++------------------- drivers/block/drbd/drbd_worker.c | 2 +- include/linux/drbd.h | 2 + 5 files changed, 139 insertions(+), 112 deletions(-) (limited to 'include/linux/drbd.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 0fedcc0b8dc9..3f10efc2ac14 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -337,13 +337,25 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) * NOTE that the payload starts at a long aligned offset, * regardless of 32 or 64 bit arch! */ -struct p_header { +struct p_header80 { u32 magic; u16 command; u16 length; /* bytes of data after this header */ u8 payload[0]; } __packed; -/* 8 bytes. packet FIXED for the next century! */ + +/* Header for big packets, Used for data packets exceeding 64kB */ +struct p_header95 { + u16 magic; /* use DRBD_MAGIC_BIG here */ + u16 command; + u32 length; + u8 payload[0]; +} __packed; + +union p_header { + struct p_header80 h80; + struct p_header95 h95; +}; /* * short commands, packets without payload, plain p_header: @@ -367,7 +379,7 @@ struct p_header { #define DP_MAY_SET_IN_SYNC 4 struct p_data { - struct p_header head; + union p_header head; u64 sector; /* 64 bits sector number */ u64 block_id; /* to identify the request in protocol B&C */ u32 seq_num; @@ -383,7 +395,7 @@ struct p_data { * P_DATA_REQUEST, P_RS_DATA_REQUEST */ struct p_block_ack { - struct p_header head; + struct p_header80 head; u64 sector; u64 block_id; u32 blksize; @@ -392,7 +404,7 @@ struct p_block_ack { struct p_block_req { - struct p_header head; + struct p_header80 head; u64 sector; u64 block_id; u32 blksize; @@ -409,7 +421,7 @@ struct p_block_req { */ struct p_handshake { - struct p_header head; /* 8 bytes */ + struct p_header80 head; /* 8 bytes */ u32 protocol_min; u32 feature_flags; u32 protocol_max; @@ -424,19 +436,19 @@ struct p_handshake { /* 80 bytes, FIXED for the next century */ struct p_barrier { - struct p_header head; + struct p_header80 head; u32 barrier; /* barrier number _handle_ only */ u32 pad; /* to multiple of 8 Byte */ } __packed; struct p_barrier_ack { - struct p_header head; + struct p_header80 head; u32 barrier; u32 set_size; } __packed; struct p_rs_param { - struct p_header head; + struct p_header80 head; u32 rate; /* Since protocol version 88 and higher. */ @@ -444,7 +456,7 @@ struct p_rs_param { } __packed; struct p_rs_param_89 { - struct p_header head; + struct p_header80 head; u32 rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; @@ -452,7 +464,7 @@ struct p_rs_param_89 { } __packed; struct p_rs_param_95 { - struct p_header head; + struct p_header80 head; u32 rate; char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; @@ -468,7 +480,7 @@ enum drbd_conn_flags { }; struct p_protocol { - struct p_header head; + struct p_header80 head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; @@ -482,17 +494,17 @@ struct p_protocol { } __packed; struct p_uuids { - struct p_header head; + struct p_header80 head; u64 uuid[UI_EXTENDED_SIZE]; } __packed; struct p_rs_uuid { - struct p_header head; + struct p_header80 head; u64 uuid; } __packed; struct p_sizes { - struct p_header head; + struct p_header80 head; u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ @@ -502,18 +514,18 @@ struct p_sizes { } __packed; struct p_state { - struct p_header head; + struct p_header80 head; u32 state; } __packed; struct p_req_state { - struct p_header head; + struct p_header80 head; u32 mask; u32 val; } __packed; struct p_req_state_reply { - struct p_header head; + struct p_header80 head; u32 retcode; } __packed; @@ -528,7 +540,7 @@ struct p_drbd06_param { } __packed; struct p_discard { - struct p_header head; + struct p_header80 head; u64 block_id; u32 seq_num; u32 pad; @@ -544,7 +556,7 @@ enum drbd_bitmap_code { }; struct p_compressed_bm { - struct p_header head; + struct p_header80 head; /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code * (encoding & 0x80): polarity (set/unset) of first runlength * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits @@ -555,10 +567,10 @@ struct p_compressed_bm { u8 code[0]; } __packed; -struct p_delay_probe { - struct p_header head; - u32 seq_num; /* sequence number to match the two probe packets */ - u32 offset; /* usecs the probe got sent after the reference time point */ +struct p_delay_probe93 { + struct p_header80 head; + u32 seq_num; /* sequence number to match the two probe packets */ + u32 offset; /* usecs the probe got sent after the reference time point */ } __packed; /* DCBP: Drbd Compressed Bitmap Packet ... */ @@ -605,7 +617,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n) * so we need to use the fixed size 4KiB page size * most architechtures have used for a long time. */ -#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) +#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80)) #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) #define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) #if (PAGE_SIZE < 4096) @@ -614,7 +626,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n) #endif union p_polymorph { - struct p_header header; + struct p_header80 header; struct p_handshake handshake; struct p_data data; struct p_block_ack block_ack; @@ -1188,12 +1200,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev); extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, + enum drbd_packets cmd, struct p_header80 *h, size_t size, unsigned msg_flags); #define USE_DATA_SOCKET 1 #define USE_META_SOCKET 0 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, + enum drbd_packets cmd, struct p_header80 *h, size_t size); extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size); @@ -1936,19 +1948,19 @@ static inline void request_ping(struct drbd_conf *mdev) static inline int drbd_send_short_cmd(struct drbd_conf *mdev, enum drbd_packets cmd) { - struct p_header h; + struct p_header80 h; return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); } static inline int drbd_send_ping(struct drbd_conf *mdev) { - struct p_header h; + struct p_header80 h; return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); } static inline int drbd_send_ping_ack(struct drbd_conf *mdev) { - struct p_header h; + struct p_header80 h; return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index db93eee7e543..f3f4ea9c5eb9 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1647,7 +1647,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, + enum drbd_packets cmd, struct p_header80 *h, size_t size, unsigned msg_flags) { int sent, ok; @@ -1657,7 +1657,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, h->magic = BE_DRBD_MAGIC; h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct p_header)); + h->length = cpu_to_be16(size-sizeof(struct p_header80)); sent = drbd_send(mdev, sock, h, size, msg_flags); @@ -1672,7 +1672,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, * when we hold the appropriate socket mutex. */ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, size_t size) + enum drbd_packets cmd, struct p_header80 *h, size_t size) { int ok = 0; struct socket *sock; @@ -1700,7 +1700,7 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size) { - struct p_header h; + struct p_header80 h; int ok; h.magic = BE_DRBD_MAGIC; @@ -1807,7 +1807,7 @@ int drbd_send_protocol(struct drbd_conf *mdev) strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, - (struct p_header *)p, size); + (struct p_header80 *)p, size); kfree(p); return rv; } @@ -1833,7 +1833,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) put_ldev(mdev); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -1854,7 +1854,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) p.uuid = cpu_to_be64(val); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); } int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) @@ -1884,7 +1884,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl p.dds_flags = cpu_to_be16(flags); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -1909,7 +1909,7 @@ int drbd_send_state(struct drbd_conf *mdev) if (likely(sock != NULL)) { ok = _drbd_send_cmd(mdev, sock, P_STATE, - (struct p_header *)&p, sizeof(p), 0); + (struct p_header80 *)&p, sizeof(p), 0); } mutex_unlock(&mdev->data.mutex); @@ -1927,7 +1927,7 @@ int drbd_send_state_req(struct drbd_conf *mdev, p.val = cpu_to_be32(val.i); return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); } int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) @@ -1937,7 +1937,7 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) p.retcode = cpu_to_be32(retcode); return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); } int fill_bitmap_rle_bits(struct drbd_conf *mdev, @@ -2036,7 +2036,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev, enum { OK, FAILED, DONE } send_bitmap_rle_or_plain(struct drbd_conf *mdev, - struct p_header *h, struct bm_xfer_ctx *c) + struct p_header80 *h, struct bm_xfer_ctx *c) { struct p_compressed_bm *p = (void*)h; unsigned long num_words; @@ -2066,12 +2066,12 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, if (len) drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP, - h, sizeof(struct p_header) + len, 0); + h, sizeof(struct p_header80) + len, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; c->packets[1]++; - c->bytes[1] += sizeof(struct p_header) + len; + c->bytes[1] += sizeof(struct p_header80) + len; if (c->bit_offset > c->bm_bits) c->bit_offset = c->bm_bits; @@ -2087,14 +2087,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev, int _drbd_send_bitmap(struct drbd_conf *mdev) { struct bm_xfer_ctx c; - struct p_header *p; + struct p_header80 *p; int ret; ERR_IF(!mdev->bitmap) return FALSE; /* maybe we should use some per thread scratch page, * and allocate that during initial device creation? */ - p = (struct p_header *) __get_free_page(GFP_NOIO); + p = (struct p_header80 *) __get_free_page(GFP_NOIO); if (!p) { dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); return FALSE; @@ -2152,7 +2152,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) if (mdev->state.conn < C_CONNECTED) return FALSE; ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -2180,7 +2180,7 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) return FALSE; ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -2188,8 +2188,8 @@ int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, struct p_data *dp) { const int header_size = sizeof(struct p_data) - - sizeof(struct p_header); - int data_size = ((struct p_header *)dp)->length - header_size; + - sizeof(struct p_header80); + int data_size = ((struct p_header80 *)dp)->length - header_size; return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), dp->block_id); @@ -2238,7 +2238,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, p.blksize = cpu_to_be32(size); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -2256,7 +2256,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, p.head.magic = BE_DRBD_MAGIC; p.head.command = cpu_to_be16(cmd); - p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size); + p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); mutex_lock(&mdev->data.mutex); @@ -2278,7 +2278,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) p.blksize = cpu_to_be32(size); ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, - (struct p_header *)&p, sizeof(p)); + (struct p_header80 *)&p, sizeof(p)); return ok; } @@ -2447,10 +2447,17 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - p.head.magic = BE_DRBD_MAGIC; - p.head.command = cpu_to_be16(P_DATA); - p.head.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size); + if (req->size <= (1 << 15)) { + p.head.h80.magic = BE_DRBD_MAGIC; + p.head.h80.command = cpu_to_be16(P_DATA); + p.head.h80.length = + cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size); + } else { + p.head.h95.magic = BE_DRBD_MAGIC_BIG; + p.head.h95.command = cpu_to_be16(P_DATA); + p.head.h95.length = + cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size); + } p.sector = cpu_to_be64(req->sector); p.block_id = (unsigned long)req; @@ -2511,10 +2518,17 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - p.head.magic = BE_DRBD_MAGIC; - p.head.command = cpu_to_be16(cmd); - p.head.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size); + if (e->size <= (1 << 15)) { + p.head.h80.magic = BE_DRBD_MAGIC; + p.head.h80.command = cpu_to_be16(cmd); + p.head.h80.length = + cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); + } else { + p.head.h95.magic = BE_DRBD_MAGIC_BIG; + p.head.h95.command = cpu_to_be16(cmd); + p.head.h95.length = + cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size); + } p.sector = cpu_to_be64(e->sector); p.block_id = e->block_id; @@ -2527,8 +2541,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, if (!drbd_get_data_sock(mdev)) return 0; - ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, - sizeof(p), dgs ? MSG_MORE : 0); + ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 591a171291d9..9b3321e2c3cd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -720,14 +720,14 @@ out: static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { - struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; + struct p_header80 *h = (struct p_header80 *) &mdev->data.sbuf.header; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { - struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; + struct p_header80 *h = (struct p_header80 *) &mdev->data.sbuf.header; int rr; rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); @@ -944,7 +944,7 @@ out_release_sockets: return -1; } -static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) +static int drbd_recv_header(struct drbd_conf *mdev, struct p_header80 *h) { int r; @@ -1266,7 +1266,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea return 1; } -static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) +static int receive_Barrier(struct drbd_conf *mdev, struct p_header80 *h) { int rv, issue_flush; struct p_barrier *p = (struct p_barrier *)h; @@ -1570,7 +1570,7 @@ fail: return FALSE; } -static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) +static int receive_DataReply(struct drbd_conf *mdev, struct p_header80 *h) { struct drbd_request *req; sector_t sector; @@ -1610,7 +1610,7 @@ static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) return ok; } -static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) +static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; unsigned int header_size, data_size; @@ -1767,7 +1767,7 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) } /* mirrored write */ -static int receive_Data(struct drbd_conf *mdev, struct p_header *h) +static int receive_Data(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; struct drbd_epoch_entry *e; @@ -2066,7 +2066,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) } -static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) +static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -2756,7 +2756,7 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) +static int receive_protocol(struct drbd_conf *mdev, struct p_header80 *h) { struct p_protocol *p = (struct p_protocol *)h; int header_size, data_size; @@ -2862,7 +2862,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) +static int receive_SyncParam(struct drbd_conf *mdev, struct p_header80 *h) { int ok = TRUE; struct p_rs_param_95 *p = (struct p_rs_param_95 *)h; @@ -3032,7 +3032,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) +static int receive_sizes(struct drbd_conf *mdev, struct p_header80 *h) { struct p_sizes *p = (struct p_sizes *)h; enum determine_dev_size dd = unchanged; @@ -3148,7 +3148,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_uuids(struct drbd_conf *mdev, struct p_header *h) +static int receive_uuids(struct drbd_conf *mdev, struct p_header80 *h) { struct p_uuids *p = (struct p_uuids *)h; u64 *p_uuid; @@ -3241,7 +3241,7 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) +static int receive_req_state(struct drbd_conf *mdev, struct p_header80 *h) { struct p_req_state *p = (struct p_req_state *)h; union drbd_state mask, val; @@ -3271,7 +3271,7 @@ static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_state(struct drbd_conf *mdev, struct p_header *h) +static int receive_state(struct drbd_conf *mdev, struct p_header80 *h) { struct p_state *p = (struct p_state *)h; enum drbd_conns nconn, oconn; @@ -3395,7 +3395,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) +static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header80 *h) { struct p_rs_uuid *p = (struct p_rs_uuid *)h; @@ -3428,7 +3428,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) enum receive_bitmap_ret { OK, DONE, FAILED }; static enum receive_bitmap_ret -receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h, +receive_bitmap_plain(struct drbd_conf *mdev, struct p_header80 *h, unsigned long *buffer, struct bm_xfer_ctx *c) { unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); @@ -3533,7 +3533,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned plain = sizeof(struct p_header) * + unsigned plain = sizeof(struct p_header80) * ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + c->bm_words * sizeof(long); unsigned total = c->bytes[0] + c->bytes[1]; @@ -3571,7 +3571,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. */ -static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) +static int receive_bitmap(struct drbd_conf *mdev, struct p_header80 *h) { struct bm_xfer_ctx c; void *buffer; @@ -3623,7 +3623,7 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) } c.packets[h->command == P_BITMAP]++; - c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length; + c.bytes[h->command == P_BITMAP] += sizeof(struct p_header80) + h->length; if (ret != OK) break; @@ -3659,7 +3659,7 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) return ok; } -static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent) +static int receive_skip_(struct drbd_conf *mdev, struct p_header80 *h, int silent) { /* TODO zero copy sink :) */ static char sink[128]; @@ -3679,17 +3679,17 @@ static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent) return size == 0; } -static int receive_skip(struct drbd_conf *mdev, struct p_header *h) +static int receive_skip(struct drbd_conf *mdev, struct p_header80 *h) { return receive_skip_(mdev, h, 0); } -static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h) +static int receive_skip_silent(struct drbd_conf *mdev, struct p_header80 *h) { return receive_skip_(mdev, h, 1); } -static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) +static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header80 *h) { if (mdev->state.disk >= D_INCONSISTENT) drbd_kick_lo(mdev); @@ -3701,7 +3701,7 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header80 *); static drbd_cmd_handler_f drbd_default_handler[] = { [P_DATA] = receive_Data, @@ -3736,7 +3736,7 @@ static drbd_cmd_handler_f *drbd_opt_cmd_handler; static void drbdd(struct drbd_conf *mdev) { drbd_cmd_handler_f handler; - struct p_header *header = &mdev->data.rbuf.header; + struct p_header80 *header = &mdev->data.rbuf.header; while (get_t_state(&mdev->receiver) == Running) { drbd_thread_current_set_cpu(mdev); @@ -3964,7 +3964,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev) p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, - (struct p_header *)p, sizeof(*p), 0 ); + (struct p_header80 *)p, sizeof(*p), 0 ); mutex_unlock(&mdev->data.mutex); return ok; } @@ -3981,7 +3981,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) /* ASSERT current == mdev->receiver ... */ struct p_handshake *p = &mdev->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - -sizeof(struct p_header); + -sizeof(struct p_header80); int rv; rv = drbd_send_handshake(mdev); @@ -4058,7 +4058,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - struct p_header p; + struct p_header80 p; unsigned int key_len = strlen(mdev->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; @@ -4231,7 +4231,7 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) +static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) { struct p_req_state_reply *p = (struct p_req_state_reply *)h; @@ -4249,13 +4249,13 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_Ping(struct drbd_conf *mdev, struct p_header *h) +static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) { return drbd_send_ping_ack(mdev); } -static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) +static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) { /* restore idle timeout */ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; @@ -4265,7 +4265,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) +static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4336,7 +4336,7 @@ static int validate_req_change_req_state(struct drbd_conf *mdev, return TRUE; } -static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) +static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4376,7 +4376,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) _ack_id_to_req, __func__ , what); } -static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) +static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4396,7 +4396,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) _ack_id_to_req, __func__ , neg_acked); } -static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) +static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4409,7 +4409,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) _ar_id_to_req, __func__ , neg_acked); } -static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) +static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; int size; @@ -4431,7 +4431,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) +static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_barrier_ack *p = (struct p_barrier_ack *)h; @@ -4440,7 +4440,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) +static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; struct drbd_work *w; @@ -4474,7 +4474,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h) +static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header80 *h) { /* IGNORE */ return TRUE; @@ -4482,7 +4482,7 @@ static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h) struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, struct p_header *h); + int (*process)(struct drbd_conf *mdev, struct p_header80 *h); }; static struct asender_cmd *get_asender_cmd(int cmd) @@ -4491,8 +4491,8 @@ static struct asender_cmd *get_asender_cmd(int cmd) /* anything missing from this table is in * the drbd_cmd_handler (drbd_default_handler) table, * see the beginning of drbdd() */ - [P_PING] = { sizeof(struct p_header), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, + [P_PING] = { sizeof(struct p_header80), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck }, [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, @@ -4504,7 +4504,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, - [P_DELAY_PROBE] = { sizeof(struct p_delay_probe), got_something_to_ignore_m }, + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_something_to_ignore_m }, [P_MAX_CMD] = { 0, NULL }, }; if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) @@ -4515,13 +4515,13 @@ static struct asender_cmd *get_asender_cmd(int cmd) int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct p_header *h = &mdev->meta.rbuf.header; + struct p_header80 *h = &mdev->meta.rbuf.header; struct asender_cmd *cmd = NULL; int rv, len; void *buf = h; int received = 0; - int expect = sizeof(struct p_header); + int expect = sizeof(struct p_header80); int empty; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4621,7 +4621,7 @@ int drbd_asender(struct drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct p_header)) + ERR_IF(len != expect-sizeof(struct p_header80)) goto reconnect; } if (received == expect) { @@ -4631,7 +4631,7 @@ int drbd_asender(struct drbd_thread *thi) buf = h; received = 0; - expect = sizeof(struct p_header); + expect = sizeof(struct p_header80); cmd = NULL; } } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1eeb55423b3e..3d0e14e3ade3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1204,7 +1204,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. */ ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER, - (struct p_header *)p, sizeof(*p), 0); + (struct p_header80 *)p, sizeof(*p), 0); drbd_put_data_sock(mdev); return ok; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 0b2bfb58d9c5..89718a39791e 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -318,6 +318,8 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) +#define DRBD_MAGIC_BIG 0x835a +#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 -- cgit v1.2.3-59-g8ed1b From fb22c402ffdf61dd121795b5809de587185d5240 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 8 Sep 2010 23:20:21 +0200 Subject: drbd: Track the reasons to suspend IO in dedicated state bits There are three ways to get IO suspended: * Loss of any access to data * Fence-peer-handler running * User requested to suspend IO Track those in different bits, so that one condition clearing its state bit does not interfere with the other two conditions. Only when the user resumes IO he overrules all three bits. The fact is hidden from the user, he sees only a single suspend bit. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 9 ++++++++- drivers/block/drbd/drbd_main.c | 36 +++++++++++++++++++++++------------- drivers/block/drbd/drbd_nl.c | 20 ++++++++++++++------ drivers/block/drbd/drbd_proc.c | 2 +- drivers/block/drbd/drbd_receiver.c | 6 +++--- drivers/block/drbd/drbd_req.c | 6 +++--- include/linux/drbd.h | 10 +++++++--- 7 files changed, 59 insertions(+), 30 deletions(-) (limited to 'include/linux/drbd.h') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index eb1273d04caf..ff7fffa00dac 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1681,6 +1681,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev, #define susp_MASK 1 #define user_isp_MASK 1 #define aftr_isp_MASK 1 +#define susp_nod_MASK 1 +#define susp_fen_MASK 1 #define NS(T, S) \ ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ @@ -2254,11 +2256,16 @@ static inline int drbd_state_is_stable(union drbd_state s) return 1; } +static inline int is_susp(union drbd_state s) +{ + return s.susp || s.susp_nod || s.susp_fen; +} + static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) { int mxb = drbd_get_max_buffers(mdev); - if (mdev->state.susp) + if (is_susp(mdev->state)) return 0; if (test_bit(SUSPEND_IO, &mdev->flags)) return 0; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 04c305d36f8e..4f33714fb3cd 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -654,7 +654,7 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) drbd_role_str(ns.peer), drbd_disk_str(ns.disk), drbd_disk_str(ns.pdsk), - ns.susp ? 's' : 'r', + is_susp(ns) ? 's' : 'r', ns.aftr_isp ? 'a' : '-', ns.peer_isp ? 'p' : '-', ns.user_isp ? 'u' : '-' @@ -925,12 +925,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state if (fp == FP_STONITH && (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) - ns.susp = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ + ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO && (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) - ns.susp = 1; /* Suspend IO while no data available (no accessible data available) */ + ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { if (ns.conn == C_SYNC_SOURCE) @@ -1030,7 +1030,10 @@ int __drbd_set_state(struct drbd_conf *mdev, PSC(conn); PSC(disk); PSC(pdsk); - PSC(susp); + if (is_susp(ns) != is_susp(os)) + pbp += sprintf(pbp, "susp( %s -> %s ) ", + drbd_susp_str(is_susp(os)), + drbd_susp_str(is_susp(ns))); PSC(aftr_isp); PSC(peer_isp); PSC(user_isp); @@ -1218,6 +1221,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, { enum drbd_fencing_p fp; enum drbd_req_event what = nothing; + union drbd_state nsm = (union drbd_state){ .i = -1 }; if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { clear_bit(CRASHED_PRIMARY, &mdev->flags); @@ -1241,19 +1245,21 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, /* Here we have the actions that are performed after a state change. This function might sleep */ - if (os.susp && ns.susp && mdev->sync_conf.on_no_data == OND_SUSPEND_IO) { + nsm.i = -1; + if (ns.susp_nod) { if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { if (ns.conn == C_CONNECTED) - what = resend; + what = resend, nsm.susp_nod = 0; else /* ns.conn > C_CONNECTED */ dev_err(DEV, "Unexpected Resynd going on!\n"); } if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) - what = restart_frozen_disk_io; + what = restart_frozen_disk_io, nsm.susp_nod = 0; + } - if (fp == FP_STONITH && ns.susp) { + if (ns.susp_fen) { /* case1: The outdate peer handler is successful: */ if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { tl_clear(mdev); @@ -1263,20 +1269,22 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_md_sync(mdev); } spin_lock_irq(&mdev->req_lock); - _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL); + _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->req_lock); } /* case2: The connection was established again: */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { clear_bit(NEW_CUR_UUID, &mdev->flags); what = resend; + nsm.susp_fen = 0; } } if (what != nothing) { spin_lock_irq(&mdev->req_lock); _tl_restart(mdev, what); - _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL); + nsm.i &= mdev->state.i; + _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL); spin_unlock_irq(&mdev->req_lock); } @@ -1298,7 +1306,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - if (mdev->state.susp) { + if (is_susp(mdev->state)) { set_bit(NEW_CUR_UUID, &mdev->flags); } else { drbd_uuid_new_current(mdev); @@ -1417,7 +1425,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, resume_next_sg(mdev); /* free tl_hash if we Got thawed and are C_STANDALONE */ - if (ns.conn == C_STANDALONE && ns.susp == 0 && mdev->tl_hash) + if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash) drbd_free_tl_hash(mdev); /* Upon network connection, we need to start the receiver */ @@ -2732,7 +2740,9 @@ static void drbd_set_defaults(struct drbd_conf *mdev) .conn = C_STANDALONE, .disk = D_DISKLESS, .pdsk = D_UNKNOWN, - .susp = 0 + .susp = 0, + .susp_nod = 0, + .susp_fen = 0 } }; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 5b30f90cab3e..9ee44568dce3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -209,7 +209,8 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) put_ldev(mdev); } else { dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); - return mdev->state.pdsk; + nps = mdev->state.pdsk; + goto out; } r = drbd_khelper(mdev, "fence-peer"); @@ -256,6 +257,14 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) dev_info(DEV, "fence-peer helper returned %d (%s)\n", (r>>8) & 0xff, ex_to_string); + +out: + if (mdev->state.susp_fen && nps >= D_UNKNOWN) { + /* The handler was not successful... unfreeze here, the + state engine can not unfreeze... */ + _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE); + } + return nps; } @@ -550,7 +559,7 @@ char *ppsize(char *buf, unsigned long long size) void drbd_suspend_io(struct drbd_conf *mdev) { set_bit(SUSPEND_IO, &mdev->flags); - if (mdev->state.susp) + if (is_susp(mdev->state)) return; wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); } @@ -1016,7 +1025,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_suspend_io(mdev); /* also wait for the last barrier ack. */ - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || mdev->state.susp); + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state)); /* and for any other previously queued work */ drbd_flush_workqueue(mdev); @@ -1114,8 +1123,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp clear_bit(CRASHED_PRIMARY, &mdev->flags); if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && - !(mdev->state.role == R_PRIMARY && mdev->state.susp && - mdev->sync_conf.on_no_data == OND_SUSPEND_IO)) { + !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) { set_bit(CRASHED_PRIMARY, &mdev->flags); cp_discovered = 1; } @@ -1939,7 +1947,7 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_sync(mdev); } drbd_suspend_io(mdev); - reply->ret_code = drbd_request_state(mdev, NS(susp, 0)); + reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); if (reply->ret_code == SS_SUCCESS) { if (mdev->state.conn < C_CONNECTED) tl_clear(mdev); diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index a4a4a06908c5..aec8426c1bf6 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -213,7 +213,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v) drbd_disk_str(mdev->state.pdsk), (mdev->net_conf == NULL ? ' ' : (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), - mdev->state.susp ? 's' : 'r', + is_susp(mdev->state) ? 's' : 'r', mdev->state.aftr_isp ? 'a' : '-', mdev->state.peer_isp ? 'p' : '-', mdev->state.user_isp ? 'u' : '-', diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 983e49cbd233..6b69b2f734dc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3315,7 +3315,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD); - if (ns.pdsk == D_CONSISTENT && ns.susp && nconn == C_CONNECTED && oconn < C_CONNECTED && + if (ns.pdsk == D_CONSISTENT && is_susp(ns) && nconn == C_CONNECTED && oconn < C_CONNECTED && test_bit(NEW_CUR_UUID, &mdev->flags)) { /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this for temporal network outages! */ @@ -3829,7 +3829,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) kfree(mdev->p_uuid); mdev->p_uuid = NULL; - if (!mdev->state.susp) + if (!is_susp(mdev->state)) tl_clear(mdev); dev_info(DEV, "Connection closed\n"); @@ -3858,7 +3858,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (os.conn == C_DISCONNECTING) { wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); - if (!mdev->state.susp) { + if (!is_susp(mdev->state)) { /* we must not free the tl_hash * while application io is still on the fly */ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index af608b39c4e0..9e91a2545fc8 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -287,7 +287,7 @@ static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_e { struct drbd_conf *mdev = req->mdev; - if (!mdev->state.susp) + if (!is_susp(mdev->state)) _req_may_be_done(req, m); } @@ -812,7 +812,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) (mdev->state.pdsk == D_INCONSISTENT && mdev->state.conn >= C_CONNECTED)); - if (!(local || remote) && !mdev->state.susp) { + if (!(local || remote) && !is_susp(mdev->state)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); goto fail_free_complete; } @@ -838,7 +838,7 @@ allocate_barrier: /* GOOD, everything prepared, grab the spin_lock */ spin_lock_irq(&mdev->req_lock); - if (mdev->state.susp) { + if (is_susp(mdev->state)) { /* If we got suspended, use the retry mechanism of generic_make_request() to restart processing of this bio. In the next call to drbd_make_request_26 diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 89718a39791e..5e72a5d3d48f 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -232,13 +232,17 @@ union drbd_state { unsigned conn:5 ; /* 17/32 cstates */ unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ - unsigned susp:1 ; /* 2/2 IO suspended no/yes */ + unsigned susp:1 ; /* 2/2 IO suspended no/yes (by user) */ unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ unsigned peer_isp:1 ; unsigned user_isp:1 ; - unsigned _pad:11; /* 0 unused */ + unsigned susp_nod:1 ; /* IO suspended because no data */ + unsigned susp_fen:1 ; /* IO suspended because fence peer handler runs*/ + unsigned _pad:9; /* 0 unused */ #elif defined(__BIG_ENDIAN_BITFIELD) - unsigned _pad:11; /* 0 unused */ + unsigned _pad:9; + unsigned susp_fen:1 ; + unsigned susp_nod:1 ; unsigned user_isp:1 ; unsigned peer_isp:1 ; unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ -- cgit v1.2.3-59-g8ed1b From 22cc37a943832c948808884604ec6f5ff2594c1d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Sep 2010 20:40:41 +0200 Subject: drbd: fix unlikely access after free and list corruption Various cleanup paths have been incomplete, for the very unlikely case that we cannot allocate enough bios from process context when submitting on behalf of the peer or resync process. Never observed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 25 +++++++++++++++++++++++++ drivers/block/drbd/drbd_worker.c | 7 +++++++ include/linux/drbd.h | 4 ++-- 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux/drbd.h') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 990fe01afa50..71775a9de21d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1573,6 +1573,13 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + spin_unlock_irq(&mdev->req_lock); + drbd_free_ee(mdev, e); fail: put_ldev(mdev); @@ -1998,6 +2005,16 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + hlist_del_init(&e->colision); + spin_unlock_irq(&mdev->req_lock); + if (e->flags & EE_CALL_AL_COMPLETE_IO) + drbd_al_complete_io(mdev, e->sector); + out_interrupted: /* yes, the epoch_size now is imbalanced. * but we drop the connection anyways, so we don't have a chance to @@ -2202,6 +2219,14 @@ submit: if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + spin_unlock_irq(&mdev->req_lock); + /* no drbd_rs_complete_io(), we are dropping the connection anyways */ + out_free_e: put_ldev(mdev); drbd_free_ee(mdev, e); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 88be45ad84ed..f12822d53867 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -387,6 +387,13 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) return 0; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + spin_unlock_irq(&mdev->req_lock); + drbd_free_ee(mdev, e); defer: put_ldev(mdev); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 5e72a5d3d48f..da7d9bd4f3f0 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,10 +53,10 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.8.1" +#define REL_VERSION "8.3.9rc1" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 94 +#define PRO_VERSION_MAX 95 enum drbd_io_error_p { -- cgit v1.2.3-59-g8ed1b From 5dbfe7aedf54aa7f62fd659e34371d4ea0e7bffe Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 15 Oct 2010 09:52:46 +0200 Subject: drbd: add race-breaker to drbd_go_diskless This adds a necessary race breaker to these commits: drbd: fix for possible deadlock on IO error during resync drbd: drop wrong debug asserts, fix recently introduced race What we do is get a refcount, check the state, then depending on the state and the requested minimum disk state, either hold it (success), or give it back immediately (failed "try lock"). Some code paths (flushing of drbd metadata) may still grab and hold a refcount even if we are D_FAILED (application IO won't). So even if we hit local_cnt == 0 once after being D_FAILED, we still need to wait for that again after we changed to D_DISKLESS. Once local_cnt reaches 0 while we are D_DISKLESS, we can be sure that no one will look at the protected members anymore, so only then is it safe to free them. We cannot easily convert to standard locking primitives here, as we want to be able to use it in atomic context (we always do a "try lock"), as well as hold references for a "long time" (from IO submission to completion callback). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_main.c | 3 +++ include/linux/drbd.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/drbd.h') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index bbe3bff2cad6..8bfedc7164fa 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -3763,6 +3763,9 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused * the protected members anymore, though, so in the after_state_ch work * it will be safe to free them. */ drbd_force_state(mdev, NS(disk, D_DISKLESS)); + /* We need to wait for return of references checked out while we still + * have been D_FAILED, though (drbd_md_sync, bitmap io). */ + wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); clear_bit(GO_DISKLESS, &mdev->flags); return 1; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index da7d9bd4f3f0..9b2a0158f399 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.9rc1" +#define REL_VERSION "8.3.9rc2" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 95 -- cgit v1.2.3-59-g8ed1b From 8825f7c3e5c7b251b49fc594658a96f59417ee16 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 21 Oct 2010 17:21:19 +0200 Subject: drbd: Silenced an assert That assertion's condition needed adjustment for today's semantics Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_req.c | 2 +- include/linux/drbd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/drbd.h') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 31d04b17c45f..5c2254853559 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -258,7 +258,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) if (!hlist_unhashed(&req->colision)) hlist_del(&req->colision); else - D_ASSERT((s & RQ_NET_MASK) == 0); + D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); /* for writes we need to do some extra housekeeping */ if (rw == WRITE) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9b2a0158f399..ef44c7a0638c 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.9rc2" +#define REL_VERSION "8.3.9" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 95 -- cgit v1.2.3-59-g8ed1b