-rw-r--r--   MAINTAINERS                          |   6
-rw-r--r--   drivers/block/drbd/drbd_actlog.c     | 104
-rw-r--r--   drivers/block/drbd/drbd_bitmap.c     | 146
-rw-r--r--   drivers/block/drbd/drbd_int.h        |  90
-rw-r--r--   drivers/block/drbd/drbd_main.c       | 357
-rw-r--r--   drivers/block/drbd/drbd_nl.c         |  48
-rw-r--r--   drivers/block/drbd/drbd_proc.c       |   2
-rw-r--r--   drivers/block/drbd/drbd_receiver.c   |  95
-rw-r--r--   drivers/block/drbd/drbd_req.c        | 132
-rw-r--r--   drivers/block/drbd/drbd_req.h        |  19
-rw-r--r--   drivers/block/drbd/drbd_worker.c     |  31
-rw-r--r--   drivers/block/floppy.c               | 161
-rw-r--r--   drivers/block/xen-blkfront.c         |  44
-rw-r--r--   include/linux/drbd.h                 |   6
-rw-r--r--   include/linux/drbd_limits.h          |   7
-rw-r--r--   include/linux/drbd_nl.h              |   5
16 files changed, 862 insertions, 391 deletions
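
The largest part of this series replaces DRBD's md_io_mutex with an atomic claim of the single shared meta-data page (drbd_md_get_buffer()/drbd_md_put_buffer() in the drbd_actlog.c hunks below), so that a failed disk can interrupt waiters instead of leaving them blocked on a mutex for the duration of meta-data IO. What follows is a minimal userspace sketch of that claim/release pattern only, using C11 atomics plus a pthread condition variable as a stand-in for the kernel's misc_wait wait queue; all names in it (md_get_buffer, md_put_buffer, disk_failed, ...) are illustrative, not the kernel API.

/*
 * Sketch of the claim/release pattern introduced by this series:
 * the shared meta-data buffer is claimed with an atomic compare-and-swap
 * and released with a dec-and-test plus wake-up, rather than being
 * guarded by a mutex held across the whole meta-data IO.
 */
#include <stdatomic.h>
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

static atomic_int  md_io_in_use;                 /* 0 = free, 1 = claimed */
static atomic_bool disk_failed;                  /* stands in for "disk <= D_FAILED" */
static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wait_cond = PTHREAD_COND_INITIALIZER;
static char md_io_buffer[4096];                  /* stands in for mdev->md_io_page */

/* Claim the buffer, or return NULL once the disk has failed (caller gives up). */
static void *md_get_buffer(void)
{
	void *ret = NULL;

	pthread_mutex_lock(&wait_lock);
	for (;;) {
		int expected = 0;
		if (atomic_compare_exchange_strong(&md_io_in_use, &expected, 1)) {
			ret = md_io_buffer;
			break;
		}
		if (atomic_load(&disk_failed))
			break;
		pthread_cond_wait(&wait_cond, &wait_lock);
	}
	pthread_mutex_unlock(&wait_lock);
	return ret;
}

/* Release the buffer and wake any waiter, like drbd_md_put_buffer(). */
static void md_put_buffer(void)
{
	if (atomic_fetch_sub(&md_io_in_use, 1) == 1) {   /* dec-and-test: count hit zero */
		pthread_mutex_lock(&wait_lock);
		pthread_cond_broadcast(&wait_cond);
		pthread_mutex_unlock(&wait_lock);
	}
}

The point of the switch, visible in the hunks below, is that a waiter can be released either by the buffer becoming free or by the disk state dropping to D_FAILED, which a plain mutex cannot express.
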
diff --git a/MAINTAINERS b/MAINTAINERS index a246490c95eb..64e675d6d478 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2818,6 +2818,12 @@ F:	Documentation/firmware_class/  F:	drivers/base/firmware*.c  F:	include/linux/firmware.h +FLOPPY DRIVER +M:	Jiri Kosina <jkosina@suse.cz> +T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/floppy.git +S:	Odd fixes +F:	drivers/block/floppy.c +  FPU EMULATOR  M:	Bill Metzenthen <billm@melbpc.org.au>  W:	http://floatingpoint.sourceforge.net/emulator/index.html diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index cf0e63dd97da..e54e31b02b88 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -65,39 +65,80 @@ struct drbd_atodb_wait {  int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); +void *drbd_md_get_buffer(struct drbd_conf *mdev) +{ +	int r; + +	wait_event(mdev->misc_wait, +		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || +		   mdev->state.disk <= D_FAILED); + +	return r ? NULL : page_address(mdev->md_io_page); +} + +void drbd_md_put_buffer(struct drbd_conf *mdev) +{ +	if (atomic_dec_and_test(&mdev->md_io_in_use)) +		wake_up(&mdev->misc_wait); +} + +static bool md_io_allowed(struct drbd_conf *mdev) +{ +	enum drbd_disk_state ds = mdev->state.disk; +	return ds >= D_NEGOTIATING || ds == D_ATTACHING; +} + +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, +				     unsigned int *done) +{ +	long dt = bdev->dc.disk_timeout * HZ / 10; +	if (dt == 0) +		dt = MAX_SCHEDULE_TIMEOUT; + +	dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt); +	if (dt == 0) +		dev_err(DEV, "meta-data IO operation timed out\n"); +} +  static int _drbd_md_sync_page_io(struct drbd_conf *mdev,  				 struct drbd_backing_dev *bdev,  				 struct page *page, sector_t sector,  				 int rw, int size)  {  	struct bio *bio; -	struct drbd_md_io md_io;  	int ok; -	md_io.mdev = mdev; -	init_completion(&md_io.event); -	md_io.error = 0; +	mdev->md_io.done = 0; +	mdev->md_io.error = -ENODEV;  	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))  		rw |= REQ_FUA | REQ_FLUSH;  	rw |= REQ_SYNC; -	bio = bio_alloc(GFP_NOIO, 1); +	bio = bio_alloc_drbd(GFP_NOIO);  	bio->bi_bdev = bdev->md_bdev;  	bio->bi_sector = sector;  	ok = (bio_add_page(bio, page, size, 0) == size);  	if (!ok)  		goto out; -	bio->bi_private = &md_io; +	bio->bi_private = &mdev->md_io;  	bio->bi_end_io = drbd_md_io_complete;  	bio->bi_rw = rw; +	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */ +		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); +		ok = 0; +		goto out; +	} + +	bio_get(bio); /* one bio_put() is in the completion handler */ +	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */  	if (drbd_insert_fault(mdev, (rw & WRITE) ? 
DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))  		bio_endio(bio, -EIO);  	else  		submit_bio(rw, bio); -	wait_for_completion(&md_io.event); -	ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; +	wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done); +	ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0;   out:  	bio_put(bio); @@ -111,7 +152,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,  	int offset = 0;  	struct page *iop = mdev->md_io_page; -	D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); +	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);  	BUG_ON(!bdev->md_bdev); @@ -328,8 +369,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)  		return 1;  	} -	mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ -	buffer = (struct al_transaction *)page_address(mdev->md_io_page); +	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ +	if (!buffer) { +		dev_err(DEV, "disk failed while waiting for md_io buffer\n"); +		complete(&((struct update_al_work *)w)->event); +		put_ldev(mdev); +		return 1; +	}  	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);  	buffer->tr_number = cpu_to_be32(mdev->al_tr_number); @@ -374,7 +420,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)  	D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);  	mdev->al_tr_number++; -	mutex_unlock(&mdev->md_io_mutex); +	drbd_md_put_buffer(mdev);  	complete(&((struct update_al_work *)w)->event);  	put_ldev(mdev); @@ -443,8 +489,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)  	/* lock out all other meta data io for now,  	 * and make sure the page is mapped.  	 */ -	mutex_lock(&mdev->md_io_mutex); -	buffer = page_address(mdev->md_io_page); +	buffer = drbd_md_get_buffer(mdev); +	if (!buffer) +		return 0;  	/* Find the valid transaction in the log */  	for (i = 0; i <= mx; i++) { @@ -452,7 +499,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)  		if (rv == 0)  			continue;  		if (rv == -1) { -			mutex_unlock(&mdev->md_io_mutex); +			drbd_md_put_buffer(mdev);  			return 0;  		}  		cnr = be32_to_cpu(buffer->tr_number); @@ -478,7 +525,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)  	if (!found_valid) {  		dev_warn(DEV, "No usable activity log found.\n"); -		mutex_unlock(&mdev->md_io_mutex); +		drbd_md_put_buffer(mdev);  		return 1;  	} @@ -493,7 +540,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)  		rv = drbd_al_read_tr(mdev, bdev, buffer, i);  		ERR_IF(rv == 0) goto cancel;  		if (rv == -1) { -			mutex_unlock(&mdev->md_io_mutex); +			drbd_md_put_buffer(mdev);  			return 0;  		} @@ -534,7 +581,7 @@ cancel:  		mdev->al_tr_pos = 0;  	/* ok, we are done with it */ -	mutex_unlock(&mdev->md_io_mutex); +	drbd_md_put_buffer(mdev);  	dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",  	     transactions, active_extents); @@ -671,16 +718,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,  			else  				ext->rs_failed += count;  			if (ext->rs_left < ext->rs_failed) { -				dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " -				    "rs_failed=%d count=%d\n", +				dev_warn(DEV, "BAD! 
sector=%llus enr=%u rs_left=%d " +				    "rs_failed=%d count=%d cstate=%s\n",  				     (unsigned long long)sector,  				     ext->lce.lc_number, ext->rs_left, -				     ext->rs_failed, count); -				dump_stack(); - -				lc_put(mdev->resync, &ext->lce); -				drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); -				return; +				     ext->rs_failed, count, +				     drbd_conn_str(mdev->state.conn)); + +				/* We don't expect to be able to clear more bits +				 * than have been set when we originally counted +				 * the set bits to cache that value in ext->rs_left. +				 * Whatever the reason (disconnect during resync, +				 * delayed local completion of an application write), +				 * try to fix it up by recounting here. */ +				ext->rs_left = drbd_bm_e_weight(mdev, enr);  			}  		} else {  			/* Normally this element should be in the cache, @@ -1192,6 +1243,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev)  		put_ldev(mdev);  	}  	spin_unlock_irq(&mdev->al_lock); +	wake_up(&mdev->al_wait);  	return 0;  } diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 3030201c69d8..b5c5ff53cb57 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -205,7 +205,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)  static void bm_store_page_idx(struct page *page, unsigned long idx)  {  	BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); -	page_private(page) |= idx; +	set_page_private(page, idx);  }  static unsigned long bm_page_to_idx(struct page *page) @@ -886,12 +886,21 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)  struct bm_aio_ctx {  	struct drbd_conf *mdev;  	atomic_t in_flight; -	struct completion done; +	unsigned int done;  	unsigned flags;  #define BM_AIO_COPY_PAGES	1  	int error; +	struct kref kref;  }; +static void bm_aio_ctx_destroy(struct kref *kref) +{ +	struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + +	put_ldev(ctx->mdev); +	kfree(ctx); +} +  /* bv_page may be a copy, or may be the original */  static void bm_async_io_complete(struct bio *bio, int error)  { @@ -930,20 +939,21 @@ static void bm_async_io_complete(struct bio *bio, int error)  	bm_page_unlock_io(mdev, idx); -	/* FIXME give back to page pool */  	if (ctx->flags & BM_AIO_COPY_PAGES) -		put_page(bio->bi_io_vec[0].bv_page); +		mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);  	bio_put(bio); -	if (atomic_dec_and_test(&ctx->in_flight)) -		complete(&ctx->done); +	if (atomic_dec_and_test(&ctx->in_flight)) { +		ctx->done = 1; +		wake_up(&mdev->misc_wait); +		kref_put(&ctx->kref, &bm_aio_ctx_destroy); +	}  }  static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)  { -	/* we are process context. 
we always get a bio */ -	struct bio *bio = bio_alloc(GFP_KERNEL, 1); +	struct bio *bio = bio_alloc_drbd(GFP_NOIO);  	struct drbd_conf *mdev = ctx->mdev;  	struct drbd_bitmap *b = mdev->bitmap;  	struct page *page; @@ -966,10 +976,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must  	bm_set_page_unchanged(b->bm_pages[page_nr]);  	if (ctx->flags & BM_AIO_COPY_PAGES) { -		/* FIXME alloc_page is good enough for now, but actually needs -		 * to use pre-allocated page pool */  		void *src, *dest; -		page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); +		page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);  		dest = kmap_atomic(page);  		src = kmap_atomic(b->bm_pages[page_nr]);  		memcpy(dest, src, PAGE_SIZE); @@ -981,6 +989,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must  	bio->bi_bdev = mdev->ldev->md_bdev;  	bio->bi_sector = on_disk_sector; +	/* bio_add_page of a single page to an empty bio will always succeed, +	 * according to api.  Do we want to assert that? */  	bio_add_page(bio, page, len, 0);  	bio->bi_private = ctx;  	bio->bi_end_io = bm_async_io_complete; @@ -999,14 +1009,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must  /*   * bm_rw: read/write the whole bitmap from/to its on disk location.   */ -static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) +static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)  { -	struct bm_aio_ctx ctx = { -		.mdev = mdev, -		.in_flight = ATOMIC_INIT(1), -		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), -		.flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0, -	}; +	struct bm_aio_ctx *ctx;  	struct drbd_bitmap *b = mdev->bitmap;  	int num_pages, i, count = 0;  	unsigned long now; @@ -1021,7 +1026,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id  	 * For lazy writeout, we don't care for ongoing changes to the bitmap,  	 * as we submit copies of pages anyways.  	 */ -	if (!ctx.flags) + +	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); +	if (!ctx) +		return -ENOMEM; + +	*ctx = (struct bm_aio_ctx) { +		.mdev = mdev, +		.in_flight = ATOMIC_INIT(1), +		.done = 0, +		.flags = flags, +		.error = 0, +		.kref = { ATOMIC_INIT(2) }, +	}; + +	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */ +		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); +		kfree(ctx); +		return -ENODEV; +	} + +	if (!ctx->flags)  		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));  	num_pages = b->bm_number_of_pages; @@ -1046,29 +1071,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id  				continue;  			}  		} -		atomic_inc(&ctx.in_flight); -		bm_page_io_async(&ctx, i, rw); +		atomic_inc(&ctx->in_flight); +		bm_page_io_async(ctx, i, rw);  		++count;  		cond_resched();  	}  	/* -	 * We initialize ctx.in_flight to one to make sure bm_async_io_complete -	 * will not complete() early, and decrement / test it here.  If there +	 * We initialize ctx->in_flight to one to make sure bm_async_io_complete +	 * will not set ctx->done early, and decrement / test it here.  If there  	 * are still some bios in flight, we need to wait for them here. +	 * If all IO is done already (or nothing had been submitted), there is +	 * no need to wait.  Still, we need to put the kref associated with the +	 * "in_flight reached zero, all done" event.  	 
*/ -	if (!atomic_dec_and_test(&ctx.in_flight)) -		wait_for_completion(&ctx.done); +	if (!atomic_dec_and_test(&ctx->in_flight)) +		wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); +	else +		kref_put(&ctx->kref, &bm_aio_ctx_destroy); +  	dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",  			rw == WRITE ? "WRITE" : "READ",  			count, jiffies - now); -	if (ctx.error) { +	if (ctx->error) {  		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");  		drbd_chk_io_error(mdev, 1, true); -		err = -EIO; /* ctx.error ? */ +		err = -EIO; /* ctx->error ? */  	} +	if (atomic_read(&ctx->in_flight)) +		err = -EIO; /* Disk failed during IO... */ +  	now = jiffies;  	if (rw == WRITE) {  		drbd_md_flush(mdev); @@ -1082,6 +1116,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id  	dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",  	     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); +	kref_put(&ctx->kref, &bm_aio_ctx_destroy);  	return err;  } @@ -1091,7 +1126,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id   */  int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)  { -	return bm_rw(mdev, READ, 0); +	return bm_rw(mdev, READ, 0, 0);  }  /** @@ -1102,7 +1137,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)   */  int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)  { -	return bm_rw(mdev, WRITE, 0); +	return bm_rw(mdev, WRITE, 0, 0);  }  /** @@ -1112,7 +1147,23 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)   */  int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)  { -	return bm_rw(mdev, WRITE, upper_idx); +	return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); +} + +/** + * drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location. + * @mdev:	DRBD device. + * + * Will only write pages that have changed since last IO. + * In contrast to drbd_bm_write(), this will copy the bitmap pages + * to temporary writeout pages. It is intended to trigger a full write-out + * while still allowing the bitmap to change, for example if a resync or online + * verify is aborted due to a failed peer disk, while local IO continues, or + * pending resync acks are still being processed. 
+ */ +int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local) +{ +	return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);  } @@ -1130,28 +1181,45 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l   */  int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)  { -	struct bm_aio_ctx ctx = { +	struct bm_aio_ctx *ctx; +	int err; + +	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { +		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); +		return 0; +	} + +	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); +	if (!ctx) +		return -ENOMEM; + +	*ctx = (struct bm_aio_ctx) {  		.mdev = mdev,  		.in_flight = ATOMIC_INIT(1), -		.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), +		.done = 0,  		.flags = BM_AIO_COPY_PAGES, +		.error = 0, +		.kref = { ATOMIC_INIT(2) },  	}; -	if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { -		dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); -		return 0; +	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */ +		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); +		kfree(ctx); +		return -ENODEV;  	} -	bm_page_io_async(&ctx, idx, WRITE_SYNC); -	wait_for_completion(&ctx.done); +	bm_page_io_async(ctx, idx, WRITE_SYNC); +	wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done); -	if (ctx.error) +	if (ctx->error)  		drbd_chk_io_error(mdev, 1, true);  		/* that should force detach, so the in memory bitmap will be  		 * gone in a moment as well. */  	mdev->bm_writ_cnt++; -	return ctx.error; +	err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; +	kref_put(&ctx->kref, &bm_aio_ctx_destroy); +	return err;  }  /* NOTE diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8d680562ba73..02f013a073a7 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -712,7 +712,6 @@ struct drbd_request {  	struct list_head tl_requests; /* ring list in the transfer log */  	struct bio *master_bio;       /* master bio pointer */  	unsigned long rq_state; /* see comments above _req_mod() */ -	int seq_num;  	unsigned long start_time;  }; @@ -851,6 +850,7 @@ enum {  	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */  	AL_SUSPENDED,		/* Activity logging is currently suspended. */  	AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */ +	STATE_SENT,		/* Do not change state/UUIDs while this is set */  };  struct drbd_bitmap; /* opaque for drbd_conf */ @@ -862,31 +862,30 @@ enum bm_flag {  	BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */  	/* currently locked for bulk operation */ -	BM_LOCKED_MASK = 0x7, +	BM_LOCKED_MASK = 0xf,  	/* in detail, that is: */  	BM_DONT_CLEAR = 0x1,  	BM_DONT_SET   = 0x2,  	BM_DONT_TEST  = 0x4, +	/* so we can mark it locked for bulk operation, +	 * and still allow all non-bulk operations */ +	BM_IS_LOCKED  = 0x8, +  	/* (test bit, count bit) allowed (common case) */ -	BM_LOCKED_TEST_ALLOWED = 0x3, +	BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED,  	/* testing bits, as well as setting new bits allowed, but clearing bits  	 * would be unexpected.  Used during bitmap receive.  Setting new bits  	 * requires sending of "out-of-sync" information, though. 
*/ -	BM_LOCKED_SET_ALLOWED = 0x1, +	BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED, -	/* clear is not expected while bitmap is locked for bulk operation */ +	/* for drbd_bm_write_copy_pages, everything is allowed, +	 * only concurrent bulk operations are locked out. */ +	BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED,  }; - -/* TODO sort members for performance - * MAYBE group them further */ - -/* THINK maybe we actually want to use the default "event/%s" worker threads - * or similar in linux 2.6, which uses per cpu data and threads. - */  struct drbd_work_queue {  	struct list_head q;  	struct semaphore s; /* producers up it, worker down()s it */ @@ -938,8 +937,7 @@ struct drbd_backing_dev {  };  struct drbd_md_io { -	struct drbd_conf *mdev; -	struct completion event; +	unsigned int done;  	int error;  }; @@ -1022,6 +1020,7 @@ struct drbd_conf {  	struct drbd_tl_epoch *newest_tle;  	struct drbd_tl_epoch *oldest_tle;  	struct list_head out_of_sequence_requests; +	struct list_head barrier_acked_requests;  	struct hlist_head *tl_hash;  	unsigned int tl_hash_s; @@ -1056,6 +1055,8 @@ struct drbd_conf {  	struct crypto_hash *csums_tfm;  	struct crypto_hash *verify_tfm; +	unsigned long last_reattach_jif; +	unsigned long last_reconnect_jif;  	struct drbd_thread receiver;  	struct drbd_thread worker;  	struct drbd_thread asender; @@ -1094,7 +1095,8 @@ struct drbd_conf {  	wait_queue_head_t ee_wait;  	struct page *md_io_page;	/* one page buffer for md_io */  	struct page *md_io_tmpp;	/* for logical_block_size != 512 */ -	struct mutex md_io_mutex;	/* protects the md_io_buffer */ +	struct drbd_md_io md_io; +	atomic_t md_io_in_use;		/* protects the md_io, md_io_page and md_io_tmpp */  	spinlock_t al_lock;  	wait_queue_head_t al_wait;  	struct lru_cache *act_log;	/* activity log */ @@ -1228,8 +1230,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev);  extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);  extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);  extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); -extern int _drbd_send_state(struct drbd_conf *mdev); -extern int drbd_send_state(struct drbd_conf *mdev); +extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s); +extern int drbd_send_current_state(struct drbd_conf *mdev);  extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,  			enum drbd_packets cmd, struct p_header80 *h,  			size_t size, unsigned msg_flags); @@ -1461,6 +1463,7 @@ extern int  drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);  extern int  drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);  extern int  drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);  extern int  drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); +extern int  drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);  extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,  		unsigned long al_enr);  extern size_t	     drbd_bm_words(struct drbd_conf *mdev); @@ -1493,11 +1496,38 @@ extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */  extern mempool_t *drbd_request_mempool;  extern mempool_t *drbd_ee_mempool; -extern struct page *drbd_pp_pool; /* drbd's page pool */ +/* drbd's page pool, used to buffer data received from the peer, + * or data requested by the peer. + * + * This does not have an emergency reserve. + * + * When allocating from this pool, it first takes pages from the pool. 
+ * Only if the pool is depleted will try to allocate from the system. + * + * The assumption is that pages taken from this pool will be processed, + * and given back, "quickly", and then can be recycled, so we can avoid + * frequent calls to alloc_page(), and still will be able to make progress even + * under memory pressure. + */ +extern struct page *drbd_pp_pool;  extern spinlock_t   drbd_pp_lock;  extern int	    drbd_pp_vacant;  extern wait_queue_head_t drbd_pp_wait; +/* We also need a standard (emergency-reserve backed) page pool + * for meta data IO (activity log, bitmap). + * We can keep it global, as long as it is used as "N pages at a time". + * 128 should be plenty, currently we probably can get away with as few as 1. + */ +#define DRBD_MIN_POOL_PAGES	128 +extern mempool_t *drbd_md_io_page_pool; + +/* We also need to make sure we get a bio + * when we need it for housekeeping purposes */ +extern struct bio_set *drbd_md_io_bio_set; +/* to allocate from that set */ +extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); +  extern rwlock_t global_state_lock;  extern struct drbd_conf *drbd_new_device(unsigned int minor); @@ -1536,8 +1566,12 @@ extern void resume_next_sg(struct drbd_conf *mdev);  extern void suspend_other_sg(struct drbd_conf *mdev);  extern int drbd_resync_finished(struct drbd_conf *mdev);  /* maybe rather drbd_main.c ? */ +extern void *drbd_md_get_buffer(struct drbd_conf *mdev); +extern void drbd_md_put_buffer(struct drbd_conf *mdev);  extern int drbd_md_sync_page_io(struct drbd_conf *mdev, -		struct drbd_backing_dev *bdev, sector_t sector, int rw); +				struct drbd_backing_dev *bdev, sector_t sector, int rw); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, +					    unsigned int *done);  extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);  extern void drbd_rs_controller_reset(struct drbd_conf *mdev); @@ -1754,19 +1788,6 @@ static inline struct page *page_chain_next(struct page *page)  #define page_chain_for_each_safe(page, n) \  	for (; page && ({ n = page_chain_next(page); 1; }); page = n) -static inline int drbd_bio_has_active_page(struct bio *bio) -{ -	struct bio_vec *bvec; -	int i; - -	__bio_for_each_segment(bvec, bio, i, 0) { -		if (page_count(bvec->bv_page) > 1) -			return 1; -	} - -	return 0; -} -  static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)  {  	struct page *page = e->pages; @@ -1777,7 +1798,6 @@ static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)  	return 0;  } -  static inline void drbd_state_lock(struct drbd_conf *mdev)  {  	wait_event(mdev->misc_wait, @@ -2230,7 +2250,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,  		 * Note: currently we don't support such large bitmaps on 32bit  		 * arch anyways, but no harm done to be prepared for it here.  		 */ -		unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; +		unsigned int shift = mdev->rs_total > UINT_MAX ? 16 : 10;  		unsigned long left = *bits_left >> shift;  		unsigned long total = 1UL + (mdev->rs_total >> shift);  		unsigned long tmp = 1000UL - left * 1000UL/total; @@ -2306,12 +2326,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)  	case D_OUTDATED:  	case D_CONSISTENT:  	case D_UP_TO_DATE: +	case D_FAILED:  		/* disk state is stable as well. 
*/  		break;  	/* no new io accepted during tansitional states */  	case D_ATTACHING: -	case D_FAILED:  	case D_NEGOTIATING:  	case D_UNKNOWN:  	case D_MASK: diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 211fc44f84be..920ede2829d6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -139,6 +139,8 @@ struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */  struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */  mempool_t *drbd_request_mempool;  mempool_t *drbd_ee_mempool; +mempool_t *drbd_md_io_page_pool; +struct bio_set *drbd_md_io_bio_set;  /* I do not use a standard mempool, because:     1) I want to hand out the pre-allocated objects first. @@ -159,7 +161,24 @@ static const struct block_device_operations drbd_ops = {  	.release = drbd_release,  }; -#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) +static void bio_destructor_drbd(struct bio *bio) +{ +	bio_free(bio, drbd_md_io_bio_set); +} + +struct bio *bio_alloc_drbd(gfp_t gfp_mask) +{ +	struct bio *bio; + +	if (!drbd_md_io_bio_set) +		return bio_alloc(gfp_mask, 1); + +	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); +	if (!bio) +		return NULL; +	bio->bi_destructor = bio_destructor_drbd; +	return bio; +}  #ifdef __CHECKER__  /* When checking with sparse, and this is an inline function, sparse will @@ -208,6 +227,7 @@ static int tl_init(struct drbd_conf *mdev)  	mdev->oldest_tle = b;  	mdev->newest_tle = b;  	INIT_LIST_HEAD(&mdev->out_of_sequence_requests); +	INIT_LIST_HEAD(&mdev->barrier_acked_requests);  	mdev->tl_hash = NULL;  	mdev->tl_hash_s = 0; @@ -246,9 +266,7 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)  	new->n_writes = 0;  	newest_before = mdev->newest_tle; -	/* never send a barrier number == 0, because that is special-cased -	 * when using TCQ for our write ordering code */ -	new->br_number = (newest_before->br_number+1) ?: 1; +	new->br_number = newest_before->br_number+1;  	if (mdev->newest_tle != new) {  		mdev->newest_tle->next = new;  		mdev->newest_tle = new; @@ -311,7 +329,7 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,  	   These have been list_move'd to the out_of_sequence_requests list in  	   _req_mod(, barrier_acked) above.  	   */ -	list_del_init(&b->requests); +	list_splice_init(&b->requests, &mdev->barrier_acked_requests);  	nob = b->next;  	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { @@ -411,6 +429,23 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)  		b = tmp;  		list_splice(&carry_reads, &b->requests);  	} + +	/* Actions operating on the disk state, also want to work on +	   requests that got barrier acked. */ +	switch (what) { +	case fail_frozen_disk_io: +	case restart_frozen_disk_io: +		list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { +			req = list_entry(le, struct drbd_request, tl_requests); +			_req_mod(req, what); +		} + +	case connection_lost_while_pending: +	case resend: +		break; +	default: +		dev_err(DEV, "what = %d in _tl_restart()\n", what); +	}  } @@ -458,6 +493,38 @@ void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)  }  /** + * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL + * @mdev:	DRBD device. 
+ */ +void tl_abort_disk_io(struct drbd_conf *mdev) +{ +	struct drbd_tl_epoch *b; +	struct list_head *le, *tle; +	struct drbd_request *req; + +	spin_lock_irq(&mdev->req_lock); +	b = mdev->oldest_tle; +	while (b) { +		list_for_each_safe(le, tle, &b->requests) { +			req = list_entry(le, struct drbd_request, tl_requests); +			if (!(req->rq_state & RQ_LOCAL_PENDING)) +				continue; +			_req_mod(req, abort_disk_io); +		} +		b = b->next; +	} + +	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { +		req = list_entry(le, struct drbd_request, tl_requests); +		if (!(req->rq_state & RQ_LOCAL_PENDING)) +			continue; +		_req_mod(req, abort_disk_io); +	} + +	spin_unlock_irq(&mdev->req_lock); +} + +/**   * cl_wide_st_chg() - true if the state change is a cluster wide one   * @mdev:	DRBD device.   * @os:		old (current) state. @@ -470,7 +537,7 @@ static int cl_wide_st_chg(struct drbd_conf *mdev,  		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||  		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||  		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || -		  (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || +		  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||  		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||  		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);  } @@ -509,8 +576,16 @@ static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);  static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,  						    union drbd_state,  						    union drbd_state); +enum sanitize_state_warnings { +	NO_WARNING, +	ABORTED_ONLINE_VERIFY, +	ABORTED_RESYNC, +	CONNECTION_LOST_NEGOTIATING, +	IMPLICITLY_UPGRADED_DISK, +	IMPLICITLY_UPGRADED_PDSK, +};  static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, -				       union drbd_state ns, const char **warn_sync_abort); +				       union drbd_state ns, enum sanitize_state_warnings *warn);  int drbd_send_state_req(struct drbd_conf *,  			union drbd_state, union drbd_state); @@ -785,6 +860,13 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,  	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)  		rv = SS_IN_TRANSIENT_STATE; +	/* While establishing a connection only allow cstate to change. +	   Delay/refuse role changes, detach attach etc... */ +	if (test_bit(STATE_SENT, &mdev->flags) && +	    !(os.conn == C_WF_REPORT_PARAMS || +	      (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION))) +		rv = SS_IN_TRANSIENT_STATE; +  	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)  		rv = SS_NEED_CONNECTION; @@ -803,6 +885,21 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,  	return rv;  } +static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) +{ +	static const char *msg_table[] = { +		[NO_WARNING] = "", +		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", +		[ABORTED_RESYNC] = "Resync aborted.", +		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", +		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", +		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", +	}; + +	if (warn != NO_WARNING) +		dev_warn(DEV, "%s\n", msg_table[warn]); +} +  /**   * sanitize_state() - Resolves implicitly necessary additional changes to a state transition   * @mdev:	DRBD device. @@ -814,11 +911,14 @@ is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,   * to D_UNKNOWN. 
This rule and many more along those lines are in this function.   */  static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, -				       union drbd_state ns, const char **warn_sync_abort) +				       union drbd_state ns, enum sanitize_state_warnings *warn)  {  	enum drbd_fencing_p fp;  	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; +	if (warn) +		*warn = NO_WARNING; +  	fp = FP_DONT_CARE;  	if (get_ldev(mdev)) {  		fp = mdev->ldev->dc.fencing; @@ -833,18 +933,13 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state  	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.  	 * If you try to go into some Sync* state, that shall fail (elsewhere). */  	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && -	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN) +	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)  		ns.conn = os.conn;  	/* we cannot fail (again) if we already detached */  	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)  		ns.disk = D_DISKLESS; -	/* if we are only D_ATTACHING yet, -	 * we can (and should) go directly to D_DISKLESS. */ -	if (ns.disk == D_FAILED && os.disk == D_ATTACHING) -		ns.disk = D_DISKLESS; -  	/* After C_DISCONNECTING only C_STANDALONE may follow */  	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)  		ns.conn = os.conn; @@ -863,10 +958,9 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state  	/* Abort resync if a disk fails/detaches */  	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&  	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { -		if (warn_sync_abort) -			*warn_sync_abort = -				os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? -				"Online-verify" : "Resync"; +		if (warn) +			*warn =	os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ? 
+				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;  		ns.conn = C_CONNECTED;  	} @@ -877,7 +971,8 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state  			ns.disk = mdev->new_state_tmp.disk;  			ns.pdsk = mdev->new_state_tmp.pdsk;  		} else { -			dev_alert(DEV, "Connection lost while negotiating, no data!\n"); +			if (warn) +				*warn = CONNECTION_LOST_NEGOTIATING;  			ns.disk = D_DISKLESS;  			ns.pdsk = D_UNKNOWN;  		} @@ -959,16 +1054,16 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state  		ns.disk = disk_max;  	if (ns.disk < disk_min) { -		dev_warn(DEV, "Implicitly set disk from %s to %s\n", -			 drbd_disk_str(ns.disk), drbd_disk_str(disk_min)); +		if (warn) +			*warn = IMPLICITLY_UPGRADED_DISK;  		ns.disk = disk_min;  	}  	if (ns.pdsk > pdsk_max)  		ns.pdsk = pdsk_max;  	if (ns.pdsk < pdsk_min) { -		dev_warn(DEV, "Implicitly set pdsk from %s to %s\n", -			 drbd_disk_str(ns.pdsk), drbd_disk_str(pdsk_min)); +		if (warn) +			*warn = IMPLICITLY_UPGRADED_PDSK;  		ns.pdsk = pdsk_min;  	} @@ -1045,12 +1140,12 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,  {  	union drbd_state os;  	enum drbd_state_rv rv = SS_SUCCESS; -	const char *warn_sync_abort = NULL; +	enum sanitize_state_warnings ssw;  	struct after_state_chg_work *ascw;  	os = mdev->state; -	ns = sanitize_state(mdev, os, ns, &warn_sync_abort); +	ns = sanitize_state(mdev, os, ns, &ssw);  	if (ns.i == os.i)  		return SS_NOTHING_TO_DO; @@ -1076,8 +1171,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,  		return rv;  	} -	if (warn_sync_abort) -		dev_warn(DEV, "%s aborted.\n", warn_sync_abort); +	print_sanitize_warnings(mdev, ssw);  	{  	char *pbp, pb[300]; @@ -1243,7 +1337,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,  		drbd_thread_stop_nowait(&mdev->receiver);  	/* Upon network failure, we need to restart the receiver. */ -	if (os.conn > C_TEAR_DOWN && +	if (os.conn > C_WF_CONNECTION &&  	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)  		drbd_thread_restart_nowait(&mdev->receiver); @@ -1251,6 +1345,15 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,  	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)  		drbd_resume_al(mdev); +	/* remember last connect and attach times so request_timer_fn() won't +	 * kill newly established sessions while we are still trying to thaw +	 * previously frozen IO */ +	if (os.conn != C_WF_REPORT_PARAMS && ns.conn == C_WF_REPORT_PARAMS) +		mdev->last_reconnect_jif = jiffies; +	if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && +	    ns.disk > D_NEGOTIATING) +		mdev->last_reattach_jif = jiffies; +  	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);  	if (ascw) {  		ascw->os = os; @@ -1354,12 +1457,16 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,  	/* Here we have the actions that are performed after a  	   state change. This function might sleep */ +	if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING) +		mod_timer(&mdev->request_timer, jiffies + HZ); +  	nsm.i = -1;  	if (ns.susp_nod) {  		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)  			what = resend; -		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING) +		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && +		    ns.disk > D_NEGOTIATING)  			what = restart_frozen_disk_io;  		if (what != nothing) @@ -1408,7 +1515,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,  	/* Do not change the order of the if above and the two below... 
*/  	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */  		drbd_send_uuids(mdev); -		drbd_send_state(mdev); +		drbd_send_state(mdev, ns);  	}  	/* No point in queuing send_bitmap if we don't have a connection  	 * anymore, so check also the _current_ state, not only the new state @@ -1441,11 +1548,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,  	}  	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { -		if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) { +		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && +		    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {  			drbd_uuid_new_current(mdev);  			drbd_send_uuids(mdev);  		} -  		/* D_DISKLESS Peer becomes secondary */  		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)  			/* We may still be Primary ourselves. @@ -1473,14 +1580,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,  	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {  		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */  		drbd_send_uuids(mdev); -		drbd_send_state(mdev); +		drbd_send_state(mdev, ns);  	}  	/* We want to pause/continue resync, tell peer. */  	if (ns.conn >= C_CONNECTED &&  	     ((os.aftr_isp != ns.aftr_isp) ||  	      (os.user_isp != ns.user_isp))) -		drbd_send_state(mdev); +		drbd_send_state(mdev, ns);  	/* In case one of the isp bits got set, suspend other devices. */  	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && @@ -1490,10 +1597,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,  	/* Make sure the peer gets informed about eventual state  	   changes (ISP bits) while we were in WFReportParams. */  	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) -		drbd_send_state(mdev); +		drbd_send_state(mdev, ns);  	if (os.conn != C_AHEAD && ns.conn == C_AHEAD) -		drbd_send_state(mdev); +		drbd_send_state(mdev, ns);  	/* We are in the progress to start a full sync... */  	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || @@ -1513,33 +1620,38 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,  	/* first half of local IO error, failure to attach,  	 * or administrative detach */  	if (os.disk != D_FAILED && ns.disk == D_FAILED) { -		enum drbd_io_error_p eh; -		int was_io_error; +		enum drbd_io_error_p eh = EP_PASS_ON; +		int was_io_error = 0;  		/* corresponding get_ldev was in __drbd_set_state, to serialize -		 * our cleanup here with the transition to D_DISKLESS, -		 * so it is safe to dreference ldev here. */ -		eh = mdev->ldev->dc.on_io_error; -		was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); - -		/* current state still has to be D_FAILED, -		 * there is only one way out: to D_DISKLESS, -		 * and that may only happen after our put_ldev below. */ -		if (mdev->state.disk != D_FAILED) -			dev_err(DEV, -				"ASSERT FAILED: disk is %s during detach\n", -				drbd_disk_str(mdev->state.disk)); - -		if (drbd_send_state(mdev)) -			dev_warn(DEV, "Notified peer that I am detaching my disk\n"); -		else -			dev_err(DEV, "Sending state for detaching disk failed\n"); - -		drbd_rs_cancel_all(mdev); - -		/* In case we want to get something to stable storage still, -		 * this may be the last chance. -		 * Following put_ldev may transition to D_DISKLESS. */ -		drbd_md_sync(mdev); +		 * our cleanup here with the transition to D_DISKLESS. 
+		 * But is is still not save to dreference ldev here, since +		 * we might come from an failed Attach before ldev was set. */ +		if (mdev->ldev) { +			eh = mdev->ldev->dc.on_io_error; +			was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + +			/* Immediately allow completion of all application IO, that waits +			   for completion from the local disk. */ +			tl_abort_disk_io(mdev); + +			/* current state still has to be D_FAILED, +			 * there is only one way out: to D_DISKLESS, +			 * and that may only happen after our put_ldev below. */ +			if (mdev->state.disk != D_FAILED) +				dev_err(DEV, +					"ASSERT FAILED: disk is %s during detach\n", +					drbd_disk_str(mdev->state.disk)); + +			if (ns.conn >= C_CONNECTED) +				drbd_send_state(mdev, ns); + +			drbd_rs_cancel_all(mdev); + +			/* In case we want to get something to stable storage still, +			 * this may be the last chance. +			 * Following put_ldev may transition to D_DISKLESS. */ +			drbd_md_sync(mdev); +		}  		put_ldev(mdev);  		if (was_io_error && eh == EP_CALL_HELPER) @@ -1561,16 +1673,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,                  mdev->rs_failed = 0;                  atomic_set(&mdev->rs_pending_cnt, 0); -		if (drbd_send_state(mdev)) -			dev_warn(DEV, "Notified peer that I'm now diskless.\n"); +		if (ns.conn >= C_CONNECTED) +			drbd_send_state(mdev, ns); +  		/* corresponding get_ldev in __drbd_set_state  		 * this may finally trigger drbd_ldev_destroy. */  		put_ldev(mdev);  	}  	/* Notify peer that I had a local IO error, and did not detached.. */ -	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT) -		drbd_send_state(mdev); +	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) +		drbd_send_state(mdev, ns);  	/* Disks got bigger while they were detached */  	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && @@ -1588,7 +1701,13 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,  	/* sync target done with resync.  Explicitly notify peer, even though  	 * it should (at least for non-empty resyncs) already know itself. */  	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) -		drbd_send_state(mdev); +		drbd_send_state(mdev, ns); + +	/* Wake up role changes, that were delayed because of connection establishing */ +	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS) { +		clear_bit(STATE_SENT, &mdev->flags); +		wake_up(&mdev->state_wait); +	}  	/* This triggers bitmap writeout of potentially still unwritten pages  	 * if the resync finished cleanly, or aborted because of peer disk @@ -1598,8 +1717,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,  	 * No harm done if some bits change during this phase.  	 
*/  	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { -		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, -			"write from resync_finished", BM_LOCKED_SET_ALLOWED); +		drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL, +			"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);  		put_ldev(mdev);  	} @@ -2057,7 +2176,11 @@ int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)  	D_ASSERT(mdev->state.disk == D_UP_TO_DATE); -	uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET; +	uuid = mdev->ldev->md.uuid[UI_BITMAP]; +	if (uuid && uuid != UUID_JUST_CREATED) +		uuid = uuid + UUID_NEW_BM_OFFSET; +	else +		get_random_bytes(&uuid, sizeof(u64));  	drbd_uuid_set(mdev, UI_BITMAP, uuid);  	drbd_print_uuids(mdev, "updated sync UUID");  	drbd_md_sync(mdev); @@ -2089,6 +2212,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl  		max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */  	} +	/* Never allow old drbd (up to 8.3.7) to see more than 32KiB */ +	if (mdev->agreed_pro_version <= 94) +		max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET); +  	p.d_size = cpu_to_be64(d_size);  	p.u_size = cpu_to_be64(u_size);  	p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); @@ -2102,10 +2229,10 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl  }  /** - * drbd_send_state() - Sends the drbd state to the peer + * drbd_send_current_state() - Sends the drbd state to the peer   * @mdev:	DRBD device.   */ -int drbd_send_state(struct drbd_conf *mdev) +int drbd_send_current_state(struct drbd_conf *mdev)  {  	struct socket *sock;  	struct p_state p; @@ -2131,6 +2258,37 @@ int drbd_send_state(struct drbd_conf *mdev)  	return ok;  } +/** + * drbd_send_state() - After a state change, sends the new state to the peer + * @mdev:	DRBD device. + * @state:	the state to send, not necessarily the current state. + * + * Each state change queues an "after_state_ch" work, which will eventually + * send the resulting new state to the peer. If more state changes happen + * between queuing and processing of the after_state_ch work, we still + * want to send each intermediary state in the order it occurred. + */ +int drbd_send_state(struct drbd_conf *mdev, union drbd_state state) +{ +	struct socket *sock; +	struct p_state p; +	int ok = 0; + +	mutex_lock(&mdev->data.mutex); + +	p.state = cpu_to_be32(state.i); +	sock = mdev->data.socket; + +	if (likely(sock != NULL)) { +		ok = _drbd_send_cmd(mdev, sock, P_STATE, +				    (struct p_header80 *)&p, sizeof(p), 0); +	} + +	mutex_unlock(&mdev->data.mutex); + +	return ok; +} +  int drbd_send_state_req(struct drbd_conf *mdev,  	union drbd_state mask, union drbd_state val)  { @@ -2615,7 +2773,7 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)  	struct bio_vec *bvec;  	int i;  	/* hint all but last page with MSG_MORE */ -	__bio_for_each_segment(bvec, bio, i, 0) { +	bio_for_each_segment(bvec, bio, i) {  		if (!_drbd_no_send_page(mdev, bvec->bv_page,  				     bvec->bv_offset, bvec->bv_len,  				     i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) @@ -2629,7 +2787,7 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)  	struct bio_vec *bvec;  	int i;  	/* hint all but last page with MSG_MORE */ -	__bio_for_each_segment(bvec, bio, i, 0) { +	bio_for_each_segment(bvec, bio, i) {  		if (!_drbd_send_page(mdev, bvec->bv_page,  				     bvec->bv_offset, bvec->bv_len,  				     i == bio->bi_vcnt -1 ? 
0 : MSG_MORE)) @@ -2695,8 +2853,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)  	p.sector   = cpu_to_be64(req->sector);  	p.block_id = (unsigned long)req; -	p.seq_num  = cpu_to_be32(req->seq_num = -				 atomic_add_return(1, &mdev->packet_seq)); +	p.seq_num  = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));  	dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); @@ -2987,8 +3144,8 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)  	atomic_set(&mdev->rs_sect_in, 0);  	atomic_set(&mdev->rs_sect_ev, 0);  	atomic_set(&mdev->ap_in_flight, 0); +	atomic_set(&mdev->md_io_in_use, 0); -	mutex_init(&mdev->md_io_mutex);  	mutex_init(&mdev->data.mutex);  	mutex_init(&mdev->meta.mutex);  	sema_init(&mdev->data.work.s, 0); @@ -3126,6 +3283,10 @@ static void drbd_destroy_mempools(void)  	/* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ +	if (drbd_md_io_bio_set) +		bioset_free(drbd_md_io_bio_set); +	if (drbd_md_io_page_pool) +		mempool_destroy(drbd_md_io_page_pool);  	if (drbd_ee_mempool)  		mempool_destroy(drbd_ee_mempool);  	if (drbd_request_mempool) @@ -3139,6 +3300,8 @@ static void drbd_destroy_mempools(void)  	if (drbd_al_ext_cache)  		kmem_cache_destroy(drbd_al_ext_cache); +	drbd_md_io_bio_set   = NULL; +	drbd_md_io_page_pool = NULL;  	drbd_ee_mempool      = NULL;  	drbd_request_mempool = NULL;  	drbd_ee_cache        = NULL; @@ -3162,6 +3325,8 @@ static int drbd_create_mempools(void)  	drbd_bm_ext_cache    = NULL;  	drbd_al_ext_cache    = NULL;  	drbd_pp_pool         = NULL; +	drbd_md_io_page_pool = NULL; +	drbd_md_io_bio_set   = NULL;  	/* caches */  	drbd_request_cache = kmem_cache_create( @@ -3185,6 +3350,16 @@ static int drbd_create_mempools(void)  		goto Enomem;  	/* mempools */ +#ifdef COMPAT_HAVE_BIOSET_CREATE +	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); +	if (drbd_md_io_bio_set == NULL) +		goto Enomem; +#endif + +	drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); +	if (drbd_md_io_page_pool == NULL) +		goto Enomem; +  	drbd_request_mempool = mempool_create(number,  		mempool_alloc_slab, mempool_free_slab, drbd_request_cache);  	if (drbd_request_mempool == NULL) @@ -3262,6 +3437,8 @@ static void drbd_delete_device(unsigned int minor)  	if (!mdev)  		return; +	del_timer_sync(&mdev->request_timer); +  	/* paranoia asserts */  	if (mdev->open_cnt != 0)  		dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, @@ -3666,8 +3843,10 @@ void drbd_md_sync(struct drbd_conf *mdev)  	if (!get_ldev_if_state(mdev, D_FAILED))  		return; -	mutex_lock(&mdev->md_io_mutex); -	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); +	buffer = drbd_md_get_buffer(mdev); +	if (!buffer) +		goto out; +  	memset(buffer, 0, 512);  	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); @@ -3698,7 +3877,8 @@ void drbd_md_sync(struct drbd_conf *mdev)  	 * since we updated it on metadata. 
*/  	mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); -	mutex_unlock(&mdev->md_io_mutex); +	drbd_md_put_buffer(mdev); +out:  	put_ldev(mdev);  } @@ -3718,8 +3898,9 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)  	if (!get_ldev_if_state(mdev, D_ATTACHING))  		return ERR_IO_MD_DISK; -	mutex_lock(&mdev->md_io_mutex); -	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); +	buffer = drbd_md_get_buffer(mdev); +	if (!buffer) +		goto out;  	if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {  		/* NOTE: can't do normal error processing here as this is @@ -3780,7 +3961,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)  		mdev->sync_conf.al_extents = 127;   err: -	mutex_unlock(&mdev->md_io_mutex); +	drbd_md_put_buffer(mdev); + out:  	put_ldev(mdev);  	return rv; @@ -4183,12 +4365,11 @@ const char *drbd_buildtag(void)  	static char buildtag[38] = "\0uilt-in";  	if (buildtag[0] == 0) { -#ifdef CONFIG_MODULES -		if (THIS_MODULE != NULL) -			sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); -		else +#ifdef MODULE +		sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion); +#else +		buildtag[0] = 'b';  #endif -			buildtag[0] = 'b';  	}  	return buildtag; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 946166e13953..6d4de6a72e80 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -289,7 +289,7 @@ static int _try_outdate_peer_async(void *data)  	*/  	spin_lock_irq(&mdev->req_lock);  	ns = mdev->state; -	if (ns.conn < C_WF_REPORT_PARAMS) { +	if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) {  		ns.pdsk = nps;  		_drbd_set_state(mdev, ns, CS_VERBOSE, NULL);  	} @@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)  		/* if this was forced, we should consider sync */  		if (forced)  			drbd_send_uuids(mdev); -		drbd_send_state(mdev); +		drbd_send_current_state(mdev);  	}  	drbd_md_sync(mdev); @@ -845,9 +845,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)  	   Because new from 8.3.8 onwards the peer can use multiple  	   BIOs for a single peer_request */  	if (mdev->state.conn >= C_CONNECTED) { -		if (mdev->agreed_pro_version < 94) -			peer = mdev->peer_max_bio_size; -		else if (mdev->agreed_pro_version == 94) +		if (mdev->agreed_pro_version < 94) { +			peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); +			/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ +		} else if (mdev->agreed_pro_version == 94)  			peer = DRBD_MAX_SIZE_H80_PACKET;  		else /* drbd 8.3.8 onwards */  			peer = DRBD_MAX_BIO_SIZE; @@ -1032,7 +1033,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp  		dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",  			(unsigned long long) drbd_get_max_capacity(nbc),  			(unsigned long long) nbc->dc.disk_size); -		retcode = ERR_DISK_TO_SMALL; +		retcode = ERR_DISK_TOO_SMALL;  		goto fail;  	} @@ -1046,7 +1047,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp  	}  	if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { -		retcode = ERR_MD_DISK_TO_SMALL; +		retcode = ERR_MD_DISK_TOO_SMALL;  		dev_warn(DEV, "refusing attach: md-device too small, "  		     "at least %llu sectors needed for this meta-disk type\n",  		     (unsigned long long) min_md_device_sectors); @@ -1057,7 +1058,7 @@ static int 
drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp  	 * (we may currently be R_PRIMARY with no local disk...) */  	if (drbd_get_max_capacity(nbc) <  	    drbd_get_capacity(mdev->this_bdev)) { -		retcode = ERR_DISK_TO_SMALL; +		retcode = ERR_DISK_TOO_SMALL;  		goto fail;  	} @@ -1138,7 +1139,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp  	if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&  	    drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {  		dev_warn(DEV, "refusing to truncate a consistent device\n"); -		retcode = ERR_DISK_TO_SMALL; +		retcode = ERR_DISK_TOO_SMALL;  		goto force_diskless_dec;  	} @@ -1336,17 +1337,34 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,  {  	enum drbd_ret_code retcode;  	int ret; +	struct detach dt = {}; + +	if (!detach_from_tags(mdev, nlp->tag_list, &dt)) { +		reply->ret_code = ERR_MANDATORY_TAG; +		goto out; +	} + +	if (dt.detach_force) { +		drbd_force_state(mdev, NS(disk, D_FAILED)); +		reply->ret_code = SS_SUCCESS; +		goto out; +	} +  	drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ +	drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */  	retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); +	drbd_md_put_buffer(mdev);  	/* D_FAILED will transition to DISKLESS. */  	ret = wait_event_interruptible(mdev->misc_wait,  			mdev->state.disk != D_FAILED);  	drbd_resume_io(mdev); +  	if ((int)retcode == (int)SS_IS_DISKLESS)  		retcode = SS_NOTHING_TO_DO;  	if (ret)  		retcode = ERR_INTR;  	reply->ret_code = retcode; +out:  	return 0;  } @@ -1711,7 +1729,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,  	if (rs.no_resync && mdev->agreed_pro_version < 93) {  		retcode = ERR_NEED_APV_93; -		goto fail; +		goto fail_ldev;  	}  	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) @@ -1738,6 +1756,10 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,   fail:  	reply->ret_code = retcode;  	return 0; + + fail_ldev: +	put_ldev(mdev); +	goto fail;  }  static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, @@ -1941,6 +1963,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl  	/* If there is still bitmap IO pending, probably because of a previous  	 * resync just being finished, wait for it before requesting a new resync. */ +	drbd_suspend_io(mdev);  	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));  	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); @@ -1959,6 +1982,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl  		retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));  	} +	drbd_resume_io(mdev);  	reply->ret_code = retcode;  	return 0; @@ -1980,6 +2004,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re  	/* If there is still bitmap IO pending, probably because of a previous  	 * resync just being finished, wait for it before requesting a new resync. 
*/ +	drbd_suspend_io(mdev);  	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));  	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); @@ -1998,6 +2023,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re  		} else  			retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));  	} +	drbd_resume_io(mdev);  	reply->ret_code = retcode;  	return 0; @@ -2170,11 +2196,13 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,  	/* If there is still bitmap IO pending, e.g. previous resync or verify  	 * just being finished, wait for it before requesting a new resync. */ +	drbd_suspend_io(mdev);  	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));  	/* w_make_ov_request expects position to be aligned */  	mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;  	reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); +	drbd_resume_io(mdev);  	return 0;  } diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 2959cdfb77f5..869bada2ed06 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -52,7 +52,7 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)  	if (unlikely(v >= 1000000)) {  		/* cool: > GiByte/s */  		seq_printf(seq, "%ld,", v / 1000000); -		v /= 1000000; +		v %= 1000000;  		seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000);  	} else if (likely(v >= 1000))  		seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 436f519bed1c..ea4836e0ae98 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -466,6 +466,7 @@ static int drbd_accept(struct drbd_conf *mdev, const char **what,  		goto out;  	}  	(*newsock)->ops  = sock->ops; +	__module_get((*newsock)->ops->owner);  out:  	return err; @@ -750,6 +751,7 @@ static int drbd_connect(struct drbd_conf *mdev)  {  	struct socket *s, *sock, *msock;  	int try, h, ok; +	enum drbd_state_rv rv;  	D_ASSERT(!mdev->data.socket); @@ -888,25 +890,32 @@ retry:  		}  	} -	if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) -		return 0; -  	sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;  	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;  	atomic_set(&mdev->packet_seq, 0);  	mdev->peer_seq = 0; -	drbd_thread_start(&mdev->asender); -  	if (drbd_send_protocol(mdev) == -1)  		return -1; +	set_bit(STATE_SENT, &mdev->flags);  	drbd_send_sync_param(mdev, &mdev->sync_conf);  	drbd_send_sizes(mdev, 0, 0);  	drbd_send_uuids(mdev); -	drbd_send_state(mdev); +	drbd_send_current_state(mdev);  	clear_bit(USE_DEGR_WFC_T, &mdev->flags);  	clear_bit(RESIZE_PENDING, &mdev->flags); + +	spin_lock_irq(&mdev->req_lock); +	rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL); +	if (mdev->state.conn != C_WF_REPORT_PARAMS) +		clear_bit(STATE_SENT, &mdev->flags); +	spin_unlock_irq(&mdev->req_lock); + +	if (rv < SS_SUCCESS) +		return 0; + +	drbd_thread_start(&mdev->asender);  	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */  	return 1; @@ -957,7 +966,7 @@ static void drbd_flush(struct drbd_conf *mdev)  		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,  					NULL);  		if (rv) { -			dev_err(DEV, "local disk flush failed with status %d\n", rv); +			dev_info(DEV, "local disk flush failed with status %d\n", rv);  			/* would rather check on EOPNOTSUPP, but that is not reliable.  		
	 * don't try again for ANY return value != 0  			 * if (rv == -EOPNOTSUPP) */ @@ -1001,13 +1010,14 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,  		if (epoch_size != 0 &&  		    atomic_read(&epoch->active) == 0 && -		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) { +		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {  			if (!(ev & EV_CLEANUP)) {  				spin_unlock(&mdev->epoch_lock);  				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);  				spin_lock(&mdev->epoch_lock);  			} -			dec_unacked(mdev); +			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) +				dec_unacked(mdev);  			if (mdev->current_epoch != epoch) {  				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); @@ -1096,7 +1106,11 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,  	/* In most cases, we will only need one bio.  But in case the lower  	 * level restrictions happen to be different at this offset on this  	 * side than those of the sending peer, we may need to submit the -	 * request in more than one bio. */ +	 * request in more than one bio. +	 * +	 * Plain bio_alloc is good enough here, this is no DRBD internally +	 * generated bio, but a bio allocated on behalf of the peer. +	 */  next_bio:  	bio = bio_alloc(GFP_NOIO, nr_pages);  	if (!bio) { @@ -1583,6 +1597,24 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u  	return ok;  } +static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_entry *data_e) +{ + +	struct drbd_epoch_entry *rs_e; +	bool rv = 0; + +	spin_lock_irq(&mdev->req_lock); +	list_for_each_entry(rs_e, &mdev->sync_ee, w.list) { +		if (overlaps(data_e->sector, data_e->size, rs_e->sector, rs_e->size)) { +			rv = 1; +			break; +		} +	} +	spin_unlock_irq(&mdev->req_lock); + +	return rv; +} +  /* Called from receive_Data.   * Synchronize packets on sock with packets on msock.   
* @@ -1826,6 +1858,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned  	list_add(&e->w.list, &mdev->active_ee);  	spin_unlock_irq(&mdev->req_lock); +	if (mdev->state.conn == C_SYNC_TARGET) +		wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, e)); +  	switch (mdev->net_conf->wire_protocol) {  	case DRBD_PROT_C:  		inc_unacked(mdev); @@ -2420,7 +2455,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l  			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];  			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; -			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); +			dev_info(DEV, "Lost last syncUUID packet, corrected:\n");  			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);  			return -1; @@ -2806,10 +2841,10 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi  	if (apv >= 88) {  		if (apv == 88) { -			if (data_size > SHARED_SECRET_MAX) { -				dev_err(DEV, "verify-alg too long, " -				    "peer wants %u, accepting only %u byte\n", -						data_size, SHARED_SECRET_MAX); +			if (data_size > SHARED_SECRET_MAX || data_size == 0) { +				dev_err(DEV, "verify-alg of wrong size, " +					"peer wants %u, accepting only up to %u byte\n", +					data_size, SHARED_SECRET_MAX);  				return false;  			} @@ -3168,9 +3203,20 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned  	os = ns = mdev->state;  	spin_unlock_irq(&mdev->req_lock); -	/* peer says his disk is uptodate, while we think it is inconsistent, -	 * and this happens while we think we have a sync going on. */ -	if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE && +	/* If some other part of the code (asender thread, timeout) +	 * already decided to close the connection again, +	 * we must not "re-establish" it here. */ +	if (os.conn <= C_TEAR_DOWN) +		return false; + +	/* If this is the "end of sync" confirmation, usually the peer disk +	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits +	 * set) resync started in PausedSyncT, or if the timing of pause-/ +	 * unpause-sync events has been "just right", the peer disk may +	 * transition from D_CONSISTENT to D_UP_TO_DATE as well. +	 */ +	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) && +	    real_peer_disk == D_UP_TO_DATE &&  	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {  		/* If we are (becoming) SyncSource, but peer is still in sync  		 * preparation, ignore its uptodate-ness to avoid flapping, it @@ -3288,7 +3334,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned  			/* Nowadays only used when forcing a node into primary role and  			   setting its disk to UpToDate with that */  			drbd_send_uuids(mdev); -			drbd_send_state(mdev); +			drbd_send_current_state(mdev);  		}  	} @@ -3776,6 +3822,13 @@ static void drbd_disconnect(struct drbd_conf *mdev)  	if (mdev->state.conn == C_STANDALONE)  		return; +	/* We are about to start the cleanup after connection loss. +	 * Make sure drbd_make_request knows about that. +	 * Usually we should be in some network failure state already, +	 * but just in case we are not, we fix it up here. +	 */ +	drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); +  	/* asender does not clean up anything. 
it must not interfere, either */  	drbd_thread_stop(&mdev->asender);  	drbd_free_sock(mdev); @@ -3803,8 +3856,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)  	atomic_set(&mdev->rs_pending_cnt, 0);  	wake_up(&mdev->misc_wait); -	del_timer(&mdev->request_timer); -  	/* make sure syncer is stopped and w_resume_next_sg queued */  	del_timer_sync(&mdev->resync_timer);  	resync_timer_fn((unsigned long)mdev); @@ -4433,7 +4484,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)  	if (mdev->state.conn == C_AHEAD &&  	    atomic_read(&mdev->ap_in_flight) == 0 && -	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) { +	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {  		mdev->start_resync_timer.expires = jiffies + HZ;  		add_timer(&mdev->start_resync_timer);  	} diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 4a0f314086e5..9c5c84946b05 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -37,6 +37,7 @@ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req  	const int rw = bio_data_dir(bio);  	int cpu;  	cpu = part_stat_lock(); +	part_round_stats(cpu, &mdev->vdisk->part0);  	part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);  	part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));  	part_inc_in_flight(&mdev->vdisk->part0, rw); @@ -214,8 +215,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)  {  	const unsigned long s = req->rq_state;  	struct drbd_conf *mdev = req->mdev; -	/* only WRITES may end up here without a master bio (on barrier ack) */ -	int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; +	int rw = req->rq_state & RQ_WRITE ? WRITE : READ;  	/* we must not complete the master bio, while it is  	 *	still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -230,7 +230,7 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)  		return;  	if (s & RQ_NET_PENDING)  		return; -	if (s & RQ_LOCAL_PENDING) +	if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED))  		return;  	if (req->master_bio) { @@ -277,6 +277,9 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)  		req->master_bio = NULL;  	} +	if (s & RQ_LOCAL_PENDING) +		return; +  	if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) {  		/* this is disconnected (local only) operation,  		 * or protocol C P_WRITE_ACK, @@ -429,7 +432,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  		break;  	case completed_ok: -		if (bio_data_dir(req->master_bio) == WRITE) +		if (req->rq_state & RQ_WRITE)  			mdev->writ_cnt += req->size>>9;  		else  			mdev->read_cnt += req->size>>9; @@ -438,7 +441,14 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  		req->rq_state &= ~RQ_LOCAL_PENDING;  		_req_may_be_done_not_susp(req, m); -		put_ldev(mdev); +		break; + +	case abort_disk_io: +		req->rq_state |= RQ_LOCAL_ABORTED; +		if (req->rq_state & RQ_WRITE) +			_req_may_be_done_not_susp(req, m); +		else +			goto goto_queue_for_net_read;  		break;  	case write_completed_with_error: @@ -447,7 +457,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  		__drbd_chk_io_error(mdev, false);  		_req_may_be_done_not_susp(req, m); -		put_ldev(mdev);  		break;  	case read_ahead_completed_with_error: @@ -455,7 +464,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  		req->rq_state |= RQ_LOCAL_COMPLETED;  		req->rq_state &= ~RQ_LOCAL_PENDING;  		
_req_may_be_done_not_susp(req, m); -		put_ldev(mdev);  		break;  	case read_completed_with_error: @@ -467,7 +475,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  		D_ASSERT(!(req->rq_state & RQ_NET_MASK));  		__drbd_chk_io_error(mdev, false); -		put_ldev(mdev); + +	goto_queue_for_net_read:  		/* no point in retrying if there is no good remote data,  		 * or we have no connection. */ @@ -556,10 +565,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  		drbd_queue_work(&mdev->data.work, &req->w);  		break; -	case oos_handed_to_network: -		/* actually the same */ +	case read_retry_remote_canceled:  	case send_canceled: -		/* treat it the same */  	case send_failed:  		/* real cleanup will be done from tl_clear.  just update flags  		 * so it is no longer marked as on the worker queue */ @@ -589,17 +596,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  		}  		req->rq_state &= ~RQ_NET_QUEUED;  		req->rq_state |= RQ_NET_SENT; -		/* because _drbd_send_zc_bio could sleep, and may want to -		 * dereference the bio even after the "write_acked_by_peer" and -		 * "completed_ok" events came in, once we return from -		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check -		 * whether it is done already, and end it.  */  		_req_may_be_done_not_susp(req, m);  		break; -	case read_retry_remote_canceled: +	case oos_handed_to_network: +		/* Was not set PENDING, no longer QUEUED, so is now DONE +		 * as far as this connection is concerned. */  		req->rq_state &= ~RQ_NET_QUEUED; -		/* fall through, in case we raced with drbd_disconnect */ +		req->rq_state |= RQ_NET_DONE; +		_req_may_be_done_not_susp(req, m); +		break; +  	case connection_lost_while_pending:  		/* transfer log cleanup after connection loss */  		/* assert something? */ @@ -616,8 +623,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  			_req_may_be_done(req, m); /* Allowed while state.susp */  		break; -	case write_acked_by_peer_and_sis: -		req->rq_state |= RQ_NET_SIS;  	case conflict_discarded_by_peer:  		/* for discarded conflicting writes of multiple primaries,  		 * there is no need to keep anything in the tl, potential @@ -628,18 +633,15 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,  			      (unsigned long long)req->sector, req->size);  		req->rq_state |= RQ_NET_DONE;  		/* fall through */ +	case write_acked_by_peer_and_sis:  	case write_acked_by_peer: +		if (what == write_acked_by_peer_and_sis) +			req->rq_state |= RQ_NET_SIS;  		/* protocol C; successfully written on peer. -		 * Nothing to do here. +		 * Nothing more to do here.  		 * We want to keep the tl in place for all protocols, to cater -		 * for volatile write-back caches on lower level devices. -		 * -		 * A barrier request is expected to have forced all prior -		 * requests onto stable storage, so completion of a barrier -		 * request could set NET_DONE right here, and not wait for the -		 * P_BARRIER_ACK, but that is an unnecessary optimization. */ +		 * for volatile write-back caches on lower level devices. */ -		/* this makes it effectively the same as for: */  	case recv_acked_by_peer:  		/* protocol B; pretends to be successfully written on peer.  		 
* see also notes above in handed_over_to_network about @@ -773,6 +775,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns  	int local, remote, send_oos = 0;  	int err = -EIO;  	int ret = 0; +	union drbd_state s;  	/* allocate outside of all locks; */  	req = drbd_req_new(mdev, bio); @@ -834,8 +837,9 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns  		drbd_al_begin_io(mdev, sector);  	} -	remote = remote && drbd_should_do_remote(mdev->state); -	send_oos = rw == WRITE && drbd_should_send_oos(mdev->state); +	s = mdev->state; +	remote = remote && drbd_should_do_remote(s); +	send_oos = rw == WRITE && drbd_should_send_oos(s);  	D_ASSERT(!(remote && send_oos));  	if (!(local || remote) && !is_susp(mdev->state)) { @@ -867,7 +871,7 @@ allocate_barrier:  	if (is_susp(mdev->state)) {  		/* If we got suspended, use the retry mechanism of -		   generic_make_request() to restart processing of this +		   drbd_make_request() to restart processing of this  		   bio. In the next call to drbd_make_request  		   we sleep in inc_ap_bio() */  		ret = 1; @@ -1091,7 +1095,6 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)  	 */  	D_ASSERT(bio->bi_size > 0);  	D_ASSERT((bio->bi_size & 0x1ff) == 0); -	D_ASSERT(bio->bi_idx == 0);  	/* to make some things easier, force alignment of requests within the  	 * granularity of our hash tables */ @@ -1099,8 +1102,9 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)  	e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;  	if (likely(s_enr == e_enr)) { -		inc_ap_bio(mdev, 1); -		drbd_make_request_common(mdev, bio, start_time); +		do { +			inc_ap_bio(mdev, 1); +		} while (drbd_make_request_common(mdev, bio, start_time));  		return;  	} @@ -1196,36 +1200,66 @@ void request_timer_fn(unsigned long data)  	struct drbd_conf *mdev = (struct drbd_conf *) data;  	struct drbd_request *req; /* oldest request */  	struct list_head *le; -	unsigned long et = 0; /* effective timeout = ko_count * timeout */ +	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ +	unsigned long now;  	if (get_net_conf(mdev)) { -		et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count; +		if (mdev->state.conn >= C_WF_REPORT_PARAMS) +			ent = mdev->net_conf->timeout*HZ/10 +				* mdev->net_conf->ko_count;  		put_net_conf(mdev);  	} -	if (!et || mdev->state.conn < C_WF_REPORT_PARAMS) +	if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */ +		dt = mdev->ldev->dc.disk_timeout * HZ / 10; +		put_ldev(mdev); +	} +	et = min_not_zero(dt, ent); + +	if (!et)  		return; /* Recurring timer stopped */ +	now = jiffies; +  	spin_lock_irq(&mdev->req_lock);  	le = &mdev->oldest_tle->requests;  	if (list_empty(le)) {  		spin_unlock_irq(&mdev->req_lock); -		mod_timer(&mdev->request_timer, jiffies + et); +		mod_timer(&mdev->request_timer, now + et);  		return;  	}  	le = le->prev;  	req = list_entry(le, struct drbd_request, tl_requests); -	if (time_is_before_eq_jiffies(req->start_time + et)) { -		if (req->rq_state & RQ_NET_PENDING) { -			dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); -			_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL); -		} else { -			dev_warn(DEV, "Local backing block device frozen?\n"); -			mod_timer(&mdev->request_timer, jiffies + et); -		} -	} else { -		mod_timer(&mdev->request_timer, req->start_time + et); -	} +	/* The request is considered timed out, if +	 * - we have some effective timeout from the 
configuration, +	 *   with above state restrictions applied, +	 * - the oldest request is waiting for a response from the network +	 *   resp. the local disk, +	 * - the oldest request is in fact older than the effective timeout, +	 * - the connection was established (resp. disk was attached) +	 *   for longer than the timeout already. +	 * Note that for 32bit jiffies and very stable connections/disks, +	 * we may have a wrap around, which is catched by +	 *   !time_in_range(now, last_..._jif, last_..._jif + timeout). +	 * +	 * Side effect: once per 32bit wrap-around interval, which means every +	 * ~198 days with 250 HZ, we have a window where the timeout would need +	 * to expire twice (worst case) to become effective. Good enough. +	 */ +	if (ent && req->rq_state & RQ_NET_PENDING && +		 time_after(now, req->start_time + ent) && +		!time_in_range(now, mdev->last_reconnect_jif, mdev->last_reconnect_jif + ent)) { +		dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); +		_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); +	} +	if (dt && req->rq_state & RQ_LOCAL_PENDING && +		 time_after(now, req->start_time + dt) && +		!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) { +		dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); +		__drbd_chk_io_error(mdev, 1); +	} +	nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;  	spin_unlock_irq(&mdev->req_lock); +	mod_timer(&mdev->request_timer, nt);  } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 68a234a5fdc5..3d2111919486 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -105,6 +105,7 @@ enum drbd_req_event {  	read_completed_with_error,  	read_ahead_completed_with_error,  	write_completed_with_error, +	abort_disk_io,  	completed_ok,  	resend,  	fail_frozen_disk_io, @@ -118,18 +119,21 @@ enum drbd_req_event {   * same time, so we should hold the request lock anyways.   
*/  enum drbd_req_state_bits { -	/* 210 -	 * 000: no local possible -	 * 001: to be submitted +	/* 3210 +	 * 0000: no local possible +	 * 0001: to be submitted  	 *    UNUSED, we could map: 011: submitted, completion still pending -	 * 110: completed ok -	 * 010: completed with error +	 * 0110: completed ok +	 * 0010: completed with error +	 * 1001: Aborted (before completion) +	 * 1x10: Aborted and completed -> free  	 */  	__RQ_LOCAL_PENDING,  	__RQ_LOCAL_COMPLETED,  	__RQ_LOCAL_OK, +	__RQ_LOCAL_ABORTED, -	/* 76543 +	/* 87654  	 * 00000: no network possible  	 * 00001: to be send  	 * 00011: to be send, on worker queue @@ -199,8 +203,9 @@ enum drbd_req_state_bits {  #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)  #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)  #define RQ_LOCAL_OK        (1UL << __RQ_LOCAL_OK) +#define RQ_LOCAL_ABORTED   (1UL << __RQ_LOCAL_ABORTED) -#define RQ_LOCAL_MASK      ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ +#define RQ_LOCAL_MASK      ((RQ_LOCAL_ABORTED << 1)-1)  #define RQ_NET_PENDING     (1UL << __RQ_NET_PENDING)  #define RQ_NET_QUEUED      (1UL << __RQ_NET_QUEUED) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 4d3e6f6213ba..620c70ff2231 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -70,11 +70,29 @@ rwlock_t global_state_lock;  void drbd_md_io_complete(struct bio *bio, int error)  {  	struct drbd_md_io *md_io; +	struct drbd_conf *mdev;  	md_io = (struct drbd_md_io *)bio->bi_private; +	mdev = container_of(md_io, struct drbd_conf, md_io); +  	md_io->error = error; -	complete(&md_io->event); +	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able +	 * to timeout on the lower level device, and eventually detach from it. +	 * If this io completion runs after that timeout expired, this +	 * drbd_md_put_buffer() may allow us to finally try and re-attach. +	 * During normal operation, this only puts that extra reference +	 * down to 1 again. +	 * Make sure we first drop the reference, and only then signal +	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the +	 * next drbd_md_sync_page_io(), that we trigger the +	 * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there. 
+	 */ +	drbd_md_put_buffer(mdev); +	md_io->done = 1; +	wake_up(&mdev->misc_wait); +	bio_put(bio); +	put_ldev(mdev);  }  /* reads on behalf of the partner, @@ -226,6 +244,7 @@ void drbd_endio_pri(struct bio *bio, int error)  	spin_lock_irqsave(&mdev->req_lock, flags);  	__req_mod(req, what, &m);  	spin_unlock_irqrestore(&mdev->req_lock, flags); +	put_ldev(mdev);  	if (m.bio)  		complete_master_bio(mdev, &m); @@ -290,7 +309,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *  	sg_init_table(&sg, 1);  	crypto_hash_init(&desc); -	__bio_for_each_segment(bvec, bio, i, 0) { +	bio_for_each_segment(bvec, bio, i) {  		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);  		crypto_hash_update(&desc, &sg, sg.length);  	} @@ -728,7 +747,7 @@ int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)  	}  	drbd_start_resync(mdev, C_SYNC_SOURCE); -	clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); +	clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);  	return 1;  } @@ -1519,14 +1538,14 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)  	}  	drbd_state_lock(mdev); - +	write_lock_irq(&global_state_lock);  	if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { +		write_unlock_irq(&global_state_lock);  		drbd_state_unlock(mdev);  		return;  	} -	write_lock_irq(&global_state_lock); -	ns = mdev->state; +	ns.i = mdev->state.i;  	ns.aftr_isp = !_drbd_may_sync_now(mdev); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b0b00d70c166..cce7df367b79 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -551,7 +551,7 @@ static void floppy_ready(void);  static void floppy_start(void);  static void process_fd_request(void);  static void recalibrate_floppy(void); -static void floppy_shutdown(unsigned long); +static void floppy_shutdown(struct work_struct *);  static int floppy_request_regions(int);  static void floppy_release_regions(int); @@ -588,6 +588,8 @@ static int buffer_max = -1;  static struct floppy_fdc_state fdc_state[N_FDC];  static int fdc;			/* current fdc */ +static struct workqueue_struct *floppy_wq; +  static struct floppy_struct *_floppy = floppy_type;  static unsigned char current_drive;  static long current_count_sectors; @@ -629,16 +631,15 @@ static inline void set_debugt(void) { }  static inline void debugt(const char *func, const char *msg) { }  #endif /* DEBUGT */ -typedef void (*timeout_fn)(unsigned long); -static DEFINE_TIMER(fd_timeout, floppy_shutdown, 0, 0); +static DECLARE_DELAYED_WORK(fd_timeout, floppy_shutdown);  static const char *timeout_message;  static void is_alive(const char *func, const char *message)  {  	/* this routine checks whether the floppy driver is "alive" */  	if (test_bit(0, &fdc_busy) && command_status < 2 && -	    !timer_pending(&fd_timeout)) { +	    !delayed_work_pending(&fd_timeout)) {  		DPRINT("%s: timeout handler died.  
%s\n", func, message);  	}  } @@ -666,15 +667,18 @@ static int output_log_pos;  static void __reschedule_timeout(int drive, const char *message)  { +	unsigned long delay; +  	if (drive == current_reqD)  		drive = current_drive; -	del_timer(&fd_timeout); +  	if (drive < 0 || drive >= N_DRIVE) { -		fd_timeout.expires = jiffies + 20UL * HZ; +		delay = 20UL * HZ;  		drive = 0;  	} else -		fd_timeout.expires = jiffies + UDP->timeout; -	add_timer(&fd_timeout); +		delay = UDP->timeout; + +	queue_delayed_work(floppy_wq, &fd_timeout, delay);  	if (UDP->flags & FD_DEBUG)  		DPRINT("reschedule timeout %s\n", message);  	timeout_message = message; @@ -872,7 +876,7 @@ static int lock_fdc(int drive, bool interruptible)  	command_status = FD_COMMAND_NONE; -	__reschedule_timeout(drive, "lock fdc"); +	reschedule_timeout(drive, "lock fdc");  	set_fdc(drive);  	return 0;  } @@ -880,23 +884,15 @@ static int lock_fdc(int drive, bool interruptible)  /* unlocks the driver */  static void unlock_fdc(void)  { -	unsigned long flags; - -	raw_cmd = NULL;  	if (!test_bit(0, &fdc_busy))  		DPRINT("FDC access conflict!\n"); -	if (do_floppy) -		DPRINT("device interrupt still active at FDC release: %pf!\n", -		       do_floppy); +	raw_cmd = NULL;  	command_status = FD_COMMAND_NONE; -	spin_lock_irqsave(&floppy_lock, flags); -	del_timer(&fd_timeout); +	__cancel_delayed_work(&fd_timeout); +	do_floppy = NULL;  	cont = NULL;  	clear_bit(0, &fdc_busy); -	if (current_req || set_next_request()) -		do_fd_request(current_req->q); -	spin_unlock_irqrestore(&floppy_lock, flags);  	wake_up(&fdc_wait);  } @@ -968,26 +964,24 @@ static DECLARE_WORK(floppy_work, NULL);  static void schedule_bh(void (*handler)(void))  { +	WARN_ON(work_pending(&floppy_work)); +  	PREPARE_WORK(&floppy_work, (work_func_t)handler); -	schedule_work(&floppy_work); +	queue_work(floppy_wq, &floppy_work);  } -static DEFINE_TIMER(fd_timer, NULL, 0, 0); +static DECLARE_DELAYED_WORK(fd_timer, NULL);  static void cancel_activity(void)  { -	unsigned long flags; - -	spin_lock_irqsave(&floppy_lock, flags);  	do_floppy = NULL; -	PREPARE_WORK(&floppy_work, (work_func_t)empty); -	del_timer(&fd_timer); -	spin_unlock_irqrestore(&floppy_lock, flags); +	cancel_delayed_work_sync(&fd_timer); +	cancel_work_sync(&floppy_work);  }  /* this function makes sure that the disk stays in the drive during the   * transfer */ -static void fd_watchdog(void) +static void fd_watchdog(struct work_struct *arg)  {  	debug_dcl(DP->flags, "calling disk change from watchdog\n"); @@ -997,21 +991,20 @@ static void fd_watchdog(void)  		cont->done(0);  		reset_fdc();  	} else { -		del_timer(&fd_timer); -		fd_timer.function = (timeout_fn)fd_watchdog; -		fd_timer.expires = jiffies + HZ / 10; -		add_timer(&fd_timer); +		cancel_delayed_work(&fd_timer); +		PREPARE_DELAYED_WORK(&fd_timer, fd_watchdog); +		queue_delayed_work(floppy_wq, &fd_timer, HZ / 10);  	}  }  static void main_command_interrupt(void)  { -	del_timer(&fd_timer); +	cancel_delayed_work(&fd_timer);  	cont->interrupt();  }  /* waits for a delay (spinup or select) to pass */ -static int fd_wait_for_completion(unsigned long delay, timeout_fn function) +static int fd_wait_for_completion(unsigned long expires, work_func_t function)  {  	if (FDCS->reset) {  		reset_fdc();	/* do the reset during sleep to win time @@ -1020,11 +1013,10 @@ static int fd_wait_for_completion(unsigned long delay, timeout_fn function)  		return 1;  	} -	if (time_before(jiffies, delay)) { -		del_timer(&fd_timer); -		fd_timer.function = function; -		fd_timer.expires = delay; -		
add_timer(&fd_timer); +	if (time_before(jiffies, expires)) { +		cancel_delayed_work(&fd_timer); +		PREPARE_DELAYED_WORK(&fd_timer, function); +		queue_delayed_work(floppy_wq, &fd_timer, expires - jiffies);  		return 1;  	}  	return 0; @@ -1342,7 +1334,7 @@ static int fdc_dtr(void)  	 */  	FDCS->dtr = raw_cmd->rate & 3;  	return fd_wait_for_completion(jiffies + 2UL * HZ / 100, -				      (timeout_fn)floppy_ready); +				      (work_func_t)floppy_ready);  }				/* fdc_dtr */  static void tell_sector(void) @@ -1447,7 +1439,7 @@ static void setup_rw_floppy(void)  	int flags;  	int dflags;  	unsigned long ready_date; -	timeout_fn function; +	work_func_t function;  	flags = raw_cmd->flags;  	if (flags & (FD_RAW_READ | FD_RAW_WRITE)) @@ -1461,9 +1453,9 @@ static void setup_rw_floppy(void)  		 */  		if (time_after(ready_date, jiffies + DP->select_delay)) {  			ready_date -= DP->select_delay; -			function = (timeout_fn)floppy_start; +			function = (work_func_t)floppy_start;  		} else -			function = (timeout_fn)setup_rw_floppy; +			function = (work_func_t)setup_rw_floppy;  		/* wait until the floppy is spinning fast enough */  		if (fd_wait_for_completion(ready_date, function)) @@ -1493,7 +1485,7 @@ static void setup_rw_floppy(void)  		inr = result();  		cont->interrupt();  	} else if (flags & FD_RAW_NEED_DISK) -		fd_watchdog(); +		fd_watchdog(NULL);  }  static int blind_seek; @@ -1802,20 +1794,22 @@ static void show_floppy(void)  		pr_info("do_floppy=%pf\n", do_floppy);  	if (work_pending(&floppy_work))  		pr_info("floppy_work.func=%pf\n", floppy_work.func); -	if (timer_pending(&fd_timer)) -		pr_info("fd_timer.function=%pf\n", fd_timer.function); -	if (timer_pending(&fd_timeout)) { -		pr_info("timer_function=%pf\n", fd_timeout.function); -		pr_info("expires=%lu\n", fd_timeout.expires - jiffies); -		pr_info("now=%lu\n", jiffies); -	} +	if (delayed_work_pending(&fd_timer)) +		pr_info("delayed work.function=%p expires=%ld\n", +		       fd_timer.work.func, +		       fd_timer.timer.expires - jiffies); +	if (delayed_work_pending(&fd_timeout)) +		pr_info("timer_function=%p expires=%ld\n", +		       fd_timeout.work.func, +		       fd_timeout.timer.expires - jiffies); +  	pr_info("cont=%p\n", cont);  	pr_info("current_req=%p\n", current_req);  	pr_info("command_status=%d\n", command_status);  	pr_info("\n");  } -static void floppy_shutdown(unsigned long data) +static void floppy_shutdown(struct work_struct *arg)  {  	unsigned long flags; @@ -1868,7 +1862,7 @@ static int start_motor(void (*function)(void))  	/* wait_for_completion also schedules reset if needed. 
*/  	return fd_wait_for_completion(DRS->select_date + DP->select_delay, -				      (timeout_fn)function); +				      (work_func_t)function);  }  static void floppy_ready(void) @@ -2821,7 +2815,6 @@ do_request:  		spin_lock_irq(&floppy_lock);  		pending = set_next_request();  		spin_unlock_irq(&floppy_lock); -  		if (!pending) {  			do_floppy = NULL;  			unlock_fdc(); @@ -2898,13 +2891,15 @@ static void do_fd_request(struct request_queue *q)  		 current_req->cmd_flags))  		return; -	if (test_bit(0, &fdc_busy)) { +	if (test_and_set_bit(0, &fdc_busy)) {  		/* fdc busy, this new request will be treated when the  		   current one is done */  		is_alive(__func__, "old request running");  		return;  	} -	lock_fdc(MAXTIMEOUT, false); +	command_status = FD_COMMAND_NONE; +	__reschedule_timeout(MAXTIMEOUT, "fd_request"); +	set_fdc(0);  	process_fd_request();  	is_alive(__func__, "");  } @@ -3612,9 +3607,7 @@ static int floppy_release(struct gendisk *disk, fmode_t mode)  	mutex_lock(&floppy_mutex);  	mutex_lock(&open_lock); -	if (UDRS->fd_ref < 0) -		UDRS->fd_ref = 0; -	else if (!UDRS->fd_ref--) { +	if (!UDRS->fd_ref--) {  		DPRINT("floppy_release with fd_ref == 0");  		UDRS->fd_ref = 0;  	} @@ -3650,13 +3643,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)  		set_bit(FD_VERIFY_BIT, &UDRS->flags);  	} -	if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (mode & FMODE_EXCL))) -		goto out2; - -	if (mode & FMODE_EXCL) -		UDRS->fd_ref = -1; -	else -		UDRS->fd_ref++; +	UDRS->fd_ref++;  	opened_bdev[drive] = bdev; @@ -3719,10 +3706,8 @@ static int floppy_open(struct block_device *bdev, fmode_t mode)  	mutex_unlock(&floppy_mutex);  	return 0;  out: -	if (UDRS->fd_ref < 0) -		UDRS->fd_ref = 0; -	else -		UDRS->fd_ref--; +	UDRS->fd_ref--; +  	if (!UDRS->fd_ref)  		opened_bdev[drive] = NULL;  out2: @@ -4159,10 +4144,16 @@ static int __init floppy_init(void)  			goto out_put_disk;  		} +		floppy_wq = alloc_ordered_workqueue("floppy", 0); +		if (!floppy_wq) { +			err = -ENOMEM; +			goto out_put_disk; +		} +  		disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock);  		if (!disks[dr]->queue) {  			err = -ENOMEM; -			goto out_put_disk; +			goto out_destroy_workq;  		}  		blk_queue_max_hw_sectors(disks[dr]->queue, 64); @@ -4213,7 +4204,7 @@ static int __init floppy_init(void)  	use_virtual_dma = can_use_virtual_dma & 1;  	fdc_state[0].address = FDC1;  	if (fdc_state[0].address == -1) { -		del_timer_sync(&fd_timeout); +		cancel_delayed_work(&fd_timeout);  		err = -ENODEV;  		goto out_unreg_region;  	} @@ -4224,7 +4215,7 @@ static int __init floppy_init(void)  	fdc = 0;		/* reset fdc in case of unexpected interrupt */  	err = floppy_grab_irq_and_dma();  	if (err) { -		del_timer_sync(&fd_timeout); +		cancel_delayed_work(&fd_timeout);  		err = -EBUSY;  		goto out_unreg_region;  	} @@ -4281,13 +4272,13 @@ static int __init floppy_init(void)  		user_reset_fdc(-1, FD_RESET_ALWAYS, false);  	}  	fdc = 0; -	del_timer_sync(&fd_timeout); +	cancel_delayed_work(&fd_timeout);  	current_drive = 0;  	initialized = true;  	if (have_no_fdc) {  		DPRINT("no floppy controllers found\n");  		err = have_no_fdc; -		goto out_flush_work; +		goto out_release_dma;  	}  	for (drive = 0; drive < N_DRIVE; drive++) { @@ -4302,7 +4293,7 @@ static int __init floppy_init(void)  		err = platform_device_register(&floppy_device[drive]);  		if (err) -			goto out_flush_work; +			goto out_release_dma;  		err = device_create_file(&floppy_device[drive].dev,  					 &dev_attr_cmos); @@ -4320,13 +4311,14 @@ static int __init 
floppy_init(void)  out_unreg_platform_dev:  	platform_device_unregister(&floppy_device[drive]); -out_flush_work: -	flush_work_sync(&floppy_work); +out_release_dma:  	if (atomic_read(&usage_count))  		floppy_release_irq_and_dma();  out_unreg_region:  	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);  	platform_driver_unregister(&floppy_driver); +out_destroy_workq: +	destroy_workqueue(floppy_wq);  out_unreg_blkdev:  	unregister_blkdev(FLOPPY_MAJOR, "fd");  out_put_disk: @@ -4397,7 +4389,7 @@ static int floppy_grab_irq_and_dma(void)  	 * We might have scheduled a free_irq(), wait it to  	 * drain first:  	 */ -	flush_work_sync(&floppy_work); +	flush_workqueue(floppy_wq);  	if (fd_request_irq()) {  		DPRINT("Unable to grab IRQ%d for the floppy driver\n", @@ -4488,9 +4480,9 @@ static void floppy_release_irq_and_dma(void)  			pr_info("motor off timer %d still active\n", drive);  #endif -	if (timer_pending(&fd_timeout)) +	if (delayed_work_pending(&fd_timeout))  		pr_info("floppy timer still active:%s\n", timeout_message); -	if (timer_pending(&fd_timer)) +	if (delayed_work_pending(&fd_timer))  		pr_info("auxiliary floppy timer still active\n");  	if (work_pending(&floppy_work))  		pr_info("work still pending\n"); @@ -4560,8 +4552,9 @@ static void __exit floppy_module_exit(void)  		put_disk(disks[drive]);  	} -	del_timer_sync(&fd_timeout); -	del_timer_sync(&fd_timer); +	cancel_delayed_work_sync(&fd_timeout); +	cancel_delayed_work_sync(&fd_timer); +	destroy_workqueue(floppy_wq);  	if (atomic_read(&usage_count))  		floppy_release_irq_and_dma(); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4e86393a09cf..60eed4bdd2e4 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -526,6 +526,14 @@ static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)  	return 0;  } +static char *encode_disk_name(char *ptr, unsigned int n) +{ +	if (n >= 26) +		ptr = encode_disk_name(ptr, n / 26 - 1); +	*ptr = 'a' + n % 26; +	return ptr + 1; +} +  static int xlvbd_alloc_gendisk(blkif_sector_t capacity,  			       struct blkfront_info *info,  			       u16 vdisk_info, u16 sector_size) @@ -536,6 +544,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,  	unsigned int offset;  	int minor;  	int nr_parts; +	char *ptr;  	BUG_ON(info->gd != NULL);  	BUG_ON(info->rq != NULL); @@ -560,7 +569,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,  					"emulated IDE disks,\n\t choose an xvd device name"  					"from xvde on\n", info->vdevice);  	} -	err = -ENODEV; +	if (minor >> MINORBITS) { +		pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n", +			info->vdevice, minor); +		return -ENODEV; +	}  	if ((minor % nr_parts) == 0)  		nr_minors = nr_parts; @@ -574,23 +587,14 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,  	if (gd == NULL)  		goto release; -	if (nr_minors > 1) { -		if (offset < 26) -			sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); -		else -			sprintf(gd->disk_name, "%s%c%c", DEV_NAME, -				'a' + ((offset / 26)-1), 'a' + (offset % 26)); -	} else { -		if (offset < 26) -			sprintf(gd->disk_name, "%s%c%d", DEV_NAME, -				'a' + offset, -				minor & (nr_parts - 1)); -		else -			sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, -				'a' + ((offset / 26) - 1), -				'a' + (offset % 26), -				minor & (nr_parts - 1)); -	} +	strcpy(gd->disk_name, DEV_NAME); +	ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); +	BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN); +	if (nr_minors > 1) +		*ptr = 0; +	else 
+		snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr, +			 "%d", minor & (nr_parts - 1));  	gd->major = XENVBD_MAJOR;  	gd->first_minor = minor; @@ -1496,7 +1500,9 @@ module_init(xlblk_init);  static void __exit xlblk_exit(void)  { -	return xenbus_unregister_driver(&blkfront_driver); +	xenbus_unregister_driver(&blkfront_driver); +	unregister_blkdev(XENVBD_MAJOR, DEV_NAME); +	kfree(minors);  }  module_exit(xlblk_exit); diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9e5f5607eba3..47e3d4850584 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@  extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.11" +#define REL_VERSION "8.3.13"  #define API_VERSION 88  #define PRO_VERSION_MIN 86  #define PRO_VERSION_MAX 96 @@ -112,8 +112,8 @@ enum drbd_ret_code {  	ERR_OPEN_MD_DISK	= 105,  	ERR_DISK_NOT_BDEV	= 107,  	ERR_MD_NOT_BDEV		= 108, -	ERR_DISK_TO_SMALL	= 111, -	ERR_MD_DISK_TO_SMALL	= 112, +	ERR_DISK_TOO_SMALL	= 111, +	ERR_MD_DISK_TOO_SMALL	= 112,  	ERR_BDCLAIM_DISK	= 114,  	ERR_BDCLAIM_MD_DISK	= 115,  	ERR_MD_IDX_INVALID	= 116, diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 447c36752385..fb670bf603f7 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -48,6 +48,11 @@  #define DRBD_TIMEOUT_MAX 600  #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */ + /* If backing disk takes longer than disk_timeout, mark the disk as failed */ +#define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0    /* disabled */ +    /* active connection retries when C_WF_CONNECTION */  #define DRBD_CONNECT_INT_MIN 1  #define DRBD_CONNECT_INT_MAX 120 @@ -60,7 +65,7 @@   /* timeout for the ping packets.*/  #define DRBD_PING_TIMEO_MIN  1 -#define DRBD_PING_TIMEO_MAX  100 +#define DRBD_PING_TIMEO_MAX  300  #define DRBD_PING_TIMEO_DEF  5    /* max number of write requests between write barriers */ diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ab6159e4fcf0..a8706f08ab36 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -31,9 +31,12 @@ NL_PACKET(disk_conf, 3,  	NL_INTEGER(	56,	T_MAY_IGNORE,	max_bio_bvecs)  	NL_BIT(		57,	T_MAY_IGNORE,	no_disk_barrier)  	NL_BIT(		58,	T_MAY_IGNORE,	no_disk_drain) +	NL_INTEGER(	89,	T_MAY_IGNORE,	disk_timeout)  ) -NL_PACKET(detach, 4, ) +NL_PACKET(detach, 4, +	NL_BIT(		88,	T_MANDATORY,	detach_force) +)  NL_PACKET(net_conf, 5,  	NL_STRING(	8,	T_MANDATORY,	my_addr,	128)  | 

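The drbd_proc.c hunk above replaces v /= 1000000 with v %= 1000000 in seq_printf_with_thousands_grouping(); a minimal userspace sketch of the corrected grouping (plain C, not the kernel seq_file code) shows why the modulo is needed to keep the low six digits for the middle and last groups:

	/* Standalone sketch of the thousands-grouping logic after the fix:
	 * for v >= 1,000,000 the remainder must be taken with v %= 1000000,
	 * otherwise the middle group prints digits of v / 1000000 again. */
	#include <stdio.h>

	static void print_with_thousands_grouping(long v)
	{
		if (v >= 1000000) {
			printf("%ld,", v / 1000000);
			v %= 1000000;			/* keep the low six digits */
			printf("%03ld,%03ld", v / 1000, v % 1000);
		} else if (v >= 1000)
			printf("%ld,%03ld", v / 1000, v % 1000);
		else
			printf("%ld", v);
	}

	int main(void)
	{
		print_with_thousands_grouping(1234567);	/* prints 1,234,567 */
		putchar('\n');
		return 0;
	}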
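request_timer_fn() now derives one effective timeout from two independently configured ones: the network timeout (ko_count * timeout) and the new disk_timeout, ignoring whichever is zero (disabled). A small sketch of that selection with made-up example values (assuming HZ=250; this mirrors the kernel's min_not_zero() idea, it is not the kernel helper itself):

	#include <stdio.h>

	/* pick the smaller of two timeouts, treating 0 as "disabled" */
	static unsigned long min_not_zero(unsigned long a, unsigned long b)
	{
		if (a == 0)
			return b;
		if (b == 0)
			return a;
		return a < b ? a : b;
	}

	int main(void)
	{
		unsigned long ent = 6 * 250;	/* e.g. ko_count=6, net timeout 1s, HZ=250 */
		unsigned long dt  = 0;		/* disk_timeout 0 = disabled (the default) */
		unsigned long et  = min_not_zero(dt, ent);

		printf("effective timeout: %lu ticks\n", et);	/* 1500: only the net check runs */
		return 0;
	}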
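The long comment added to request_timer_fn() notes that the jiffies comparisons must tolerate a 32-bit wrap-around, which is what time_after() and time_in_range() provide. A standalone sketch of the idea (it mirrors, but does not reproduce, the kernel macros; assumes a 32-bit int):

	#include <stdio.h>

	/* "a is after b" as a signed test on the unsigned difference,
	 * so the comparison stays correct across a wrap-around */
	static int time_after(unsigned int a, unsigned int b)
	{
		return (int)(b - a) < 0;
	}

	static int time_in_range(unsigned int t, unsigned int start, unsigned int end)
	{
		return !time_after(start, t) && !time_after(t, end);
	}

	int main(void)
	{
		unsigned int start = 0xfffffff0u;	/* shortly before the 32-bit wrap */
		unsigned int now = start + 0x20;	/* has wrapped around */

		printf("after: %d, in range: %d\n",
		       time_after(now, start),
		       time_in_range(now, start, start + 0x100));	/* 1, 1 */
		return 0;
	}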
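xen-blkfront's new encode_disk_name() builds device names in bijective base 26, so any offset maps to xvda, xvdz, xvdaa and so on without the old special-cased sprintf() calls. A userspace sketch using the same recursion, with a hypothetical offset value:

	#include <stdio.h>
	#include <string.h>

	/* 0 -> "a", 25 -> "z", 26 -> "aa", 27 -> "ab", ... */
	static char *encode_disk_name(char *ptr, unsigned int n)
	{
		if (n >= 26)
			ptr = encode_disk_name(ptr, n / 26 - 1);
		*ptr = 'a' + n % 26;
		return ptr + 1;
	}

	int main(void)
	{
		char name[32];
		unsigned int offset = 27;	/* hypothetical device offset */

		strcpy(name, "xvd");
		*encode_disk_name(name + strlen(name), offset) = '\0';
		printf("%s\n", name);		/* prints "xvdab" for offset 27 */
		return 0;
	}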