From 881245dcff29df992d8431392a41fb81549129f9 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Tue, 9 Mar 2010 09:26:04 +0100 Subject: Add DocBook documentation for the block tracepoints. This patch adds a simple description of the various block tracepoints available in the kernel. Signed-off-by: William Cohen Acked-by: Randy Dunlap Signed-off-by: Jens Axboe --- include/trace/events/block.h | 164 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 5fb72733331e..d870a918559c 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -40,6 +40,16 @@ DECLARE_EVENT_CLASS(block_rq_with_error, __entry->nr_sector, __entry->errors) ); +/** + * block_rq_abort - abort block operation request + * @q: queue containing the block operation request + * @rq: block IO operation request + * + * Called immediately after pending block IO operation request @rq in + * queue @q is aborted. The fields in the operation request @rq + * can be examined to determine which device and sectors the pending + * operation would access. + */ DEFINE_EVENT(block_rq_with_error, block_rq_abort, TP_PROTO(struct request_queue *q, struct request *rq), @@ -47,6 +57,15 @@ DEFINE_EVENT(block_rq_with_error, block_rq_abort, TP_ARGS(q, rq) ); +/** + * block_rq_requeue - place block IO request back on a queue + * @q: queue holding operation + * @rq: block IO operation request + * + * The block operation request @rq is being placed back into queue + * @q. For some reason the request was not completed and needs to be + * put back in the queue. + */ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, TP_PROTO(struct request_queue *q, struct request *rq), @@ -54,6 +73,17 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, TP_ARGS(q, rq) ); +/** + * block_rq_complete - block IO operation completed by device driver + * @q: queue containing the block operation request + * @rq: block operations request + * + * The block_rq_complete tracepoint event indicates that some portion + * of operation request has been completed by the device driver. If + * the @rq->bio is %NULL, then there is absolutely no additional work to + * do for the request. If @rq->bio is non-NULL then there is + * additional work required to complete the request. + */ DEFINE_EVENT(block_rq_with_error, block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq), @@ -95,6 +125,16 @@ DECLARE_EVENT_CLASS(block_rq, __entry->nr_sector, __entry->comm) ); +/** + * block_rq_insert - insert block operation request into queue + * @q: target queue + * @rq: block IO operation request + * + * Called immediately before block operation request @rq is inserted + * into queue @q. The fields in the operation request @rq struct can + * be examined to determine which device and sectors the pending + * operation would access. + */ DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), @@ -102,6 +142,14 @@ DEFINE_EVENT(block_rq, block_rq_insert, TP_ARGS(q, rq) ); +/** + * block_rq_issue - issue pending block IO request operation to device driver + * @q: queue holding operation + * @rq: block IO operation operation request + * + * Called when block operation request @rq from queue @q is sent to a + * device driver for processing. + */ DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), @@ -109,6 +157,17 @@ DEFINE_EVENT(block_rq, block_rq_issue, TP_ARGS(q, rq) ); +/** + * block_bio_bounce - used bounce buffer when processing block operation + * @q: queue holding the block operation + * @bio: block operation + * + * A bounce buffer was used to handle the block operation @bio in @q. + * This occurs when hardware limitations prevent a direct transfer of + * data between the @bio data memory area and the IO device. Use of a + * bounce buffer requires extra copying of data and decreases + * performance. + */ TRACE_EVENT(block_bio_bounce, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -138,6 +197,14 @@ TRACE_EVENT(block_bio_bounce, __entry->nr_sector, __entry->comm) ); +/** + * block_bio_complete - completed all work on the block operation + * @q: queue holding the block operation + * @bio: block operation completed + * + * This tracepoint indicates there is no further work to do on this + * block IO operation @bio. + */ TRACE_EVENT(block_bio_complete, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -193,6 +260,14 @@ DECLARE_EVENT_CLASS(block_bio, __entry->nr_sector, __entry->comm) ); +/** + * block_bio_backmerge - merging block operation to the end of an existing operation + * @q: queue holding operation + * @bio: new block operation to merge + * + * Merging block request @bio to the end of an existing block request + * in queue @q. + */ DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -200,6 +275,14 @@ DEFINE_EVENT(block_bio, block_bio_backmerge, TP_ARGS(q, bio) ); +/** + * block_bio_frontmerge - merging block operation to the beginning of an existing operation + * @q: queue holding operation + * @bio: new block operation to merge + * + * Merging block IO operation @bio to the beginning of an existing block + * operation in queue @q. + */ DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -207,6 +290,13 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_ARGS(q, bio) ); +/** + * block_bio_queue - putting new block IO operation in queue + * @q: queue holding operation + * @bio: new block operation + * + * About to place the block IO operation @bio into queue @q. + */ DEFINE_EVENT(block_bio, block_bio_queue, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -243,6 +333,15 @@ DECLARE_EVENT_CLASS(block_get_rq, __entry->nr_sector, __entry->comm) ); +/** + * block_getrq - get a free request entry in queue for block IO operations + * @q: queue for operations + * @bio: pending block IO operation + * @rw: low bit indicates a read (%0) or a write (%1) + * + * A request struct for queue @q has been allocated to handle the + * block IO operation @bio. + */ DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), @@ -250,6 +349,17 @@ DEFINE_EVENT(block_get_rq, block_getrq, TP_ARGS(q, bio, rw) ); +/** + * block_sleeprq - waiting to get a free request entry in queue for block IO operation + * @q: queue for operation + * @bio: pending block IO operation + * @rw: low bit indicates a read (%0) or a write (%1) + * + * In the case where a request struct cannot be provided for queue @q + * the process needs to wait for an request struct to become + * available. This tracepoint event is generated each time the + * process goes to sleep waiting for request struct become available. + */ DEFINE_EVENT(block_get_rq, block_sleeprq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), @@ -257,6 +367,14 @@ DEFINE_EVENT(block_get_rq, block_sleeprq, TP_ARGS(q, bio, rw) ); +/** + * block_plug - keep operations requests in request queue + * @q: request queue to plug + * + * Plug the request queue @q. Do not allow block operation requests + * to be sent to the device driver. Instead, accumulate requests in + * the queue to improve throughput performance of the block device. + */ TRACE_EVENT(block_plug, TP_PROTO(struct request_queue *q), @@ -293,6 +411,13 @@ DECLARE_EVENT_CLASS(block_unplug, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); +/** + * block_unplug_timer - timed release of operations requests in queue to device driver + * @q: request queue to unplug + * + * Unplug the request queue @q because a timer expired and allow block + * operation requests to be sent to the device driver. + */ DEFINE_EVENT(block_unplug, block_unplug_timer, TP_PROTO(struct request_queue *q), @@ -300,6 +425,13 @@ DEFINE_EVENT(block_unplug, block_unplug_timer, TP_ARGS(q) ); +/** + * block_unplug_io - release of operations requests in request queue + * @q: request queue to unplug + * + * Unplug request queue @q because device driver is scheduled to work + * on elements in the request queue. + */ DEFINE_EVENT(block_unplug, block_unplug_io, TP_PROTO(struct request_queue *q), @@ -307,6 +439,17 @@ DEFINE_EVENT(block_unplug, block_unplug_io, TP_ARGS(q) ); +/** + * block_split - split a single bio struct into two bio structs + * @q: queue containing the bio + * @bio: block operation being split + * @new_sector: The starting sector for the new bio + * + * The bio request @bio in request queue @q needs to be split into two + * bio requests. The newly created @bio request starts at + * @new_sector. This split may be required due to hardware limitation + * such as operation crossing device boundaries in a RAID system. + */ TRACE_EVENT(block_split, TP_PROTO(struct request_queue *q, struct bio *bio, @@ -337,6 +480,16 @@ TRACE_EVENT(block_split, __entry->comm) ); +/** + * block_remap - map request for a partition to the raw device + * @q: queue holding the operation + * @bio: revised operation + * @dev: device for the operation + * @from: original sector for the operation + * + * An operation for a partition on a block device has been mapped to the + * raw block device. + */ TRACE_EVENT(block_remap, TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, @@ -370,6 +523,17 @@ TRACE_EVENT(block_remap, (unsigned long long)__entry->old_sector) ); +/** + * block_rq_remap - map request for a block operation request + * @q: queue holding the operation + * @rq: block IO operation request + * @dev: device for the operation + * @from: original sector for the operation + * + * The block operation request @rq in @q has been remapped. The block + * operation request @rq holds the current information and @from hold + * the original sector. + */ TRACE_EVENT(block_rq_remap, TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, -- cgit v1.2.3-59-g8ed1b From cf14c2e987ba0a09a7b09be2ecd55af0bc9c17b4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 2 Feb 2010 21:03:50 +0100 Subject: drbd: --dry-run option for drbdsetup net ( drbdadm -- --dry-run connect ) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 8 +++++++- drivers/block/drbd/drbd_main.c | 16 ++++++++++++++-- drivers/block/drbd/drbd_receiver.c | 22 ++++++++++++++++++++-- include/linux/drbd.h | 2 +- include/linux/drbd_nl.h | 1 + 5 files changed, 43 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 2bf3a6ef3684..1aae724e37fb 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -443,13 +443,18 @@ struct p_rs_param_89 { char csums_alg[SHARED_SECRET_MAX]; } __packed; +enum drbd_conn_flags { + CF_WANT_LOSE = 1, + CF_DRY_RUN = 2, +}; + struct p_protocol { struct p_header head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; u32 after_sb_2p; - u32 want_lose; + u32 conn_flags; u32 two_primaries; /* Since protocol version 87 and higher. */ @@ -791,6 +796,7 @@ enum { * while this is set. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ + CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ }; struct drbd_bitmap; /* opaque for drbd_conf */ diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index ab871e00ffc5..b2d347d18c7d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1668,7 +1668,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) int drbd_send_protocol(struct drbd_conf *mdev) { struct p_protocol *p; - int size, rv; + int size, cf, rv; size = sizeof(struct p_protocol); @@ -1685,9 +1685,21 @@ int drbd_send_protocol(struct drbd_conf *mdev) p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); - p->want_lose = cpu_to_be32(mdev->net_conf->want_lose); p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); + cf = 0; + if (mdev->net_conf->want_lose) + cf |= CF_WANT_LOSE; + if (mdev->net_conf->dry_run) { + if (mdev->agreed_pro_version >= 92) + cf |= CF_DRY_RUN; + else { + dev_err(DEV, "--dry-run is not supported by peer"); + return 0; + } + } + p->conn_flags = cpu_to_be32(cf); + if (mdev->agreed_pro_version >= 87) strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index d065c646b35a..8bcde4a9632b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2538,6 +2538,16 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol } } + if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) { + if (hg == 0) + dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); + else + dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.", + drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET), + abs(hg) >= 2 ? "full" : "bit-map based"); + return C_MASK; + } + if (abs(hg) >= 2) { dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) @@ -2585,7 +2595,7 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) struct p_protocol *p = (struct p_protocol *)h; int header_size, data_size; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; - int p_want_lose, p_two_primaries; + int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; header_size = sizeof(*p) - sizeof(*h); @@ -2598,8 +2608,14 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) p_after_sb_0p = be32_to_cpu(p->after_sb_0p); p_after_sb_1p = be32_to_cpu(p->after_sb_1p); p_after_sb_2p = be32_to_cpu(p->after_sb_2p); - p_want_lose = be32_to_cpu(p->want_lose); p_two_primaries = be32_to_cpu(p->two_primaries); + cf = be32_to_cpu(p->conn_flags); + p_want_lose = cf & CF_WANT_LOSE; + + clear_bit(CONN_DRY_RUN, &mdev->flags); + + if (cf & CF_DRY_RUN) + set_bit(CONN_DRY_RUN, &mdev->flags); if (p_proto != mdev->net_conf->wire_protocol) { dev_err(DEV, "incompatible communication protocols\n"); @@ -3125,6 +3141,8 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = D_DISKLESS; } else { + if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) + return FALSE; D_ASSERT(oconn == C_WF_REPORT_PARAMS); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 78962272338a..4341b1a97a34 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.7" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 91 +#define PRO_VERSION_MAX 92 enum drbd_io_error_p { diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index a4d82f895994..b41050e8cc2f 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -63,6 +63,7 @@ NL_PACKET(net_conf, 5, NL_BIT( 41, T_MAY_IGNORE, always_asbp) NL_BIT( 61, T_MAY_IGNORE, no_cork) NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) + NL_BIT( 70, T_MANDATORY, dry_run) ) NL_PACKET(disconnect, 6, ) -- cgit v1.2.3-59-g8ed1b From 1f55243024087b56aef0b1e6d9c0ea89c76f0a6b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 4 Mar 2010 15:51:01 +0100 Subject: drbd: Renamed overwrite_peer to primary_force Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_nl.c | 2 +- include/linux/drbd_nl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 6492e321ec00..6429d2b19e06 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -407,7 +407,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } reply->ret_code = - drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer); + drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force); return 0; } diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index b41050e8cc2f..f7431a4ca608 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -12,7 +12,7 @@ #endif NL_PACKET(primary, 1, - NL_BIT( 1, T_MAY_IGNORE, overwrite_peer) + NL_BIT( 1, T_MAY_IGNORE, primary_force) ) NL_PACKET(secondary, 2, ) -- cgit v1.2.3-59-g8ed1b From f11c9c5c259cb2c3d698548dc3936f773ab1f5b9 Mon Sep 17 00:00:00 2001 From: Edward Shishkin Date: Thu, 11 Mar 2010 14:09:47 -0800 Subject: vfs: improve writeback_inodes_wb() Do not pin/unpin superblock for every inode in writeback_inodes_wb(), pin it for the whole group of inodes which belong to the same superblock and call writeback_sb_inodes() handler for them. Signed-off-by: Edward Shishkin Cc: Jens Axboe Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 133 +++++++++++++++++++++++++--------------------- include/linux/writeback.h | 3 ++ 2 files changed, 76 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 76fc4d594acb..6841effa47ca 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -553,108 +553,85 @@ select_queue: return ret; } -static void unpin_sb_for_writeback(struct super_block **psb) +static void unpin_sb_for_writeback(struct super_block *sb) { - struct super_block *sb = *psb; - - if (sb) { - up_read(&sb->s_umount); - put_super(sb); - *psb = NULL; - } + up_read(&sb->s_umount); + put_super(sb); } +enum sb_pin_state { + SB_PINNED, + SB_NOT_PINNED, + SB_PIN_FAILED +}; + /* * For WB_SYNC_NONE writeback, the caller does not have the sb pinned * before calling writeback. So make sure that we do pin it, so it doesn't * go away while we are writing inodes from it. - * - * Returns 0 if the super was successfully pinned (or pinning wasn't needed), - * 1 if we failed. */ -static int pin_sb_for_writeback(struct writeback_control *wbc, - struct inode *inode, struct super_block **psb) +static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, + struct super_block *sb) { - struct super_block *sb = inode->i_sb; - - /* - * If this sb is already pinned, nothing more to do. If not and - * *psb is non-NULL, unpin the old one first - */ - if (sb == *psb) - return 0; - else if (*psb) - unpin_sb_for_writeback(psb); - /* * Caller must already hold the ref for this */ if (wbc->sync_mode == WB_SYNC_ALL) { WARN_ON(!rwsem_is_locked(&sb->s_umount)); - return 0; + return SB_NOT_PINNED; } - spin_lock(&sb_lock); sb->s_count++; if (down_read_trylock(&sb->s_umount)) { if (sb->s_root) { spin_unlock(&sb_lock); - goto pinned; + return SB_PINNED; } /* * umounted, drop rwsem again and fall through to failure */ up_read(&sb->s_umount); } - sb->s_count--; spin_unlock(&sb_lock); - return 1; -pinned: - *psb = sb; - return 0; + return SB_PIN_FAILED; } -static void writeback_inodes_wb(struct bdi_writeback *wb, - struct writeback_control *wbc) +/* + * Write a portion of b_io inodes which belong to @sb. + * If @wbc->sb != NULL, then find and write all such + * inodes. Otherwise write only ones which go sequentially + * in reverse order. + * Return 1, if the caller writeback routine should be + * interrupted. Otherwise return 0. + */ +static int writeback_sb_inodes(struct super_block *sb, + struct bdi_writeback *wb, + struct writeback_control *wbc) { - struct super_block *sb = wbc->sb, *pin_sb = NULL; - const unsigned long start = jiffies; /* livelock avoidance */ - - spin_lock(&inode_lock); - - if (!wbc->for_kupdate || list_empty(&wb->b_io)) - queue_io(wb, wbc->older_than_this); - while (!list_empty(&wb->b_io)) { - struct inode *inode = list_entry(wb->b_io.prev, - struct inode, i_list); long pages_skipped; - - /* - * super block given and doesn't match, skip this inode - */ - if (sb && sb != inode->i_sb) { + struct inode *inode = list_entry(wb->b_io.prev, + struct inode, i_list); + if (wbc->sb && sb != inode->i_sb) { + /* super block given and doesn't + match, skip this inode */ redirty_tail(inode); continue; } - + if (sb != inode->i_sb) + /* finish with this superblock */ + return 0; if (inode->i_state & (I_NEW | I_WILL_FREE)) { requeue_io(inode); continue; } - /* * Was this inode dirtied after sync_sb_inodes was called? * This keeps sync from extra jobs and livelock. */ - if (inode_dirtied_after(inode, start)) - break; - - if (pin_sb_for_writeback(wbc, inode, &pin_sb)) { - requeue_io(inode); - continue; - } + if (inode_dirtied_after(inode, wbc->wb_start)) + return 1; BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); __iget(inode); @@ -673,14 +650,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, spin_lock(&inode_lock); if (wbc->nr_to_write <= 0) { wbc->more_io = 1; - break; + return 1; } if (!list_empty(&wb->b_more_io)) wbc->more_io = 1; } + /* b_io is empty */ + return 1; +} + +static void writeback_inodes_wb(struct bdi_writeback *wb, + struct writeback_control *wbc) +{ + int ret = 0; - unpin_sb_for_writeback(&pin_sb); + wbc->wb_start = jiffies; /* livelock avoidance */ + spin_lock(&inode_lock); + if (!wbc->for_kupdate || list_empty(&wb->b_io)) + queue_io(wb, wbc->older_than_this); + + while (!list_empty(&wb->b_io)) { + struct inode *inode = list_entry(wb->b_io.prev, + struct inode, i_list); + struct super_block *sb = inode->i_sb; + enum sb_pin_state state; + + if (wbc->sb && sb != wbc->sb) { + /* super block given and doesn't + match, skip this inode */ + redirty_tail(inode); + continue; + } + state = pin_sb_for_writeback(wbc, sb); + + if (state == SB_PIN_FAILED) { + requeue_io(inode); + continue; + } + ret = writeback_sb_inodes(sb, wb, wbc); + if (state == SB_PINNED) + unpin_sb_for_writeback(sb); + if (ret) + break; + } spin_unlock(&inode_lock); /* Leave any unwritten inodes on b_io */ } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 76e8903cd204..36520ded3e06 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -34,6 +34,9 @@ struct writeback_control { enum writeback_sync_modes sync_mode; unsigned long *older_than_this; /* If !NULL, only write back inodes older than this */ + unsigned long wb_start; /* Time writeback_inodes_wb was + called. This is needed to avoid + extra jobs and livelock */ long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ -- cgit v1.2.3-59-g8ed1b From 2cda2728aa1c8c006418a24f867b25e5eb7a32e2 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 15 Mar 2010 12:46:51 +0100 Subject: block: Fix overrun in lcm() and move it to lib lcm() was defined to take integer-sized arguments. The supplied arguments are multiplied, however, causing us to overflow given sufficiently large input. That in turn led to incorrect optimal I/O size reporting in some cases (RAID over RAID). Switch lcm() over to unsigned long similar to gcd() and move the function from blk-settings.c to lib. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 11 +---------- include/linux/lcm.h | 8 ++++++++ lib/Makefile | 2 +- lib/lcm.c | 15 +++++++++++++++ 4 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 include/linux/lcm.h create mode 100644 lib/lcm.c (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 31e7a9375c13..4c4700dca56a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -8,6 +8,7 @@ #include #include /* for max_pfn/max_low_pfn */ #include +#include #include #include "blk.h" @@ -461,16 +462,6 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) } EXPORT_SYMBOL(blk_queue_stack_limits); -static unsigned int lcm(unsigned int a, unsigned int b) -{ - if (a && b) - return (a * b) / gcd(a, b); - else if (b) - return b; - - return a; -} - /** * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) diff --git a/include/linux/lcm.h b/include/linux/lcm.h new file mode 100644 index 000000000000..7bf01d779b45 --- /dev/null +++ b/include/linux/lcm.h @@ -0,0 +1,8 @@ +#ifndef _LCM_H +#define _LCM_H + +#include + +unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__; + +#endif /* _LCM_H */ diff --git a/lib/Makefile b/lib/Makefile index 2e152aed7198..0d4015205c64 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o list_sort.o + string_helpers.o gcd.o lcm.o list_sort.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/lcm.c b/lib/lcm.c new file mode 100644 index 000000000000..157cd88a6ffc --- /dev/null +++ b/lib/lcm.c @@ -0,0 +1,15 @@ +#include +#include +#include + +/* Lowest common multiple */ +unsigned long lcm(unsigned long a, unsigned long b) +{ + if (a && b) + return (a * b) / gcd(a, b); + else if (b) + return b; + + return a; +} +EXPORT_SYMBOL_GPL(lcm); -- cgit v1.2.3-59-g8ed1b From ee714f2dd33e726346e34f5cda12543162f4753e Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 10 Mar 2010 00:48:32 -0500 Subject: block: Finalize conversion of block limits functions Remove compatibility wrappers and update remaining drivers. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/block/DAC960.c | 1 - drivers/block/virtio_blk.c | 5 ++--- include/linux/blkdev.h | 24 ------------------------ 3 files changed, 2 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 459f1bc25a7b..c5f22bb0a48e 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -2533,7 +2533,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) Controller->RequestQueue[n] = RequestQueue; blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit); RequestQueue->queuedata = Controller; - blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit); blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit); blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand); disk->queue = RequestQueue; diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3c64af05fa82..653817ceeedd 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -347,14 +347,13 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) set_capacity(vblk->disk, cap); /* We can handle whatever the host told us to handle. */ - blk_queue_max_phys_segments(q, vblk->sg_elems-2); - blk_queue_max_hw_segments(q, vblk->sg_elems-2); + blk_queue_max_segments(q, vblk->sg_elems-2); /* No need to bounce any requests */ blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); /* No real sector limit. */ - blk_queue_max_sectors(q, -1U); + blk_queue_max_hw_sectors(q, -1U); /* Host can optionally specify maximum segment size and number of * segments. */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ebd22dbed861..41551c9341b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -921,26 +921,7 @@ extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); - -/* Temporary compatibility wrapper */ -static inline void blk_queue_max_sectors(struct request_queue *q, unsigned int max) -{ - blk_queue_max_hw_sectors(q, max); -} - extern void blk_queue_max_segments(struct request_queue *, unsigned short); - -static inline void blk_queue_max_phys_segments(struct request_queue *q, unsigned short max) -{ - blk_queue_max_segments(q, max); -} - -static inline void blk_queue_max_hw_segments(struct request_queue *q, unsigned short max) -{ - blk_queue_max_segments(q, max); -} - - extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); @@ -1030,11 +1011,6 @@ static inline int sb_issue_discard(struct super_block *sb, extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -#define MAX_PHYS_SEGMENTS 128 -#define MAX_HW_SEGMENTS 128 -#define SAFE_MAX_SECTORS 255 -#define MAX_SEGMENT_SIZE 65536 - enum blk_default_limits { BLK_MAX_SEGMENTS = 128, BLK_SAFE_MAX_SECTORS = 255, -- cgit v1.2.3-59-g8ed1b From 97fedbbe1046b3118f49df249840ca21041eefe4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 16 Mar 2010 08:55:32 +0100 Subject: Remove GENHD_FL_DRIVERFS This flag is not used, so best discarded. Signed-off-by: NeilBrown -- Hi Jens, I came across this recently - these are the only two occurances of "GENHD_FL_DRIVERFS" in the kernel, so it cannot be needed. NeilBrown Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 2 +- include/linux/genhd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 1dd4d8407694..466fae8ef770 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2185,7 +2185,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie) blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); gd->driverfs_dev = &sdp->sdev_gendev; - gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; + gd->flags = GENHD_FL_EXT_DEVT; if (sdp->removable) gd->flags |= GENHD_FL_REMOVABLE; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 56b50514ab25..5f2f4c4d8fb0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -109,7 +109,7 @@ struct hd_struct { }; #define GENHD_FL_REMOVABLE 1 -#define GENHD_FL_DRIVERFS 2 +/* 2 is unused */ #define GENHD_FL_MEDIA_CHANGE_NOTIFY 4 #define GENHD_FL_CD 8 #define GENHD_FL_UP 16 -- cgit v1.2.3-59-g8ed1b From 181fdde3b4268cb7b4af76ba6337e7ec8accbb36 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Fri, 19 Mar 2010 08:58:16 +0100 Subject: block: remove 16 bytes of padding from struct request on 64bits Remove alignment padding to shrink struct request from 336 to 320 bytes so needing one fewer cacheline and therefore removing 48 bytes from struct request_queue. Signed-off-by: Richard Kennedy Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 41551c9341b6..6690e8bae7bb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -158,7 +158,6 @@ enum rq_flag_bits { struct request { struct list_head queuelist; struct call_single_data csd; - int cpu; struct request_queue *q; @@ -166,9 +165,11 @@ struct request { enum rq_cmd_type_bits cmd_type; unsigned long atomic_flags; + int cpu; + /* the following two fields are internal, NEVER access directly */ - sector_t __sector; /* sector cursor */ unsigned int __data_len; /* total data len */ + sector_t __sector; /* sector cursor */ struct bio *bio; struct bio *biotail; @@ -201,20 +202,20 @@ struct request { unsigned short ioprio; + int ref_count; + void *special; /* opaque pointer available for LLD use */ char *buffer; /* kaddr of the current segment if available */ int tag; int errors; - int ref_count; - /* * when request is used as a packet command carrier */ - unsigned short cmd_len; unsigned char __cmd[BLK_MAX_CDB]; unsigned char *cmd; + unsigned short cmd_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; -- cgit v1.2.3-59-g8ed1b From 8a6d9b149f105f8bdfa8e42dd9753e45a1887a16 Mon Sep 17 00:00:00 2001 From: Ferenc Wagner Date: Wed, 24 Mar 2010 08:20:03 +0100 Subject: i2o: Remove the dangerous kobj_to_i2o_device macro This macro worked only when applied to variables named 'kobj'. While this could have been fixed by simply renaming the macro argument, a more type-safe replacement by an inline function would be preferred. However, nobody uses this macro, so it's simpler to just remove it. Signed-off-by: Ferenc Wagner Signed-off-by: Jens Axboe --- include/linux/i2o.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 87018dc5527d..9e7a12d6385d 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -782,7 +782,6 @@ extern int i2o_exec_lct_get(struct i2o_controller *); #define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver) #define to_i2o_device(dev) container_of(dev, struct i2o_device, device) #define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device) -#define kobj_to_i2o_device(kobj) to_i2o_device(container_of(kobj, struct device, kobj)) /** * i2o_out_to_virt - Turn an I2O message to a virtual address -- cgit v1.2.3-59-g8ed1b From 6072f7491f5ef391a575e18a1165e72a3eef1601 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 31 Mar 2010 20:11:59 +0000 Subject: ide: Requeue request after DMA timeout I noticed that my KVM virtual machines were experiencing IDE issues resulting in processes stuck on waiting for buffers to complete. The root cause is of course race conditions in the ancient qemu backend that I'm using. However, the fact that the guest isn't recovering is a bug. I've tracked it down to the change made last year to dequeue requests at the start rather than at the end in the IDE layer. commit 8f6205cd572fece673da0255d74843680f67f879 Author: Tejun Heo Date: Fri May 8 11:53:59 2009 +0900 ide: dequeue in-flight request The problem is that the function ide_dma_timeout_retry does not requeue the current request, causing one request to be lost for each DMA timeout. This patch fixes this by requeueing the request. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/ide/ide-dma.c | 1 + drivers/ide/ide-io.c | 2 +- include/linux/ide.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index ee58c88dee5a..fd40a8116574 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -492,6 +492,7 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) if (rq) { hwif->rq = NULL; rq->errors = 0; + ide_requeue_and_plug(drive, rq); } return ret; } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index db96138fefcd..172ac9218154 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -566,7 +566,7 @@ plug_device_2: blk_plug_device(q); } -static void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) +void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; unsigned long flags; diff --git a/include/linux/ide.h b/include/linux/ide.h index 97e6ab435184..3239d1c10acb 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1169,6 +1169,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); extern void do_ide_request(struct request_queue *); +extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); void ide_init_disk(struct gendisk *, ide_drive_t *); -- cgit v1.2.3-59-g8ed1b From 9d32c30542f9ecdb4b96a1a960924c9f403e3562 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 5 Apr 2010 22:29:09 -0700 Subject: Input: matrix_keypad - allow platform to disable key autorepeat In an embedded system the matrix_keypad driver might be used to interface with an external control panel and not an actual keyboard. On the control panel some of the keys could be used to turn on/off various functions. If key autorepeat is enabled this causes the function to quickly toggle between the on and off states and makes operation difficult. Add an option in the platform-specific data to disable the key autorepeat. Signed-off-by: H Hartley Sweeten Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 4 +++- include/linux/input/matrix_keypad.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index d3c8b61a941d..f9d86cfb0db0 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -373,7 +373,9 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) input_dev->name = pdev->name; input_dev->id.bustype = BUS_HOST; input_dev->dev.parent = &pdev->dev; - input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); + input_dev->evbit[0] = BIT_MASK(EV_KEY); + if (!pdata->no_autorepeat) + input_dev->evbit[0] |= BIT_MASK(EV_REP); input_dev->open = matrix_keypad_start; input_dev->close = matrix_keypad_stop; diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 3bd018baae20..c964cd7f436a 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -44,6 +44,7 @@ struct matrix_keymap_data { * @active_low: gpio polarity * @wakeup: controls whether the device should be set up as wakeup * source + * @no_autorepeat: disable key autorepeat * * This structure represents platform-specific data that use used by * matrix_keypad driver to perform proper initialization. @@ -64,6 +65,7 @@ struct matrix_keypad_platform_data { bool active_low; bool wakeup; + bool no_autorepeat; }; /** -- cgit v1.2.3-59-g8ed1b From f5eb917b861828da18dc28854308068c66d1449a Mon Sep 17 00:00:00 2001 From: John Hughes Date: Wed, 7 Apr 2010 21:29:25 -0700 Subject: x25: Patch to fix bug 15678 - x25 accesses fields beyond end of packet. Here is a patch to stop X.25 examining fields beyond the end of the packet. For example, when a simple CALL ACCEPTED was received: 10 10 0f x25_parse_facilities was attempting to decode the FACILITIES field, but this packet contains no facilities field. Signed-off-by: John Hughes Signed-off-by: David S. Miller --- include/net/x25.h | 4 ++++ net/x25/af_x25.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- net/x25/x25_facilities.c | 12 +++++++++++- net/x25/x25_in.c | 15 +++++++++++---- 4 files changed, 72 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/x25.h b/include/net/x25.h index 9baa07dc7d17..33f67fb78586 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -182,6 +182,10 @@ extern int sysctl_x25_clear_request_timeout; extern int sysctl_x25_ack_holdback_timeout; extern int sysctl_x25_forward; +extern int x25_parse_address_block(struct sk_buff *skb, + struct x25_address *called_addr, + struct x25_address *calling_addr); + extern int x25_addr_ntoa(unsigned char *, struct x25_address *, struct x25_address *); extern int x25_addr_aton(unsigned char *, struct x25_address *, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 9796f3ed1edb..fe26c01ef3e6 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -82,6 +82,41 @@ struct compat_x25_subscrip_struct { }; #endif + +int x25_parse_address_block(struct sk_buff *skb, + struct x25_address *called_addr, + struct x25_address *calling_addr) +{ + unsigned char len; + int needed; + int rc; + + if (skb->len < 1) { + /* packet has no address block */ + rc = 0; + goto empty; + } + + len = *skb->data; + needed = 1 + (len >> 4) + (len & 0x0f); + + if (skb->len < needed) { + /* packet is too short to hold the addresses it claims + to hold */ + rc = -1; + goto empty; + } + + return x25_addr_ntoa(skb->data, called_addr, calling_addr); + +empty: + *called_addr->x25_addr = 0; + *calling_addr->x25_addr = 0; + + return rc; +} + + int x25_addr_ntoa(unsigned char *p, struct x25_address *called_addr, struct x25_address *calling_addr) { @@ -921,16 +956,26 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, /* * Extract the X.25 addresses and convert them to ASCII strings, * and remove them. + * + * Address block is mandatory in call request packets */ - addr_len = x25_addr_ntoa(skb->data, &source_addr, &dest_addr); + addr_len = x25_parse_address_block(skb, &source_addr, &dest_addr); + if (addr_len <= 0) + goto out_clear_request; skb_pull(skb, addr_len); /* * Get the length of the facilities, skip past them for the moment * get the call user data because this is needed to determine * the correct listener + * + * Facilities length is mandatory in call request packets */ + if (skb->len < 1) + goto out_clear_request; len = skb->data[0] + 1; + if (skb->len < len) + goto out_clear_request; skb_pull(skb,len); /* diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index a21f6646eb3a..a2765c6b1f1a 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -35,7 +35,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, struct x25_dte_facilities *dte_facs, unsigned long *vc_fac_mask) { unsigned char *p = skb->data; - unsigned int len = *p++; + unsigned int len; *vc_fac_mask = 0; @@ -50,6 +50,14 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, memset(dte_facs->called_ae, '\0', sizeof(dte_facs->called_ae)); memset(dte_facs->calling_ae, '\0', sizeof(dte_facs->calling_ae)); + if (skb->len < 1) + return 0; + + len = *p++; + + if (len >= skb->len) + return -1; + while (len > 0) { switch (*p & X25_FAC_CLASS_MASK) { case X25_FAC_CLASS_A: @@ -247,6 +255,8 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, memcpy(new, ours, sizeof(*new)); len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask); + if (len < 0) + return len; /* * They want reverse charging, we won't accept it. diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 96d922783547..b39072f3a297 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -89,6 +89,7 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametype) { struct x25_address source_addr, dest_addr; + int len; switch (frametype) { case X25_CALL_ACCEPTED: { @@ -106,11 +107,17 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp * Parse the data in the frame. */ skb_pull(skb, X25_STD_MIN_LEN); - skb_pull(skb, x25_addr_ntoa(skb->data, &source_addr, &dest_addr)); - skb_pull(skb, - x25_parse_facilities(skb, &x25->facilities, + + len = x25_parse_address_block(skb, &source_addr, + &dest_addr); + if (len > 0) + skb_pull(skb, len); + + len = x25_parse_facilities(skb, &x25->facilities, &x25->dte_facilities, - &x25->vc_facil_mask)); + &x25->vc_facil_mask); + if (len > 0) + skb_pull(skb, len); /* * Copy any Call User Data. */ -- cgit v1.2.3-59-g8ed1b From 45c4d015a92f72ec47acd0c7557abdc0c8a6499d Mon Sep 17 00:00:00 2001 From: Mark Lord Date: Wed, 7 Apr 2010 13:52:08 -0400 Subject: libata: Fix accesses at LBA28 boundary (old bug, but nasty) (v2) Most drives from Seagate, Hitachi, and possibly other brands, do not allow LBA28 access to sector number 0x0fffffff (2^28 - 1). So instead use LBA48 for such accesses. This bug could bite a lot of systems, especially when the user has taken care to align partitions to 4KB boundaries. On misaligned systems, it is less likely to be encountered, since a 4KB read would end at 0x10000000 rather than at 0x0fffffff. Signed-off-by: Mark Lord Signed-off-by: Jeff Garzik --- include/linux/ata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index b4c85e2adef5..700c5b9b3583 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -1025,8 +1025,8 @@ static inline int ata_ok(u8 status) static inline int lba_28_ok(u64 block, u32 n_block) { - /* check the ending block number */ - return ((block + n_block) < ((u64)1 << 28)) && (n_block <= 256); + /* check the ending block number: must be LESS THAN 0x0fffffff */ + return ((block + n_block) < ((1 << 28) - 1)) && (n_block <= 256); } static inline int lba_48_ok(u64 block, u32 n_block) -- cgit v1.2.3-59-g8ed1b From fc1c183353a113c71675fecd0485e5aa0fe68d72 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 7 Apr 2010 19:23:40 +0300 Subject: slab: Generify kernel pointer validation As suggested by Linus, introduce a kern_ptr_validate() helper that does some sanity checks to make sure a pointer is a valid kernel pointer. This is a preparational step for fixing SLUB kmem_ptr_validate(). Cc: Andrew Morton Cc: Christoph Lameter Cc: David Rientjes Cc: Ingo Molnar Cc: Matt Mackall Cc: Nick Piggin Signed-off-by: Pekka Enberg Signed-off-by: Linus Torvalds --- include/linux/slab.h | 1 + mm/slab.c | 13 +------------ mm/util.c | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 488446289cab..49d1247cd6d9 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -106,6 +106,7 @@ int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); unsigned int kmem_cache_size(struct kmem_cache *); const char *kmem_cache_name(struct kmem_cache *); +int kern_ptr_validate(const void *ptr, unsigned long size); int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr); /* diff --git a/mm/slab.c b/mm/slab.c index a9f325b28bed..bac0f4fcc216 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3602,21 +3602,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_notrace); */ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr) { - unsigned long addr = (unsigned long)ptr; - unsigned long min_addr = PAGE_OFFSET; - unsigned long align_mask = BYTES_PER_WORD - 1; unsigned long size = cachep->buffer_size; struct page *page; - if (unlikely(addr < min_addr)) - goto out; - if (unlikely(addr > (unsigned long)high_memory - size)) - goto out; - if (unlikely(addr & align_mask)) - goto out; - if (unlikely(!kern_addr_valid(addr))) - goto out; - if (unlikely(!kern_addr_valid(addr + size - 1))) + if (unlikely(!kern_ptr_validate(ptr, size))) goto out; page = virt_to_page(ptr); if (unlikely(!PageSlab(page))) diff --git a/mm/util.c b/mm/util.c index 834db7be240f..f5712e8964be 100644 --- a/mm/util.c +++ b/mm/util.c @@ -186,6 +186,27 @@ void kzfree(const void *p) } EXPORT_SYMBOL(kzfree); +int kern_ptr_validate(const void *ptr, unsigned long size) +{ + unsigned long addr = (unsigned long)ptr; + unsigned long min_addr = PAGE_OFFSET; + unsigned long align_mask = sizeof(void *) - 1; + + if (unlikely(addr < min_addr)) + goto out; + if (unlikely(addr > (unsigned long)high_memory - size)) + goto out; + if (unlikely(addr & align_mask)) + goto out; + if (unlikely(!kern_addr_valid(addr))) + goto out; + if (unlikely(!kern_addr_valid(addr + size - 1))) + goto out; + return 1; +out: + return 0; +} + /* * strndup_user - duplicate an existing string from user space * @s: The string to duplicate -- cgit v1.2.3-59-g8ed1b From ce82653d6cfcc95ba88c25908664878459fb1b8d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 6 Apr 2010 22:36:20 +0100 Subject: radix_tree_tag_get() is not as safe as the docs make out [ver #2] radix_tree_tag_get() is not safe to use concurrently with radix_tree_tag_set() or radix_tree_tag_clear(). The problem is that the double tag_get() in radix_tree_tag_get(): if (!tag_get(node, tag, offset)) saw_unset_tag = 1; if (height == 1) { int ret = tag_get(node, tag, offset); may see the value change due to the action of set/clear. RCU is no protection against this as no pointers are being changed, no nodes are being replaced according to a COW protocol - set/clear alter the node directly. The documentation in linux/radix-tree.h, however, says that radix_tree_tag_get() is an exception to the rule that "any function modifying the tree or tags (...) must exclude other modifications, and exclude any functions reading the tree". The problem is that the next statement in radix_tree_tag_get() checks that the tag doesn't vary over time: BUG_ON(ret && saw_unset_tag); This has been seen happening in FS-Cache: https://www.redhat.com/archives/linux-cachefs/2010-April/msg00013.html To this end, remove the BUG_ON() from radix_tree_tag_get() and note in various comments that the value of the tag may change whilst the RCU read lock is held, and thus that the return value of radix_tree_tag_get() may not be relied upon unless radix_tree_tag_set/clear() and radix_tree_delete() are excluded from running concurrently with it. Reported-by: Romain DEGEZ Signed-off-by: David Howells Acked-by: Nick Piggin Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 7 +++++++ lib/radix-tree.c | 12 ++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c5da74918096..55ca73cf25e5 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -121,6 +121,13 @@ do { \ * (Note, rcu_assign_pointer and rcu_dereference are not needed to control * access to data items when inserting into or looking up from the radix tree) * + * Note that the value returned by radix_tree_tag_get() may not be relied upon + * if only the RCU read lock is held. Functions to set/clear tags and to + * delete nodes running concurrently with it may affect its result such that + * two consecutive reads in the same locked section may return different + * values. If reliability is required, modification functions must also be + * excluded from concurrency. + * * radix_tree_tagged is able to be called without locking or RCU. */ diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 0871582aa29d..2a087e0f9863 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -555,6 +555,10 @@ EXPORT_SYMBOL(radix_tree_tag_clear); * * 0: tag not present or not set * 1: tag set + * + * Note that the return value of this function may not be relied on, even if + * the RCU lock is held, unless tag modification and node deletion are excluded + * from concurrency. */ int radix_tree_tag_get(struct radix_tree_root *root, unsigned long index, unsigned int tag) @@ -595,12 +599,8 @@ int radix_tree_tag_get(struct radix_tree_root *root, */ if (!tag_get(node, tag, offset)) saw_unset_tag = 1; - if (height == 1) { - int ret = tag_get(node, tag, offset); - - BUG_ON(ret && saw_unset_tag); - return !!ret; - } + if (height == 1) + return !!tag_get(node, tag, offset); node = rcu_dereference_raw(node->slots[offset]); shift -= RADIX_TREE_MAP_SHIFT; height--; -- cgit v1.2.3-59-g8ed1b From aa6fec3cdeb14ecc916eb78c4cd9ed79e4f7fe8d Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Wed, 31 Mar 2010 16:26:52 +0200 Subject: firewire: cdev: iso packet documentation Add the missing documentation for iso packets. Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 40b11013408e..011fdf1eaec5 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -438,7 +438,7 @@ struct fw_cdev_remove_descriptor { * @type: %FW_CDEV_ISO_CONTEXT_TRANSMIT or %FW_CDEV_ISO_CONTEXT_RECEIVE * @header_size: Header size to strip for receive contexts * @channel: Channel to bind to - * @speed: Speed to transmit at + * @speed: Speed for transmit contexts * @closure: To be returned in &fw_cdev_event_iso_interrupt * @handle: Handle to context, written back by kernel * @@ -451,6 +451,9 @@ struct fw_cdev_remove_descriptor { * If a context was successfully created, the kernel writes back a handle to the * context, which must be passed in for subsequent operations on that context. * + * For receive contexts, @header_size must be at least 4 and must be a multiple + * of 4. + * * Note that the effect of a @header_size > 4 depends on * &fw_cdev_get_info.version, as documented at &fw_cdev_event_iso_interrupt. */ @@ -481,10 +484,34 @@ struct fw_cdev_create_iso_context { * * &struct fw_cdev_iso_packet is used to describe isochronous packet queues. * - * Use the FW_CDEV_ISO_ macros to fill in @control. The sy and tag fields are - * specified by IEEE 1394a and IEC 61883. - * - * FIXME - finish this documentation + * Use the FW_CDEV_ISO_ macros to fill in @control. + * + * For transmit packets, the header length must be a multiple of 4 and specifies + * the numbers of bytes in @header that will be prepended to the packet's + * payload; these bytes are copied into the kernel and will not be accessed + * after the ioctl has returned. The sy and tag fields are copied to the iso + * packet header (these fields are specified by IEEE 1394a and IEC 61883-1). + * The skip flag specifies that no packet is to be sent in a frame; when using + * this, all other fields except the interrupt flag must be zero. + * + * For receive packets, the header length must be a multiple of the context's + * header size; if the header length is larger than the context's header size, + * multiple packets are queued for this entry. The sy and tag fields are + * ignored. If the sync flag is set, the context drops all packets until + * a packet with a matching sy field is received (the sync value to wait for is + * specified in the &fw_cdev_start_iso structure). The payload length defines + * how many payload bytes can be received for one packet (in addition to payload + * quadlets that have been defined as headers and are stripped and returned in + * the &fw_cdev_event_iso_interrupt structure). If more bytes are received, the + * additional bytes are dropped. If less bytes are received, the remaining + * bytes in this part of the payload buffer will not be written to, not even by + * the next packet, i.e., packets received in consecutive frames will not + * necessarily be consecutive in memory. If an entry has queued multiple + * packets, the payload length is divided equally among them. + * + * When a packet with the interrupt flag set has been completed, the + * &fw_cdev_event_iso_interrupt event will be sent. An entry that has queued + * multiple receive packets is completed when its last packet is completed. */ struct fw_cdev_iso_packet { __u32 control; @@ -501,7 +528,7 @@ struct fw_cdev_iso_packet { * Queue a number of isochronous packets for reception or transmission. * This ioctl takes a pointer to an array of &fw_cdev_iso_packet structs, * which describe how to transmit from or receive into a contiguous region - * of a mmap()'ed payload buffer. As part of the packet descriptors, + * of a mmap()'ed payload buffer. As part of transmit packet descriptors, * a series of headers can be supplied, which will be prepended to the * payload during DMA. * -- cgit v1.2.3-59-g8ed1b From ca658b1e29d6be939207532e337fb640eb697f71 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 10 Apr 2010 12:23:09 +0200 Subject: firewire: cdev: comment fixlet Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 011fdf1eaec5..6ffb24a1f2f2 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -647,8 +647,8 @@ struct fw_cdev_get_cycle_timer2 { * instead of allocated. * An %FW_CDEV_EVENT_ISO_RESOURCE_DEALLOCATED event concludes this operation. * - * To summarize, %FW_CDEV_IOC_DEALLOCATE_ISO_RESOURCE allocates iso resources - * for the lifetime of the fd or handle. + * To summarize, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE allocates iso resources + * for the lifetime of the fd or @handle. * In contrast, %FW_CDEV_IOC_ALLOCATE_ISO_RESOURCE_ONCE allocates iso resources * for the duration of a bus generation. * -- cgit v1.2.3-59-g8ed1b From 0df5dd4aae211edeeeb84f7f84f6d093406d7c22 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 11 Apr 2010 16:48:44 -0400 Subject: NFSv4: fix delegated locking Arnaud Giersch reports that NFSv4 locking is broken when we hold a delegation since commit 8e469ebd6dc32cbaf620e134d79f740bf0ebab79 (NFSv4: Don't allow posix locking against servers that don't support it). According to Arnaud, the lock succeeds the first time he opens the file (since we cannot do a delegated open) but then fails after we start using delegated opens. The following patch fixes it by ensuring that locking behaviour is governed by a per-filesystem capability flag that is initially set, but gets cleared if the server ever returns an OPEN without the NFS4_OPEN_RESULT_LOCKTYPE_POSIX flag being set. Reported-by: Arnaud Giersch Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/client.c | 3 ++- fs/nfs/nfs4proc.c | 4 +++- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2a3d352c0bff..a8766c4ef2e0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1294,7 +1294,8 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| + NFS_CAP_POSIX_LOCK; server->options = data->options; /* Get a client record */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fe0cd9eb1d4d..638067007c65 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1523,6 +1523,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_post_op_update_inode(dir, o_res->dir_attr); } else nfs_refresh_inode(dir, o_res->dir_attr); + if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) + server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(data); if (status != 0) @@ -1664,7 +1666,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in status = PTR_ERR(state); if (IS_ERR(state)) goto err_opendata_put; - if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0) + if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 717a5e54eb1d..e82957acea56 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -176,6 +176,7 @@ struct nfs_server { #define NFS_CAP_ATIME (1U << 11) #define NFS_CAP_CTIME (1U << 12) #define NFS_CAP_MTIME (1U << 13) +#define NFS_CAP_POSIX_LOCK (1U << 14) /* maximum number of slots to use */ -- cgit v1.2.3-59-g8ed1b From b62730baea32f86fe91a7930e4b7ee8d82778b79 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 9 Apr 2010 15:39:10 -0700 Subject: rcu: Add rcu_access_pointer and rcu_dereference_protected This patch adds variants of rcu_dereference() that handle situations where the RCU-protected data structure cannot change, perhaps due to our holding the update-side lock, or where the RCU-protected pointer is only to be fetched, not dereferenced. These are needed due to some performance concerns with using rcu_dereference() where it is not required, aside from the need for lockdep/sparse checking. The new rcu_access_pointer() primitive is for the case where the pointer is be fetch and not dereferenced. This primitive may be used without protection, RCU or otherwise, due to the fact that it uses ACCESS_ONCE(). The new rcu_dereference_protected() primitive is for the case where updates are prevented, for example, due to holding the update-side lock. This primitive does neither ACCESS_ONCE() nor smp_read_barrier_depends(), so can only be used when updates are somehow prevented. Suggested-by: David Howells Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: eric.dumazet@gmail.com LKML-Reference: <1270852752-25278-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 872a98e13d6a..8fe86609441f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -209,12 +209,44 @@ static inline int rcu_read_lock_sched_held(void) rcu_dereference_raw(p); \ }) +/** + * rcu_dereference_protected - fetch RCU pointer when updates prevented + * + * Return the value of the specified RCU-protected pointer, but omit + * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This + * is useful in cases where update-side locks prevent the value of the + * pointer from changing. Please note that this primitive does -not- + * prevent the compiler from repeating this reference or combining it + * with other references, so it should not be used without protection + * of appropriate locks. + */ +#define rcu_dereference_protected(p, c) \ + ({ \ + if (debug_lockdep_rcu_enabled() && !(c)) \ + lockdep_rcu_dereference(__FILE__, __LINE__); \ + (p); \ + }) + #else /* #ifdef CONFIG_PROVE_RCU */ #define rcu_dereference_check(p, c) rcu_dereference_raw(p) +#define rcu_dereference_protected(p, c) (p) #endif /* #else #ifdef CONFIG_PROVE_RCU */ +/** + * rcu_access_pointer - fetch RCU pointer with no dereferencing + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. This may also be used in cases where update-side locks prevent + * the value of the pointer from changing, but rcu_dereference_protected() + * is a lighter-weight primitive for this use case. + */ +#define rcu_access_pointer(p) ACCESS_ONCE(p) + /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * -- cgit v1.2.3-59-g8ed1b From c08c68dd76bd6b776bc0eb45a5e8f354ed772cdf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 9 Apr 2010 15:39:11 -0700 Subject: rcu: Better explain the condition parameter of rcu_dereference_check() Better explain the condition parameter of rcu_dereference_check() that describes the conditions under which the dereference is permitted to take place (and incorporate Yong Zhang's suggestion). This condition is only checked under lockdep proving. Signed-off-by: David Howells Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: eric.dumazet@gmail.com LKML-Reference: <1270852752-25278-2-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8fe86609441f..9f1ddfef84b5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -195,12 +195,30 @@ static inline int rcu_read_lock_sched_held(void) /** * rcu_dereference_check - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * - * Do an rcu_dereference(), but check that the context is correct. - * For example, rcu_dereference_check(gp, rcu_read_lock_held()) to - * ensure that the rcu_dereference_check() executes within an RCU - * read-side critical section. It is also possible to check for - * locks being held, for example, by using lockdep_is_held(). + * Do an rcu_dereference(), but check that the conditions under which the + * dereference will take place are correct. Typically the conditions indicate + * the various locking conditions that should be held at that point. The check + * should return true if the conditions are satisfied. + * + * For example: + * + * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || + * lockdep_is_held(&foo->lock)); + * + * could be used to indicate to lockdep that foo->bar may only be dereferenced + * if either the RCU read lock is held, or that the lock required to replace + * the bar struct at foo->bar is held. + * + * Note that the list of conditions may also include indications of when a lock + * need not be held, for example during initialisation or destruction of the + * target struct: + * + * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || + * lockdep_is_held(&foo->lock) || + * atomic_read(&foo->usage) == 0); */ #define rcu_dereference_check(p, c) \ ({ \ -- cgit v1.2.3-59-g8ed1b From 19b3eecc21b65a24b0aae2684ca0c8e1b99ef802 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 11 Apr 2010 11:52:12 +0200 Subject: firewire: cdev: change license of exported header files to MIT license MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Among else, this allows projects like libdc1394 to carry copies of the ABI related header files without them or distributors having to worry about effects on the project's overall license terms. Switch to MIT license as suggested by Kristian. Also update the year in the copyright statement according to source history. Cc: Jay Fenlason Acked-by: Clemens Ladisch Signed-off-by: Stefan Richter Signed-off-by: Kristian Høgsberg --- include/linux/firewire-cdev.h | 35 ++++++++++++++++++++--------------- include/linux/firewire-constants.h | 29 +++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 6ffb24a1f2f2..81f3b14d5d76 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -1,21 +1,26 @@ /* * Char device interface. * - * Copyright (C) 2005-2006 Kristian Hoegsberg - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * Copyright (C) 2005-2007 Kristian Hoegsberg + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. */ #ifndef _LINUX_FIREWIRE_CDEV_H diff --git a/include/linux/firewire-constants.h b/include/linux/firewire-constants.h index b316770a43fd..9c63f06e67f2 100644 --- a/include/linux/firewire-constants.h +++ b/include/linux/firewire-constants.h @@ -1,3 +1,28 @@ +/* + * IEEE 1394 constants. + * + * Copyright (C) 2005-2007 Kristian Hoegsberg + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + #ifndef _LINUX_FIREWIRE_CONSTANTS_H #define _LINUX_FIREWIRE_CONSTANTS_H @@ -21,7 +46,7 @@ #define EXTCODE_WRAP_ADD 0x6 #define EXTCODE_VENDOR_DEPENDENT 0x7 -/* Juju specific tcodes */ +/* Linux firewire-core (Juju) specific tcodes */ #define TCODE_LOCK_MASK_SWAP (0x10 | EXTCODE_MASK_SWAP) #define TCODE_LOCK_COMPARE_SWAP (0x10 | EXTCODE_COMPARE_SWAP) #define TCODE_LOCK_FETCH_ADD (0x10 | EXTCODE_FETCH_ADD) @@ -36,7 +61,7 @@ #define RCODE_TYPE_ERROR 0x6 #define RCODE_ADDRESS_ERROR 0x7 -/* Juju specific rcodes */ +/* Linux firewire-core (Juju) specific rcodes */ #define RCODE_SEND_ERROR 0x10 #define RCODE_CANCELLED 0x11 #define RCODE_BUSY 0x12 -- cgit v1.2.3-59-g8ed1b From a2612cb16d4d8447793609cbdd2a2f4f156c0020 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 15 Apr 2010 22:16:04 +0200 Subject: firewire: cdev: fix cut+paste mistake in disclaimer This was supposed to be generic "authors or copyright holders"; I mistakenly picked up text from a wrong file. Reported-by: Daniel K. Signed-off-by: Stefan Richter --- include/linux/firewire-cdev.h | 2 +- include/linux/firewire-constants.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index 81f3b14d5d76..68f883b30a53 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -17,7 +17,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. diff --git a/include/linux/firewire-constants.h b/include/linux/firewire-constants.h index 9c63f06e67f2..9b4bb5fbba4b 100644 --- a/include/linux/firewire-constants.h +++ b/include/linux/firewire-constants.h @@ -17,7 +17,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. -- cgit v1.2.3-59-g8ed1b From bc293d62b26ec590afc90a9e0a31c45d355b7bd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 Apr 2010 12:50:39 -0700 Subject: rcu: Make RCU lockdep check the lockdep_recursion variable The lockdep facility temporarily disables lockdep checking by incrementing the current->lockdep_recursion variable. Such disabling happens in NMIs and in other situations where lockdep might expect to recurse on itself. This patch therefore checks current->lockdep_recursion, disabling RCU lockdep splats when this variable is non-zero. In addition, this patch removes the "likely()", as suggested by Lai Jiangshan. Reported-by: Frederic Weisbecker Reported-by: David Miller Tested-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: eric.dumazet@gmail.com LKML-Reference: <20100415195039.GA22623@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 5 +---- kernel/rcupdate.c | 7 +++++++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9f1ddfef84b5..07db2feb8572 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -101,10 +101,7 @@ extern struct lockdep_map rcu_sched_lock_map; # define rcu_read_release_sched() \ lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) -static inline int debug_lockdep_rcu_enabled(void) -{ - return likely(rcu_scheduler_active && debug_locks); -} +extern int debug_lockdep_rcu_enabled(void); /** * rcu_read_lock_held - might we be in RCU read-side critical section? diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 63fe25433980..03a7ea1579f6 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -69,6 +69,13 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); #ifdef CONFIG_DEBUG_LOCK_ALLOC +int debug_lockdep_rcu_enabled(void) +{ + return rcu_scheduler_active && debug_locks && + current->lockdep_recursion == 0; +} +EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); + /** * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? * -- cgit v1.2.3-59-g8ed1b From 79b9517a33a283c5d9db875c263670ed1e055f7e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 19 Apr 2010 17:54:31 +1000 Subject: drm/radeon/kms: add FireMV 2400 PCI ID. This is an M24/X600 chip. From RH# 581927 cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 04a6ebc27b96..2d428b088cc8 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -6,6 +6,7 @@ {0x1002, 0x3150, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x3152, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x3154, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x3155, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x3E50, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x3E54, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x4136, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS100|RADEON_IS_IGP}, \ -- cgit v1.2.3-59-g8ed1b From be1a50d4eba4cdb3ebf9d97a0a8693c153436775 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 3 Apr 2010 17:37:45 +0200 Subject: regulator: Let drivers know when they use the stub API Have the stub variant of regulator_get() return NULL, so that drivers can (but still don't have to) handle this case specifically. Signed-off-by: Jean Delvare Cc: Mark Brown Cc: Jerome Oufella Acked-by: Mark Brown Signed-off-by: Liam Girdwood --- include/linux/regulator/consumer.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 28c9fd020d39..ebd747265294 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -183,9 +183,13 @@ static inline struct regulator *__must_check regulator_get(struct device *dev, { /* Nothing except the stubbed out regulator API should be * looking at the value except to check if it is an error - * value so the actual return value doesn't matter. + * value. Drivers are free to handle NULL specifically by + * skipping all regulator API calls, but they don't have to. + * Drivers which don't, should make sure they properly handle + * corner cases of the API, such as regulator_get_voltage() + * returning 0. */ - return (struct regulator *)id; + return NULL; } static inline void regulator_put(struct regulator *regulator) { -- cgit v1.2.3-59-g8ed1b From 87bf6e7de1134f48681fd2ce4b7c1ec45458cb6d Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 12 Apr 2010 19:35:35 +0900 Subject: KVM: fix the handling of dirty bitmaps to avoid overflows Int is not long enough to store the size of a dirty bitmap. This patch fixes this problem with the introduction of a wrapper function to calculate the sizes of dirty bitmaps. Note: in mark_page_dirty(), we have to consider the fact that __set_bit() takes the offset as int, not long. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 9 +++++---- arch/powerpc/kvm/book3s.c | 5 +++-- arch/x86/kvm/x86.c | 5 +++-- include/linux/kvm_host.h | 5 +++++ virt/kvm/kvm_main.c | 13 ++++++++----- 5 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 73c5c2b05f64..7f3c0a2e60cd 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1802,7 +1802,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, { struct kvm_memory_slot *memslot; int r, i; - long n, base; + long base; + unsigned long n; unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); @@ -1815,7 +1816,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); base = memslot->base_gfn / BITS_PER_LONG; for (i = 0; i < n/sizeof(long); ++i) { @@ -1831,7 +1832,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { int r; - int n; + unsigned long n; struct kvm_memory_slot *memslot; int is_dirty = 0; @@ -1850,7 +1851,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (is_dirty) { kvm_flush_remote_tlbs(kvm); memslot = &kvm->memslots->memslots[log->slot]; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } r = 0; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 25da07fd9f77..604af29b71ed 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1004,7 +1004,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_vcpu *vcpu; ulong ga, ga_end; int is_dirty = 0; - int r, n; + int r; + unsigned long n; mutex_lock(&kvm->slots_lock); @@ -1022,7 +1023,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, kvm_for_each_vcpu(n, vcpu, kvm) kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9ad3d064c781..45aa90f8cc57 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2612,8 +2612,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - int r, n, i; + int r, i; struct kvm_memory_slot *memslot; + unsigned long n; unsigned long is_dirty = 0; unsigned long *dirty_bitmap = NULL; @@ -2628,7 +2629,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); r = -ENOMEM; dirty_bitmap = vmalloc(n); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a3fd0f91d943..9ad825e1c79b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -119,6 +119,11 @@ struct kvm_memory_slot { int user_alloc; }; +static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) +{ + return ALIGN(memslot->npages, BITS_PER_LONG) / 8; +} + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5a0cd194dce0..364daacafb58 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -648,7 +648,7 @@ skip_lpage: /* Allocate page dirty bitmap if needed */ if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { - unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; + unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new); new.dirty_bitmap = vmalloc(dirty_bytes); if (!new.dirty_bitmap) @@ -768,7 +768,7 @@ int kvm_get_dirty_log(struct kvm *kvm, { struct kvm_memory_slot *memslot; int r, i; - int n; + unsigned long n; unsigned long any = 0; r = -EINVAL; @@ -780,7 +780,7 @@ int kvm_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); for (i = 0; !any && i < n/sizeof(long); ++i) any = memslot->dirty_bitmap[i]; @@ -1186,10 +1186,13 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) memslot = gfn_to_memslot_unaliased(kvm, gfn); if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; + unsigned long *p = memslot->dirty_bitmap + + rel_gfn / BITS_PER_LONG; + int offset = rel_gfn % BITS_PER_LONG; /* avoid RMW */ - if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) - generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); + if (!generic_test_le_bit(offset, p)) + generic___set_le_bit(offset, p); } } -- cgit v1.2.3-59-g8ed1b From e80e2a60ff7914dae691345a976c80bbbff3ec74 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 30 Mar 2010 16:48:25 -0700 Subject: KVM: Increase NR_IOBUS_DEVS limit to 200 This patch increases the current hardcoded limit of NR_IOBUS_DEVS from 6 to 200. We are hitting this limit when creating a guest with more than 1 virtio-net device using vhost-net backend. Each virtio-net device requires 2 such devices to service notifications from rx/tx queues. Signed-off-by: Sridhar Samudrala Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9ad825e1c79b..169d07758ee5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -54,7 +54,7 @@ extern struct kmem_cache *kvm_vcpu_cache; */ struct kvm_io_bus { int dev_count; -#define NR_IOBUS_DEVS 6 +#define NR_IOBUS_DEVS 200 struct kvm_io_device *devs[NR_IOBUS_DEVS]; }; -- cgit v1.2.3-59-g8ed1b