From 8618e30bc14b06bfafa0f164cca7b0e06451f88a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 8 Oct 2012 20:37:30 -0700 Subject: rbd: let con_work() handle backoff Both ceph_fault() and con_work() include handling for imposing a delay before doing further processing on a faulted connection. The latter is used only if ceph_fault() is unable to. Instead, just let con_work() always be responsible for implementing the delay. After setting up the delay value, set the BACKOFF flag on the connection unconditionally and call queue_con() to ensure con_work() will get called to handle it. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index cad0d17ec45e..973c16c20c42 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2398,24 +2398,8 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - con->ops->get(con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("fault queued %p delay %lu\n", con, con->delay); - } else { - con->ops->put(con); - dout("fault failed to queue %p delay %lu, backoff\n", - con, con->delay); - /* - * In many cases we see a socket state change - * while con_work is running and end up - * queuing (non-delayed) work, such that we - * can't backoff with a delay. Set a flag so - * that when con_work restarts we schedule the - * delay then. - */ - set_bit(CON_FLAG_BACKOFF, &con->flags); - } + set_bit(CON_FLAG_BACKOFF, &con->flags); + queue_con(con); } out_unlock: -- cgit v1.2.3-59-g8ed1b From 802c6d967fbdcd2cbc91b917425661bb8bbfaade Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 8 Oct 2012 20:37:30 -0700 Subject: rbd: define common queue_con_delay() This patch defines a single function, queue_con_delay() to call queue_delayed_work() for a connection. It basically generalizes what was previously queue_con() by adding the delay argument. queue_con() is now a simple helper that passes 0 for its delay. queue_con_delay() returns 0 if it queued work or an errno if it did not for some reason. If con_work() finds the BACKOFF flag set for a connection, it now calls queue_con_delay() to handle arranging to start again after a delay. Note about connection reference counts: con_work() only ever gets called as a work item function. At the time that work is scheduled, a reference to the connection is acquired, and the corresponding con_work() call is then responsible for dropping that reference before it returns. Previously, the backoff handling inside con_work() silently handed off its reference to delayed work it scheduled. Now that queue_con_delay() is used, a new reference is acquired for the newly-scheduled work, and the original reference is dropped by the con->ops->put() call at the end of the function. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 973c16c20c42..66f6f56bcb23 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2244,22 +2244,33 @@ bad_tag: /* - * Atomically queue work on a connection. Bump @con reference to - * avoid races with connection teardown. + * Atomically queue work on a connection after the specified delay. + * Bump @con reference to avoid races with connection teardown. + * Returns 0 if work was queued, or an error code otherwise. */ -static void queue_con(struct ceph_connection *con) +static int queue_con_delay(struct ceph_connection *con, unsigned long delay) { if (!con->ops->get(con)) { - dout("queue_con %p ref count 0\n", con); - return; + dout("%s %p ref count 0\n", __func__, con); + + return -ENOENT; } - if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) { - dout("queue_con %p - already queued\n", con); + if (!queue_delayed_work(ceph_msgr_wq, &con->work, delay)) { + dout("%s %p - already queued\n", __func__, con); con->ops->put(con); - } else { - dout("queue_con %p\n", con); + + return -EBUSY; } + + dout("%s %p %lu\n", __func__, con, delay); + + return 0; +} + +static void queue_con(struct ceph_connection *con) +{ + (void) queue_con_delay(con, 0); } /* @@ -2294,14 +2305,11 @@ restart: if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { dout("con_work %p backing off\n", con); - if (queue_delayed_work(ceph_msgr_wq, &con->work, - round_jiffies_relative(con->delay))) { - dout("con_work %p backoff %lu\n", con, con->delay); - mutex_unlock(&con->mutex); - return; - } else { + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { dout("con_work %p FAILED to back off %lu\n", con, con->delay); + BUG_ON(ret == -ENOENT); set_bit(CON_FLAG_BACKOFF, &con->flags); } goto done; -- cgit v1.2.3-59-g8ed1b From 7246240c7c186542f73af4fadc744d66440f616f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 25 Oct 2012 08:49:41 -0700 Subject: libceph: avoid NULL kref_put from NULL alloc_msg return The ceph_on_in_msg_alloc() method calls the ->alloc_msg() helper which may return NULL. It also drops con->mutex while it allocates a message, which means that the connection state may change (e.g., get closed). If that happens, we clean up and bail out. Avoid calling ceph_msg_put() on a NULL return value and triggering a crash. This was observed when an ->alloc_msg() call races with a timeout that resends a zillion messages and resets the connection, and ->alloc_msg() returns NULL (because the request was resent to another target). Fixes http://tracker.newdream.net/issues/3342 Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- net/ceph/messenger.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 66f6f56bcb23..1041114453db 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2742,7 +2742,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) msg = con->ops->alloc_msg(con, hdr, skip); mutex_lock(&con->mutex); if (con->state != CON_STATE_OPEN) { - ceph_msg_put(msg); + if (msg) + ceph_msg_put(msg); return -EAGAIN; } con->in_msg = msg; -- cgit v1.2.3-59-g8ed1b From 0ed7285e0001b960c888e5455ae982025210ed3d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 29 Oct 2012 11:01:42 -0700 Subject: libceph: fix osdmap decode error paths Ensure that we set the err value correctly so that we do not pass a 0 value to ERR_PTR and confuse the calling code. (In particular, osd_client.c handle_map() will BUG(!newmap)). Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- net/ceph/osdmap.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 5433fb0eb3c6..f552aa48fd9e 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -645,10 +645,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); + err = -ENOMEM; pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; pi->id = ceph_decode_32(p); + err = -EINVAL; ev = ceph_decode_8(p); /* encoding version */ if (ev > CEPH_PG_POOL_VERSION) { pr_warning("got unknown v %d > %d of ceph_pg_pool\n", @@ -664,8 +666,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) __insert_pg_pool(&map->pg_pools, pi); } - if (version >= 5 && __decode_pool_names(p, end, map) < 0) - goto bad; + if (version >= 5) { + err = __decode_pool_names(p, end, map); + if (err < 0) { + dout("fail to decode pool names"); + goto bad; + } + } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -745,7 +752,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) return map; bad: - dout("osdmap_decode fail\n"); + dout("osdmap_decode fail err %d\n", err); ceph_osdmap_destroy(map); return ERR_PTR(err); } @@ -839,6 +846,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (ev > CEPH_PG_POOL_VERSION) { pr_warning("got unknown v %d > %d of ceph_pg_pool\n", ev, CEPH_PG_POOL_VERSION); + err = -EINVAL; goto bad; } pi = __lookup_pg_pool(&map->pg_pools, pool); @@ -855,8 +863,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, if (err < 0) goto bad; } - if (version >= 5 && __decode_pool_names(p, end, map) < 0) - goto bad; + if (version >= 5) { + err = __decode_pool_names(p, end, map); + if (err < 0) + goto bad; + } /* old_pool */ ceph_decode_32_safe(p, end, len, bad); @@ -932,15 +943,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, (void) __remove_pg_mapping(&map->pg_temp, pgid); /* insert */ - if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) { - err = -EINVAL; + err = -EINVAL; + if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) goto bad; - } + err = -ENOMEM; pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); - if (!pg) { - err = -ENOMEM; + if (!pg) goto bad; - } pg->pgid = pgid; pg->len = pglen; for (j = 0; j < pglen; j++) -- cgit v1.2.3-59-g8ed1b From 72afc71ffca0f444ee0e1ef8c7e34ab209bb48b3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 30 Oct 2012 19:40:33 -0500 Subject: libceph: define ceph_pg_pool_name_by_id() Define and export function ceph_pg_pool_name_by_id() to supply the name of a pg pool whose id is given. This will be used by the next patch. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osdmap.h | 1 + net/ceph/osdmap.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'net') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index e88a620b9f8a..5ea57ba69320 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -123,6 +123,7 @@ extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); +extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id); extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name); #endif diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index f552aa48fd9e..de73214b5d26 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -469,6 +469,22 @@ static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) return NULL; } +const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id) +{ + struct ceph_pg_pool_info *pi; + + if (id == CEPH_NOPOOL) + return NULL; + + if (WARN_ON_ONCE(id > (u64) INT_MAX)) + return NULL; + + pi = __lookup_pg_pool(&map->pg_pools, (int) id); + + return pi ? pi->name : NULL; +} +EXPORT_SYMBOL(ceph_pg_pool_name_by_id); + int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name) { struct rb_node *rbp; -- cgit v1.2.3-59-g8ed1b From 83aff95eb9d60aff5497e9f44a2ae906b86d8e88 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 28 Nov 2012 12:28:24 -0800 Subject: libceph: remove 'osdtimeout' option This would reset a connection with any OSD that had an outstanding request that was taking more than N seconds. The idea was that if the OSD was buggy, the client could compensate by resending the request. In reality, this only served to hide server bugs, and we haven't actually seen such a bug in quite a while. Moreover, the userspace client code never did this. More importantly, often the request is taking a long time because the OSD is trying to recover, or overloaded, and killing the connection and retrying would only make the situation worse by giving the OSD more work to do. Signed-off-by: Sage Weil Reviewed-by: Alex Elder --- fs/ceph/super.c | 2 -- include/linux/ceph/libceph.h | 2 -- net/ceph/ceph_common.c | 3 +-- net/ceph/osd_client.c | 47 ++++---------------------------------------- 4 files changed, 5 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 2f586b0e5e0f..fcda1c73a1e5 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -403,8 +403,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); - if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) - seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, ",osdkeepalivetimeout=%d", opt->osd_keepalive_timeout); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 42624789b06f..317aff8feb0a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -43,7 +43,6 @@ struct ceph_options { struct ceph_entity_addr my_addr; int mount_timeout; int osd_idle_ttl; - int osd_timeout; int osd_keepalive_timeout; /* @@ -63,7 +62,6 @@ struct ceph_options { * defaults */ #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 -#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ #define CEPH_OSD_KEEPALIVE_DEFAULT 5 #define CEPH_OSD_IDLE_TTL_DEFAULT 60 diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index a8020293f342..ee71ea26777a 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -305,7 +305,6 @@ ceph_parse_options(char *options, const char *dev_name, /* start with defaults */ opt->flags = CEPH_OPT_DEFAULT; - opt->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ @@ -391,7 +390,7 @@ ceph_parse_options(char *options, const char *dev_name, /* misc */ case Opt_osdtimeout: - opt->osd_timeout = intval; + pr_warning("ignoring deprecated osdtimeout option\n"); break; case Opt_osdkeepalivetimeout: opt->osd_keepalive_timeout = intval; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ccbdfbba9e53..7ebfe13267e6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -608,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, } } -static void kick_osd_requests(struct ceph_osd_client *osdc, - struct ceph_osd *kickosd) -{ - mutex_lock(&osdc->request_mutex); - __kick_osd_requests(osdc, kickosd); - mutex_unlock(&osdc->request_mutex); -} - /* * If the osd connection drops, we need to resubmit all requests. */ @@ -629,7 +621,9 @@ static void osd_reset(struct ceph_connection *con) dout("osd_reset osd%d\n", osd->o_osd); osdc = osd->o_osdc; down_read(&osdc->map_sem); - kick_osd_requests(osdc, osd); + mutex_lock(&osdc->request_mutex); + __kick_osd_requests(osdc, osd); + mutex_unlock(&osdc->request_mutex); send_queued(osdc); up_read(&osdc->map_sem); } @@ -1091,12 +1085,10 @@ static void handle_timeout(struct work_struct *work) { struct ceph_osd_client *osdc = container_of(work, struct ceph_osd_client, timeout_work.work); - struct ceph_osd_request *req, *last_req = NULL; + struct ceph_osd_request *req; struct ceph_osd *osd; - unsigned long timeout = osdc->client->options->osd_timeout * HZ; unsigned long keepalive = osdc->client->options->osd_keepalive_timeout * HZ; - unsigned long last_stamp = 0; struct list_head slow_osds; dout("timeout\n"); down_read(&osdc->map_sem); @@ -1105,37 +1097,6 @@ static void handle_timeout(struct work_struct *work) mutex_lock(&osdc->request_mutex); - /* - * reset osds that appear to be _really_ unresponsive. this - * is a failsafe measure.. we really shouldn't be getting to - * this point if the system is working properly. the monitors - * should mark the osd as failed and we should find out about - * it from an updated osd map. - */ - while (timeout && !list_empty(&osdc->req_lru)) { - req = list_entry(osdc->req_lru.next, struct ceph_osd_request, - r_req_lru_item); - - /* hasn't been long enough since we sent it? */ - if (time_before(jiffies, req->r_stamp + timeout)) - break; - - /* hasn't been long enough since it was acked? */ - if (req->r_request->ack_stamp == 0 || - time_before(jiffies, req->r_request->ack_stamp + timeout)) - break; - - BUG_ON(req == last_req && req->r_stamp == last_stamp); - last_req = req; - last_stamp = req->r_stamp; - - osd = req->r_osd; - BUG_ON(!osd); - pr_warning(" tid %llu timed out on osd%d, will reset osd\n", - req->r_tid, osd->o_osd); - __kick_osd_requests(osdc, osd); - } - /* * ping osds that are a bit slow. this ensures that if there * is a break in the TCP connection we will notice, and reopen -- cgit v1.2.3-59-g8ed1b From 7d5f24812bd182a2471cb69c1c2baf0648332e1f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 29 Nov 2012 08:37:03 -0600 Subject: ceph: don't reference req after put In __unregister_request(), there is a call to list_del_init() referencing a request that was the subject of a call to ceph_osdc_put_request() on the previous line. This is not safe, because the request structure could have been freed by the time we reach the list_del_init(). Fix this by reversing the order of these lines. Signed-off-by: Alex Elder Reviewed-off-by: Sage Weil --- net/ceph/osd_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7ebfe13267e6..ac7be7202faa 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -871,9 +871,9 @@ static void __unregister_request(struct ceph_osd_client *osdc, req->r_osd = NULL; } + list_del_init(&req->r_req_lru_item); ceph_osdc_put_request(req); - list_del_init(&req->r_req_lru_item); if (osdc->num_requests == 0) { dout(" no requests, canceling timeout\n"); __cancel_osd_timeout(osdc); -- cgit v1.2.3-59-g8ed1b From 685a7555ca69030739ddb57a47f0ea8ea80196a4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 7 Dec 2012 09:57:58 -0600 Subject: libceph: avoid using freed osd in __kick_osd_requests() If an osd has no requests and no linger requests, __reset_osd() will just remove it with a call to __remove_osd(). That drops a reference to the osd, and therefore the osd may have been free by the time __reset_osd() returns. That function offers no indication this may have occurred, and as a result the osd will continue to be used even when it's no longer valid. Change__reset_osd() so it returns an error (ENODEV) when it deletes the osd being reset. And change __kick_osd_requests() so it returns immediately (before referencing osd again) if __reset_osd() returns *any* error. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ac7be7202faa..60c74c1f1ea9 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -581,7 +581,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, dout("__kick_osd_requests osd%d\n", osd->o_osd); err = __reset_osd(osdc, osd); - if (err == -EAGAIN) + if (err) return; list_for_each_entry(req, &osd->o_requests, r_osd_item) { @@ -745,6 +745,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); + ret = -ENODEV; } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], &osd->o_con.peer_addr, sizeof(osd->o_con.peer_addr)) == 0 && -- cgit v1.2.3-59-g8ed1b From 61c74035626beb25a39b0273ccf7d75510bc36a1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 6 Dec 2012 09:37:23 -0600 Subject: rbd: remove linger unconditionally In __unregister_linger_request(), the request is being removed from the osd client's req_linger list only when the request has a non-null osd pointer. It should be done whether or not the request currently has an osd. This is most likely a non-issue because I believe the request will always have an osd when this function is called. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 60c74c1f1ea9..32bd696b39a8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -906,8 +906,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { dout("__unregister_linger_request %p\n", req); + list_del_init(&req->r_linger_item); if (req->r_osd) { - list_del_init(&req->r_linger_item); list_del_init(&req->r_linger_osd); if (list_empty(&req->r_osd->o_requests) && -- cgit v1.2.3-59-g8ed1b From 7bb21d68c535ad8be38e14a715632ae398b37ac1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 7 Dec 2012 19:50:07 -0600 Subject: libceph: socket can close in any connection state A connection's socket can close for any reason, independent of the state of the connection (and without irrespective of the connection mutex). As a result, the connectino can be in pretty much any state at the time its socket is closed. Handle those other cases at the top of con_work(). Pull this whole block of code into a separate function to reduce the clutter. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1041114453db..4b04ccc60db1 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2273,6 +2273,35 @@ static void queue_con(struct ceph_connection *con) (void) queue_con_delay(con, 0); } +static bool con_sock_closed(struct ceph_connection *con) +{ + if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) + return false; + +#define CASE(x) \ + case CON_STATE_ ## x: \ + con->error_msg = "socket closed (con state " #x ")"; \ + break; + + switch (con->state) { + CASE(CLOSED); + CASE(PREOPEN); + CASE(CONNECTING); + CASE(NEGOTIATING); + CASE(OPEN); + CASE(STANDBY); + default: + pr_warning("%s con %p unrecognized state %lu\n", + __func__, con, con->state); + con->error_msg = "unrecognized con state"; + BUG(); + break; + } +#undef CASE + + return true; +} + /* * Do some work on a connection. Drop a connection ref when we're done. */ @@ -2284,24 +2313,8 @@ static void con_work(struct work_struct *work) mutex_lock(&con->mutex); restart: - if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) { - switch (con->state) { - case CON_STATE_CONNECTING: - con->error_msg = "connection failed"; - break; - case CON_STATE_NEGOTIATING: - con->error_msg = "negotiation failed"; - break; - case CON_STATE_OPEN: - con->error_msg = "socket closed"; - break; - default: - dout("unrecognized con state %d\n", (int)con->state); - con->error_msg = "unrecognized con state"; - BUG(); - } + if (con_sock_closed(con)) goto fault; - } if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { dout("con_work %p backing off\n", con); -- cgit v1.2.3-59-g8ed1b From 28362986f8743124b3a0fda20a8ed3e80309cce1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 14 Dec 2012 16:47:41 -0600 Subject: libceph: report connection fault with warning When a connection's socket disconnects, or if there's a protocol error of some kind on the connection, a fault is signaled and the connection is reset (closed and reopened, basically). We currently get an error message on the log whenever this occurs. A ceph connection will attempt to reestablish a socket connection repeatedly if a fault occurs. This means that these error messages will get repeatedly added to the log, which is undesirable. Change the error message to be a warning, so they don't get logged by default. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/messenger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 4b04ccc60db1..4d111fd2b492 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2377,7 +2377,7 @@ fault: static void ceph_fault(struct ceph_connection *con) __releases(con->mutex) { - pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), + pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); -- cgit v1.2.3-59-g8ed1b From f407731d12214e7686819018f3a1e9d7b6f83a02 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 6 Dec 2012 07:22:04 -0600 Subject: libceph: init osd->o_node in create_osd() The red-black node node in the ceph osd structure is not initialized in create_osd(). Because this node can be the subject of a RB_EMPTY_NODE() call later on, we should ensure the node is initialized properly for that. Add a call to RB_CLEAR_NODE() initialize it. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 32bd696b39a8..a6dc6acd6566 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -642,6 +642,7 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) atomic_set(&osd->o_ref, 1); osd->o_osdc = osdc; osd->o_osd = onum; + RB_CLEAR_NODE(&osd->o_node); INIT_LIST_HEAD(&osd->o_requests); INIT_LIST_HEAD(&osd->o_linger_requests); INIT_LIST_HEAD(&osd->o_osd_lru); -- cgit v1.2.3-59-g8ed1b From 3ee5234df68d253c415ba4f2db72ad250d9c21a9 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 17 Dec 2012 12:23:48 -0600 Subject: libceph: init event->node in ceph_osdc_create_event() The red-black node node in the ceph osd event structure is not initialized in create_osdc_create_event(). Because this node can be the subject of a RB_EMPTY_NODE() call later on, we should ensure the node is initialized properly for that. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index a6dc6acd6566..2bce3d4be1c6 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1563,6 +1563,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); + RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ init_completion(&event->completion); -- cgit v1.2.3-59-g8ed1b From a978fa20fb657548561dddbfb605fe43654f0825 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 17 Dec 2012 12:23:48 -0600 Subject: libceph: don't use rb_init_node() in ceph_osdc_alloc_request() The red-black node in the ceph osd request structure is initialized in ceph_osdc_alloc_request() using rbd_init_node(). We do need to initialize this, because in __unregister_request() we call RB_EMPTY_NODE(), which expects the node it's checking to have been initialized. But rb_init_node() is apparently overkill, and may in fact be on its way out. So use RB_CLEAR_NODE() instead. For a little more background, see this commit: 4c199a93 rbtree: empty nodes have no color" Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2bce3d4be1c6..7cd0a7f3bf43 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -221,7 +221,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); - rb_init_node(&req->r_node); + RB_CLEAR_NODE(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); -- cgit v1.2.3-59-g8ed1b From c89ce05e0c5a01a256100ac6a6019f276bdd1ca6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 6 Dec 2012 07:22:04 -0600 Subject: libceph: register request before unregister linger In kick_requests(), we need to register the request before we unregister the linger request. Otherwise the unregister will reset the request's osd pointer to NULL. Signed-off-by: Alex Elder Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7cd0a7f3bf43..780caf6b0491 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1328,8 +1328,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); - __unregister_linger_request(osdc, req); __register_request(osdc, req); + __unregister_linger_request(osdc, req); } mutex_unlock(&osdc->request_mutex); -- cgit v1.2.3-59-g8ed1b