From 96e4dac66f69d28af2b736e723364efbbdf9fdee Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 22 May 2013 20:54:25 -0500 Subject: libceph: add lingering request reference when registered When an osd request is set to linger, the osd client holds onto the request so it can be re-submitted following certain osd map changes. The osd client holds a reference to the request until it is unregistered. This is used by rbd for watch requests. Currently, the reference is taken when the request is marked with the linger flag. This means that if an error occurs after that time but before the the request completes successfully, that reference is leaked. There's really no reason to take the reference until the request is registered in the the osd client's list of lingering requests, and that only happens when the lingering (watch) request completes successfully. So take that reference only when it gets registered following succesful completion, and drop it (as before) when the request gets unregistered. This avoids the reference problem on error in rbd. Rearrange ceph_osdc_unregister_linger_request() to avoid using the request pointer after it may have been freed. And hold an extra reference in kick_requests() while handling a linger request that has not yet been registered, to ensure it doesn't go away. This resolves: http://tracker.ceph.com/issues/3859 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- net/ceph/osd_client.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3a246a6cab47..e0abb83b520e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1174,6 +1174,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { dout("__register_linger_request %p\n", req); + ceph_osdc_get_request(req); list_add_tail(&req->r_linger_item, &osdc->req_linger); if (req->r_osd) list_add_tail(&req->r_linger_osd, @@ -1196,6 +1197,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, if (list_empty(&req->r_osd_item)) req->r_osd = NULL; } + ceph_osdc_put_request(req); } void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, @@ -1203,9 +1205,8 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, { mutex_lock(&osdc->request_mutex); if (req->r_linger) { - __unregister_linger_request(osdc, req); req->r_linger = 0; - ceph_osdc_put_request(req); + __unregister_linger_request(osdc, req); } mutex_unlock(&osdc->request_mutex); } @@ -1217,11 +1218,6 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, if (!req->r_linger) { dout("set_request_linger %p\n", req); req->r_linger = 1; - /* - * caller is now responsible for calling - * unregister_linger_request - */ - ceph_osdc_get_request(req); } } EXPORT_SYMBOL(ceph_osdc_set_request_linger); @@ -1633,8 +1629,10 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("%p tid %llu restart on osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); + ceph_osdc_get_request(req); __unregister_request(osdc, req); __register_linger_request(osdc, req); + ceph_osdc_put_request(req); continue; } -- cgit v1.2.3-59-g8ed1b From 4974341eb99861720d54db9337bf1fe78eb8b9d0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 29 May 2013 11:19:00 -0500 Subject: libceph: print more info for short message header If an osd client response message arrives that has a front section that's too big for the buffer set aside to receive it, a warning gets reported and a new buffer is allocated. The warning says nothing about which connection had the problem. Add the peer type and number to what gets reported, to be a bit more informative. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- net/ceph/osd_client.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e0abb83b520e..61147fe70181 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2454,8 +2454,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, ceph_msg_revoke_incoming(req->r_reply); if (front > req->r_reply->front.iov_len) { - pr_warning("get_reply front %d > preallocated %d\n", - front, (int)req->r_reply->front.iov_len); + pr_warning("get_reply front %d > preallocated %d (%u#%llu)\n", + front, (int)req->r_reply->front.iov_len, + (unsigned int)con->peer_name.type, + le64_to_cpu(con->peer_name.num)); m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false); if (!m) goto out; -- cgit v1.2.3-59-g8ed1b From eb845ff13a44477f8a411baedbf11d678b9daf0a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 31 May 2013 15:54:44 +0800 Subject: libceph: fix safe completion handle_reply() calls complete_request() only if the first OSD reply has ONDISK flag. Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- include/linux/ceph/osd_client.h | 1 - net/ceph/osd_client.c | 17 ++++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 186db0bf4951..ce6df39f60ff 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -145,7 +145,6 @@ struct ceph_osd_request { s32 r_reply_op_result[CEPH_OSD_MAX_OP]; int r_got_reply; int r_linger; - int r_completed; struct ceph_osd_client *r_osdc; struct kref r_kref; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 61147fe70181..3480b058794b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1522,6 +1522,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, for (i = 0; i < numops; i++) req->r_reply_op_result[i] = ceph_decode_32(&p); + already_completed = req->r_got_reply; + if (!req->r_got_reply) { req->r_result = result; @@ -1552,16 +1554,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); - already_completed = req->r_completed; - req->r_completed = 1; mutex_unlock(&osdc->request_mutex); - if (already_completed) - goto done; - if (req->r_callback) - req->r_callback(req, msg); - else - complete_all(&req->r_completion); + if (!already_completed) { + if (req->r_callback) + req->r_callback(req, msg); + else + complete_all(&req->r_completion); + } if (flags & CEPH_OSD_FLAG_ONDISK) complete_request(req); @@ -2121,7 +2121,6 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, __register_request(osdc, req); req->r_sent = 0; req->r_got_reply = 0; - req->r_completed = 0; rc = __map_request(osdc, req, 0); if (rc < 0) { if (nofail) { -- cgit v1.2.3-59-g8ed1b From ccca4e37b1a912da3db68aee826557ea66145273 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 2 Jun 2013 18:40:23 +0800 Subject: libceph: fix truncate size calculation check the "not truncated yet" case Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3480b058794b..540dd29c9210 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -733,12 +733,14 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, object_size = le32_to_cpu(layout->fl_object_size); object_base = off - objoff; - if (truncate_size <= object_base) { - truncate_size = 0; - } else { - truncate_size -= object_base; - if (truncate_size > object_size) - truncate_size = object_size; + if (!(truncate_seq == 1 && truncate_size == -1ULL)) { + if (truncate_size <= object_base) { + truncate_size = 0; + } else { + truncate_size -= object_base; + if (truncate_size > object_size) + truncate_size = object_size; + } } osd_req_op_extent_init(req, 0, opcode, objoff, objlen, -- cgit v1.2.3-59-g8ed1b From 2cb33cac622afde897aa02d3dcd9fbba8bae839e Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Thu, 20 Jun 2013 13:13:59 -0700 Subject: libceph: Fix NULL pointer dereference in auth client code A malicious monitor can craft an auth reply message that could cause a NULL function pointer dereference in the client's kernel. To prevent this, the auth_none protocol handler needs an empty ceph_auth_client_ops->build_request() function. CVE-2013-1059 Signed-off-by: Tyler Hicks Reported-by: Chanam Park Reviewed-by: Seth Arnold Reviewed-by: Sage Weil Cc: stable@vger.kernel.org --- net/ceph/auth_none.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 925ca583c09c..8c93fa8d81bc 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -39,6 +39,11 @@ static int should_authenticate(struct ceph_auth_client *ac) return xi->starting; } +static int build_request(struct ceph_auth_client *ac, void *buf, void *end) +{ + return 0; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -106,6 +111,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .destroy = destroy, .is_authenticated = is_authenticated, .should_authenticate = should_authenticate, + .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, -- cgit v1.2.3-59-g8ed1b From 61c5d6bf7074ee32d014dcdf7698dc8c59eb712d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 24 Jun 2013 14:41:27 +0800 Subject: libceph: call r_unsafe_callback when unsafe reply is received We can't use !req->r_sent to check if OSD request is sent for the first time, this is because __cancel_request() zeros req->r_sent when OSD map changes. Rather than adding a new variable to struct ceph_osd_request to indicate if it's sent for the first time, We can call the unsafe callback only when unsafe OSD reply is received. If OSD's first reply is safe, just skip calling the unsafe callback. The purpose of unsafe callback is adding unsafe request to a list, so that fsync(2) can wait for the safe reply. fsync(2) doesn't need to wait for a write(2) that hasn't returned yet. So it's OK to add request to the unsafe list when the first OSD reply is received. (ceph_sync_write() returns after receiving the first OSD reply) Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- net/ceph/osd_client.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 540dd29c9210..dd47889adc4a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1337,10 +1337,6 @@ static void __send_request(struct ceph_osd_client *osdc, ceph_msg_get(req->r_request); /* send consumes a ref */ - /* Mark the request unsafe if this is the first timet's being sent. */ - - if (!req->r_sent && req->r_unsafe_callback) - req->r_unsafe_callback(req, true); req->r_sent = req->r_osd->o_incarnation; ceph_con_send(&req->r_osd->o_con, req->r_request); @@ -1431,8 +1427,6 @@ static void handle_osds_timeout(struct work_struct *work) static void complete_request(struct ceph_osd_request *req) { - if (req->r_unsafe_callback) - req->r_unsafe_callback(req, false); complete_all(&req->r_safe_completion); /* fsync waiter */ } @@ -1559,14 +1553,20 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, mutex_unlock(&osdc->request_mutex); if (!already_completed) { + if (req->r_unsafe_callback && + result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK)) + req->r_unsafe_callback(req, true); if (req->r_callback) req->r_callback(req, msg); else complete_all(&req->r_completion); } - if (flags & CEPH_OSD_FLAG_ONDISK) + if (flags & CEPH_OSD_FLAG_ONDISK) { + if (req->r_unsafe_callback && already_completed) + req->r_unsafe_callback(req, false); complete_request(req); + } done: dout("req=%p req->r_linger=%d\n", req, req->r_linger); -- cgit v1.2.3-59-g8ed1b