From 3231300bb986947a6b74e7075d84a2f434e4d788 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 22 Oct 2014 17:13:26 -0700 Subject: ceph: fix flush tid comparision TID of cap flush ack is 64 bits, but ceph_inode_info::flushing_cap_tid is only 16 bits. 16 bits should be plenty to let the cap flush updates pipeline appropriately, but we need to cast in the proper direction when comparing these differently-sized versions. So downcast the 64-bits one to 16 bits. Reflects ceph.git commit a5184cf46a6e867287e24aeb731634828467cd98. Signed-off-by: Yan, Zheng Reviewed-by: Ilya Dryomov --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 659f2ea9e6f7..cefca661464b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, for (i = 0; i < CEPH_CAP_BITS; i++) if ((dirty & (1 << i)) && - flush_tid == ci->i_cap_flush_tid[i]) + (u16)flush_tid == ci->i_cap_flush_tid[i]) cleaned |= 1 << i; dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," -- cgit v1.2.3-59-g8ed1b From aaef31703a0cf6a733e651885bfb49edc3ac6774 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 23 Oct 2014 00:25:22 +0400 Subject: libceph: do not crash on large auth tickets Large (greater than 32k, the value of PAGE_ALLOC_COSTLY_ORDER) auth tickets will have their buffers vmalloc'ed, which leads to the following crash in crypto: [ 28.685082] BUG: unable to handle kernel paging request at ffffeb04000032c0 [ 28.686032] IP: [] scatterwalk_pagedone+0x22/0x80 [ 28.686032] PGD 0 [ 28.688088] Oops: 0000 [#1] PREEMPT SMP [ 28.688088] Modules linked in: [ 28.688088] CPU: 0 PID: 878 Comm: kworker/0:2 Not tainted 3.17.0-vm+ #305 [ 28.688088] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 28.688088] Workqueue: ceph-msgr con_work [ 28.688088] task: ffff88011a7f9030 ti: ffff8800d903c000 task.ti: ffff8800d903c000 [ 28.688088] RIP: 0010:[] [] scatterwalk_pagedone+0x22/0x80 [ 28.688088] RSP: 0018:ffff8800d903f688 EFLAGS: 00010286 [ 28.688088] RAX: ffffeb04000032c0 RBX: ffff8800d903f718 RCX: ffffeb04000032c0 [ 28.688088] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8800d903f750 [ 28.688088] RBP: ffff8800d903f688 R08: 00000000000007de R09: ffff8800d903f880 [ 28.688088] R10: 18df467c72d6257b R11: 0000000000000000 R12: 0000000000000010 [ 28.688088] R13: ffff8800d903f750 R14: ffff8800d903f8a0 R15: 0000000000000000 [ 28.688088] FS: 00007f50a41c7700(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000 [ 28.688088] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 28.688088] CR2: ffffeb04000032c0 CR3: 00000000da3f3000 CR4: 00000000000006b0 [ 28.688088] Stack: [ 28.688088] ffff8800d903f698 ffffffff81392ca8 ffff8800d903f6e8 ffffffff81395d32 [ 28.688088] ffff8800dac96000 ffff880000000000 ffff8800d903f980 ffff880119b7e020 [ 28.688088] ffff880119b7e010 0000000000000000 0000000000000010 0000000000000010 [ 28.688088] Call Trace: [ 28.688088] [] scatterwalk_done+0x38/0x40 [ 28.688088] [] scatterwalk_done+0x38/0x40 [ 28.688088] [] blkcipher_walk_done+0x182/0x220 [ 28.688088] [] crypto_cbc_encrypt+0x15f/0x180 [ 28.688088] [] ? crypto_aes_set_key+0x30/0x30 [ 28.688088] [] ceph_aes_encrypt2+0x29c/0x2e0 [ 28.688088] [] ceph_encrypt2+0x93/0xb0 [ 28.688088] [] ceph_x_encrypt+0x4a/0x60 [ 28.688088] [] ? ceph_buffer_new+0x5d/0xf0 [ 28.688088] [] ceph_x_build_authorizer.isra.6+0x297/0x360 [ 28.688088] [] ? kmem_cache_alloc_trace+0x11b/0x1c0 [ 28.688088] [] ? ceph_auth_create_authorizer+0x36/0x80 [ 28.688088] [] ceph_x_create_authorizer+0x63/0xd0 [ 28.688088] [] ceph_auth_create_authorizer+0x54/0x80 [ 28.688088] [] get_authorizer+0x80/0xd0 [ 28.688088] [] prepare_write_connect+0x18b/0x2b0 [ 28.688088] [] try_read+0x1e59/0x1f10 This is because we set up crypto scatterlists as if all buffers were kmalloc'ed. Fix it. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/crypto.c | 169 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 132 insertions(+), 37 deletions(-) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 62fc5e7a9acf..790fe89d90c0 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -90,11 +90,82 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) static const u8 *aes_iv = (u8 *)CEPH_AES_IV; +/* + * Should be used for buffers allocated with ceph_kvmalloc(). + * Currently these are encrypt out-buffer (ceph_buffer) and decrypt + * in-buffer (msg front). + * + * Dispose of @sgt with teardown_sgtable(). + * + * @prealloc_sg is to avoid memory allocation inside sg_alloc_table() + * in cases where a single sg is sufficient. No attempt to reduce the + * number of sgs by squeezing physically contiguous pages together is + * made though, for simplicity. + */ +static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, + const void *buf, unsigned int buf_len) +{ + struct scatterlist *sg; + const bool is_vmalloc = is_vmalloc_addr(buf); + unsigned int off = offset_in_page(buf); + unsigned int chunk_cnt = 1; + unsigned int chunk_len = PAGE_ALIGN(off + buf_len); + int i; + int ret; + + if (buf_len == 0) { + memset(sgt, 0, sizeof(*sgt)); + return -EINVAL; + } + + if (is_vmalloc) { + chunk_cnt = chunk_len >> PAGE_SHIFT; + chunk_len = PAGE_SIZE; + } + + if (chunk_cnt > 1) { + ret = sg_alloc_table(sgt, chunk_cnt, GFP_NOFS); + if (ret) + return ret; + } else { + WARN_ON(chunk_cnt != 1); + sg_init_table(prealloc_sg, 1); + sgt->sgl = prealloc_sg; + sgt->nents = sgt->orig_nents = 1; + } + + for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) { + struct page *page; + unsigned int len = min(chunk_len - off, buf_len); + + if (is_vmalloc) + page = vmalloc_to_page(buf); + else + page = virt_to_page(buf); + + sg_set_page(sg, page, len, off); + + off = 0; + buf += len; + buf_len -= len; + } + WARN_ON(buf_len != 0); + + return 0; +} + +static void teardown_sgtable(struct sg_table *sgt) +{ + if (sgt->orig_nents > 1) + sg_free_table(sgt); +} + static int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, const void *src, size_t src_len) { - struct scatterlist sg_in[2], sg_out[1]; + struct scatterlist sg_in[2], prealloc_sg; + struct sg_table sg_out; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; int ret; @@ -110,16 +181,18 @@ static int ceph_aes_encrypt(const void *key, int key_len, *dst_len = src_len + zero_padding; - crypto_blkcipher_setkey((void *)tfm, key, key_len); sg_init_table(sg_in, 2); sg_set_buf(&sg_in[0], src, src_len); sg_set_buf(&sg_in[1], pad, zero_padding); - sg_init_table(sg_out, 1); - sg_set_buf(sg_out, dst, *dst_len); + ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); + if (ret) + goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); + /* print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, key, key_len, 1); @@ -128,16 +201,22 @@ static int ceph_aes_encrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, pad, zero_padding, 1); */ - ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, src_len + zero_padding); - crypto_free_blkcipher(tfm); - if (ret < 0) + if (ret < 0) { pr_err("ceph_aes_crypt failed %d\n", ret); + goto out_sg; + } /* print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_out); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, @@ -145,7 +224,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, const void *src1, size_t src1_len, const void *src2, size_t src2_len) { - struct scatterlist sg_in[3], sg_out[1]; + struct scatterlist sg_in[3], prealloc_sg; + struct sg_table sg_out; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; int ret; @@ -161,17 +241,19 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, *dst_len = src1_len + src2_len + zero_padding; - crypto_blkcipher_setkey((void *)tfm, key, key_len); sg_init_table(sg_in, 3); sg_set_buf(&sg_in[0], src1, src1_len); sg_set_buf(&sg_in[1], src2, src2_len); sg_set_buf(&sg_in[2], pad, zero_padding); - sg_init_table(sg_out, 1); - sg_set_buf(sg_out, dst, *dst_len); + ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); + if (ret) + goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); + /* print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, key, key_len, 1); @@ -182,23 +264,30 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, pad, zero_padding, 1); */ - ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, src1_len + src2_len + zero_padding); - crypto_free_blkcipher(tfm); - if (ret < 0) + if (ret < 0) { pr_err("ceph_aes_crypt2 failed %d\n", ret); + goto out_sg; + } /* print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_out); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, const void *src, size_t src_len) { - struct scatterlist sg_in[1], sg_out[2]; + struct sg_table sg_in; + struct scatterlist sg_out[2], prealloc_sg; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm }; char pad[16]; @@ -210,16 +299,16 @@ static int ceph_aes_decrypt(const void *key, int key_len, if (IS_ERR(tfm)) return PTR_ERR(tfm); - crypto_blkcipher_setkey((void *)tfm, key, key_len); - sg_init_table(sg_in, 1); sg_init_table(sg_out, 2); - sg_set_buf(sg_in, src, src_len); sg_set_buf(&sg_out[0], dst, *dst_len); sg_set_buf(&sg_out[1], pad, sizeof(pad)); + ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + if (ret) + goto out_tfm; + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); /* @@ -228,12 +317,10 @@ static int ceph_aes_decrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, src, src_len, 1); */ - - ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); - crypto_free_blkcipher(tfm); + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); if (ret < 0) { pr_err("ceph_aes_decrypt failed %d\n", ret); - return ret; + goto out_sg; } if (src_len <= *dst_len) @@ -251,7 +338,12 @@ static int ceph_aes_decrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_in); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_decrypt2(const void *key, int key_len, @@ -259,7 +351,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len, void *dst2, size_t *dst2_len, const void *src, size_t src_len) { - struct scatterlist sg_in[1], sg_out[3]; + struct sg_table sg_in; + struct scatterlist sg_out[3], prealloc_sg; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm }; char pad[16]; @@ -271,17 +364,17 @@ static int ceph_aes_decrypt2(const void *key, int key_len, if (IS_ERR(tfm)) return PTR_ERR(tfm); - sg_init_table(sg_in, 1); - sg_set_buf(sg_in, src, src_len); sg_init_table(sg_out, 3); sg_set_buf(&sg_out[0], dst1, *dst1_len); sg_set_buf(&sg_out[1], dst2, *dst2_len); sg_set_buf(&sg_out[2], pad, sizeof(pad)); + ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + if (ret) + goto out_tfm; crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); /* @@ -290,12 +383,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, src, src_len, 1); */ - - ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); - crypto_free_blkcipher(tfm); + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); if (ret < 0) { pr_err("ceph_aes_decrypt failed %d\n", ret); - return ret; + goto out_sg; } if (src_len <= *dst1_len) @@ -325,7 +416,11 @@ static int ceph_aes_decrypt2(const void *key, int key_len, dst2, *dst2_len, 1); */ - return 0; +out_sg: + teardown_sgtable(&sg_in); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } -- cgit v1.2.3-59-g8ed1b From a390de0208e7f2f8fdb2fbf970240e4f7b308037 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 4 Nov 2014 18:32:14 +0300 Subject: libceph: unlink from o_linger_requests when clearing r_osd Requests have to be unlinked from both osd->o_requests (normal requests) and osd->o_linger_requests (linger requests) lists when clearing req->r_osd. Otherwise __unregister_linger_request() gets confused and we trip over a !list_empty(&osd->o_linger_requests) assert in __remove_osd(). MON=1 OSD=1: # cat remove-osd.sh #!/bin/bash rbd create --size 1 test DEV=$(rbd map test) ceph osd out 0 sleep 3 rbd map dne/dne # obtain a new osdmap as a side effect rbd unmap $DEV & # will block sleep 3 ceph osd in 0 Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f3fc54eac09d..75abaa87abac 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1395,6 +1395,7 @@ static int __map_request(struct ceph_osd_client *osdc, if (req->r_osd) { __cancel_request(req); list_del_init(&req->r_osd_item); + list_del_init(&req->r_linger_osd_item); req->r_osd = NULL; } -- cgit v1.2.3-59-g8ed1b From ba9d114ec5578e6e99a4dfa37ff8ae688040fd64 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 5 Nov 2014 15:45:58 +0300 Subject: libceph: clear r_req_lru_item in __unregister_linger_request() kick_requests() can put linger requests on the notarget list. This means we need to clear the much-overloaded req->r_req_lru_item in __unregister_linger_request() as well, or we get an assertion failure in ceph_osdc_release_request() - !list_empty(&req->r_req_lru_item). AFAICT the assumption was that registered linger requests cannot be on any of req->r_req_lru_item lists, but that's clearly not the case. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 75abaa87abac..decc3b74e65f 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1254,6 +1254,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, if (list_empty(&req->r_osd_item)) req->r_osd = NULL; } + + list_del_init(&req->r_req_lru_item); /* can be on notarget */ ceph_osdc_put_request(req); } -- cgit v1.2.3-59-g8ed1b From cc9f1f518cec079289d11d732efa490306b1ddad Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 5 Nov 2014 19:33:44 +0300 Subject: libceph: change from BUG to WARN for __remove_osd() asserts No reason to use BUG_ON for osd request list assertions. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- net/ceph/osd_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index decc3b74e65f..6f164289bde8 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1007,8 +1007,8 @@ static void put_osd(struct ceph_osd *osd) static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { dout("__remove_osd %p\n", osd); - BUG_ON(!list_empty(&osd->o_requests)); - BUG_ON(!list_empty(&osd->o_linger_requests)); + WARN_ON(!list_empty(&osd->o_requests)); + WARN_ON(!list_empty(&osd->o_linger_requests)); rb_erase(&osd->o_node, &osdc->osds); list_del_init(&osd->o_osd_lru); -- cgit v1.2.3-59-g8ed1b