aboutsummaryrefslogtreecommitdiffstats
path: root/net/ceph
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--net/ceph/buffer.c4
-rw-r--r--net/ceph/ceph_common.c52
-rw-r--r--net/ceph/crush/mapper.c5
-rw-r--r--net/ceph/crypto.c2
-rw-r--r--net/ceph/messenger.c61
-rw-r--r--net/ceph/messenger_v1.c58
-rw-r--r--net/ceph/messenger_v2.c246
-rw-r--r--net/ceph/mon_client.c2
-rw-r--r--net/ceph/osd_client.c334
-rw-r--r--net/ceph/osdmap.c44
-rw-r--r--net/ceph/pagelist.c2
11 files changed, 440 insertions, 370 deletions
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index 5622763ad402..7e51f128045d 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -7,7 +7,7 @@
#include <linux/ceph/buffer.h>
#include <linux/ceph/decode.h>
-#include <linux/ceph/libceph.h> /* for ceph_kvmalloc */
+#include <linux/ceph/libceph.h> /* for kvmalloc */
struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
{
@@ -17,7 +17,7 @@ struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp)
if (!b)
return NULL;
- b->vec.iov_base = ceph_kvmalloc(len, gfp);
+ b->vec.iov_base = kvmalloc(len, gfp);
if (!b->vec.iov_base) {
kfree(b);
return NULL;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 97d6ea763e32..4c6441536d55 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -190,41 +190,14 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
EXPORT_SYMBOL(ceph_compare_options);
-/*
- * kvmalloc() doesn't fall back to the vmalloc allocator unless flags are
- * compatible with (a superset of) GFP_KERNEL. This is because while the
- * actual pages are allocated with the specified flags, the page table pages
- * are always allocated with GFP_KERNEL.
- *
- * ceph_kvmalloc() may be called with GFP_KERNEL, GFP_NOFS or GFP_NOIO.
- */
-void *ceph_kvmalloc(size_t size, gfp_t flags)
-{
- void *p;
-
- if ((flags & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS)) {
- p = kvmalloc(size, flags);
- } else if ((flags & (__GFP_IO | __GFP_FS)) == __GFP_IO) {
- unsigned int nofs_flag = memalloc_nofs_save();
- p = kvmalloc(size, GFP_KERNEL);
- memalloc_nofs_restore(nofs_flag);
- } else {
- unsigned int noio_flag = memalloc_noio_save();
- p = kvmalloc(size, GFP_KERNEL);
- memalloc_noio_restore(noio_flag);
- }
-
- return p;
-}
-
-static int parse_fsid(const char *str, struct ceph_fsid *fsid)
+int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid)
{
int i = 0;
char tmp[3];
int err = -EINVAL;
int d;
- dout("parse_fsid '%s'\n", str);
+ dout("%s '%s'\n", __func__, str);
tmp[2] = 0;
while (*str && i < 16) {
if (ispunct(*str)) {
@@ -244,9 +217,10 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
if (i == 16)
err = 0;
- dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
+ dout("%s ret %d got fsid %pU\n", __func__, err, fsid);
return err;
}
+EXPORT_SYMBOL(ceph_parse_fsid);
/*
* ceph options
@@ -272,6 +246,7 @@ enum {
Opt_cephx_sign_messages,
Opt_tcp_nodelay,
Opt_abort_on_full,
+ Opt_rxbounce,
};
enum {
@@ -321,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
+ fsparam_flag ("rxbounce", Opt_rxbounce),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret),
@@ -422,14 +398,14 @@ out:
}
int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt,
- struct fc_log *l)
+ struct fc_log *l, char delim)
{
struct p_log log = {.prefix = "libceph", .log = l};
int ret;
- /* ip1[:port1][,ip2[:port2]...] */
+ /* ip1[:port1][<delim>ip2[:port2]...] */
ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON,
- &opt->num_mon);
+ &opt->num_mon, delim);
if (ret) {
error_plog(&log, "Failed to parse monitor IPs: %d", ret);
return ret;
@@ -455,8 +431,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_ip:
err = ceph_parse_ips(param->string,
param->string + param->size,
- &opt->my_addr,
- 1, NULL);
+ &opt->my_addr, 1, NULL, ',');
if (err) {
error_plog(&log, "Failed to parse ip: %d", err);
return err;
@@ -465,7 +440,7 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
break;
case Opt_fsid:
- err = parse_fsid(param->string, &opt->fsid);
+ err = ceph_parse_fsid(param->string, &opt->fsid);
if (err) {
error_plog(&log, "Failed to parse fsid: %d", err);
return err;
@@ -611,6 +586,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
case Opt_abort_on_full:
opt->flags |= CEPH_OPT_ABORT_ON_FULL;
break;
+ case Opt_rxbounce:
+ opt->flags |= CEPH_OPT_RXBOUNCE;
+ break;
default:
BUG();
@@ -687,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
seq_puts(m, "notcp_nodelay,");
if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL))
seq_puts(m, "abort_on_full,");
+ if (opt->flags & CEPH_OPT_RXBOUNCE)
+ seq_puts(m, "rxbounce,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
seq_printf(m, "mount_timeout=%d,",
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 7057f8db4f99..1daf95e17d67 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -906,7 +906,6 @@ int crush_do_rule(const struct crush_map *map,
int recurse_to_leaf;
int wsize = 0;
int osize;
- int *tmp;
const struct crush_rule *rule;
__u32 step;
int i, j;
@@ -1073,9 +1072,7 @@ int crush_do_rule(const struct crush_map *map,
memcpy(o, c, osize*sizeof(*o));
/* swap o and w arrays */
- tmp = o;
- o = w;
- w = tmp;
+ swap(o, w);
wsize = osize;
break;
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 92d89b331645..051d22c0e4ad 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -147,7 +147,7 @@ void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
static const u8 *aes_iv = (u8 *)CEPH_AES_IV;
/*
- * Should be used for buffers allocated with ceph_kvmalloc().
+ * Should be used for buffers allocated with kvmalloc().
* Currently these are encrypt out-buffer (ceph_buffer) and decrypt
* in-buffer (msg front).
*
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 57d043b382ed..dfa237fbd5a3 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con)
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
+ if (con->bounce_page) {
+ __free_page(con->bounce_page);
+ con->bounce_page = NULL;
+ }
if (ceph_msgr2(from_msgr(con->msgr)))
ceph_con_v2_reset_protocol(con);
@@ -724,7 +728,6 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
it->iter.bi_size = cursor->resid;
BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
- cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
}
static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
@@ -750,10 +753,8 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
cursor->resid -= bytes;
bio_advance_iter(it->bio, &it->iter, bytes);
- if (!cursor->resid) {
- BUG_ON(!cursor->last_piece);
+ if (!cursor->resid)
return false; /* no more data */
- }
if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done &&
page == bio_iter_page(it->bio, it->iter)))
@@ -766,9 +767,7 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
it->iter.bi_size = cursor->resid;
}
- BUG_ON(cursor->last_piece);
BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
- cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
return true;
}
#endif /* CONFIG_BLOCK */
@@ -784,8 +783,6 @@ static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor,
cursor->bvec_iter.bi_size = cursor->resid;
BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
- cursor->last_piece =
- cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
}
static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor,
@@ -811,19 +808,14 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
cursor->resid -= bytes;
bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes);
- if (!cursor->resid) {
- BUG_ON(!cursor->last_piece);
+ if (!cursor->resid)
return false; /* no more data */
- }
if (!bytes || (cursor->bvec_iter.bi_bvec_done &&
page == bvec_iter_page(bvecs, cursor->bvec_iter)))
return false; /* more bytes to process in this segment */
- BUG_ON(cursor->last_piece);
BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
- cursor->last_piece =
- cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
return true;
}
@@ -849,7 +841,6 @@ static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
BUG_ON(page_count > (int)USHRT_MAX);
cursor->page_count = (unsigned short)page_count;
BUG_ON(length > SIZE_MAX - cursor->page_offset);
- cursor->last_piece = cursor->page_offset + cursor->resid <= PAGE_SIZE;
}
static struct page *
@@ -864,11 +855,7 @@ ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
BUG_ON(cursor->page_offset >= PAGE_SIZE);
*page_offset = cursor->page_offset;
- if (cursor->last_piece)
- *length = cursor->resid;
- else
- *length = PAGE_SIZE - *page_offset;
-
+ *length = min_t(size_t, cursor->resid, PAGE_SIZE - *page_offset);
return data->pages[cursor->page_index];
}
@@ -893,8 +880,6 @@ static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
BUG_ON(cursor->page_index >= cursor->page_count);
cursor->page_index++;
- cursor->last_piece = cursor->resid <= PAGE_SIZE;
-
return true;
}
@@ -924,7 +909,6 @@ ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
cursor->resid = min(length, pagelist->length);
cursor->page = page;
cursor->offset = 0;
- cursor->last_piece = cursor->resid <= PAGE_SIZE;
}
static struct page *
@@ -944,11 +928,7 @@ ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
/* offset of first page in pagelist is always 0 */
*page_offset = cursor->offset & ~PAGE_MASK;
- if (cursor->last_piece)
- *length = cursor->resid;
- else
- *length = PAGE_SIZE - *page_offset;
-
+ *length = min_t(size_t, cursor->resid, PAGE_SIZE - *page_offset);
return cursor->page;
}
@@ -981,8 +961,6 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
cursor->page = list_next_entry(cursor->page, lru);
- cursor->last_piece = cursor->resid <= PAGE_SIZE;
-
return true;
}
@@ -1040,8 +1018,7 @@ void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
* Indicate whether this is the last piece in this data item.
*/
struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
- size_t *page_offset, size_t *length,
- bool *last_piece)
+ size_t *page_offset, size_t *length)
{
struct page *page;
@@ -1070,8 +1047,6 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
BUG_ON(*page_offset + *length > PAGE_SIZE);
BUG_ON(!*length);
BUG_ON(*length > cursor->resid);
- if (last_piece)
- *last_piece = cursor->last_piece;
return page;
}
@@ -1108,7 +1083,6 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
cursor->total_resid -= bytes;
if (!cursor->resid && cursor->total_resid) {
- WARN_ON(!cursor->last_piece);
cursor->data++;
__ceph_msg_data_cursor_init(cursor);
new_piece = true;
@@ -1267,30 +1241,31 @@ static int ceph_parse_server_name(const char *name, size_t namelen,
*/
int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
- int max_count, int *count)
+ int max_count, int *count, char delim)
{
int i, ret = -EINVAL;
const char *p = c;
dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) {
+ char cur_delim = delim;
const char *ipend;
int port;
- char delim = ',';
if (*p == '[') {
- delim = ']';
+ cur_delim = ']';
p++;
}
- ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
+ ret = ceph_parse_server_name(p, end - p, &addr[i], cur_delim,
+ &ipend);
if (ret)
goto bad;
ret = -EINVAL;
p = ipend;
- if (delim == ']') {
+ if (cur_delim == ']') {
if (*p != ']') {
dout("missing matching ']'\n");
goto bad;
@@ -1326,11 +1301,11 @@ int ceph_parse_ips(const char *c, const char *end,
addr[i].type = CEPH_ENTITY_ADDR_TYPE_LEGACY;
addr[i].nonce = 0;
- dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
+ dout("%s got %s\n", __func__, ceph_pr_addr(&addr[i]));
if (p == end)
break;
- if (*p != ',')
+ if (*p != delim)
goto bad;
p++;
}
@@ -1920,7 +1895,7 @@ struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
/* front */
if (front_len) {
- m->front.iov_base = ceph_kvmalloc(front_len, flags);
+ m->front.iov_base = kvmalloc(front_len, flags);
if (m->front.iov_base == NULL) {
dout("ceph_msg_new can't allocate %d bytes\n",
front_len);
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index 2cb5ffdf071a..3ddbde87e4d6 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -495,7 +495,7 @@ static int write_partial_message_data(struct ceph_connection *con)
continue;
}
- page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
+ page = ceph_msg_data_next(cursor, &page_offset, &length);
if (length == cursor->total_resid)
more = MSG_MORE;
ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
@@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con,
static int read_partial_msg_data(struct ceph_connection *con)
{
- struct ceph_msg *msg = con->in_msg;
- struct ceph_msg_data_cursor *cursor = &msg->cursor;
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
struct page *page;
size_t page_offset;
@@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con)
u32 crc = 0;
int ret;
- if (!msg->num_data_items)
- return -EIO;
-
if (do_datacrc)
crc = con->in_data_crc;
while (cursor->total_resid) {
@@ -1012,7 +1008,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
continue;
}
- page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
+ page = ceph_msg_data_next(cursor, &page_offset, &length);
ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
if (ret <= 0) {
if (do_datacrc)
@@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con)
return 1; /* must return > 0 to indicate success */
}
+static int read_partial_msg_data_bounce(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ struct page *page;
+ size_t off, len;
+ u32 crc;
+ int ret;
+
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ crc = con->in_data_crc;
+ while (cursor->total_resid) {
+ if (!cursor->resid) {
+ ceph_msg_data_advance(cursor, 0);
+ continue;
+ }
+
+ page = ceph_msg_data_next(cursor, &off, &len);
+ ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len);
+ if (ret <= 0) {
+ con->in_data_crc = crc;
+ return ret;
+ }
+
+ crc = crc32c(crc, page_address(con->bounce_page), ret);
+ memcpy_to_page(page, off, page_address(con->bounce_page), ret);
+
+ ceph_msg_data_advance(cursor, ret);
+ }
+ con->in_data_crc = crc;
+
+ return 1; /* must return > 0 to indicate success */
+}
+
/*
* read (part of) a message.
*/
@@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con)
/* (page) data */
if (data_len) {
- ret = read_partial_msg_data(con);
+ if (!m->num_data_items)
+ return -EIO;
+
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ ret = read_partial_msg_data_bounce(con);
+ else
+ ret = read_partial_msg_data(con);
if (ret <= 0)
return ret;
}
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index cc40ce4e02fb..cc8ff81a50b7 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -57,8 +57,9 @@
#define IN_S_HANDLE_CONTROL_REMAINDER 3
#define IN_S_PREPARE_READ_DATA 4
#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_HANDLE_EPILOGUE 6
-#define IN_S_FINISH_SKIP 7
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_HANDLE_EPILOGUE 7
+#define IN_S_FINISH_SKIP 8
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
@@ -308,7 +309,7 @@ static void *alloc_conn_buf(struct ceph_connection *con, int len)
if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
return NULL;
- buf = ceph_kvmalloc(len, GFP_NOIO);
+ buf = kvmalloc(len, GFP_NOIO);
if (!buf)
return NULL;
@@ -861,7 +862,7 @@ static void get_bvec_at(struct ceph_msg_data_cursor *cursor,
ceph_msg_data_advance(cursor, 0);
/* get a piece of data, cursor isn't advanced */
- page = ceph_msg_data_next(cursor, &off, &len, NULL);
+ page = ceph_msg_data_next(cursor, &off, &len);
bv->bv_page = page;
bv->bv_offset = off;
@@ -1032,22 +1033,41 @@ static int decrypt_control_remainder(struct ceph_connection *con)
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
-static int decrypt_message(struct ceph_connection *con)
+static int decrypt_tail(struct ceph_connection *con)
{
+ struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ int tail_len;
int ret;
+ tail_len = tail_onwire_len(con->in_msg, true);
+ ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt, 0, tail_len,
+ GFP_NOIO);
+ if (ret)
+ goto out;
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
con->v2.in_buf, true);
if (ret)
goto out;
- ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl,
- tail_onwire_len(con->in_msg, true));
+ dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
+ con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
+ ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
+ if (ret)
+ goto out;
+
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
out:
sg_free_table(&sgt);
+ sg_free_table(&enc_sgt);
return ret;
}
@@ -1733,54 +1753,153 @@ static int prepare_read_control_remainder(struct ceph_connection *con)
return 0;
}
-static void prepare_read_data(struct ceph_connection *con)
+static int prepare_read_data(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
- con->in_data_crc = -1;
+ con->in_data_crc = -1;
ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
data_len(con->in_msg));
get_bvec_at(&con->v2.in_cursor, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
set_in_bvec(con, &bv);
con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
+ return 0;
}
static void prepare_read_data_cont(struct ceph_connection *con)
{
struct bio_vec bv;
- if (!con_secure(con))
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+
+ get_bvec_at(&con->v2.in_cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
con->v2.in_bvec.bv_page,
con->v2.in_bvec.bv_offset,
con->v2.in_bvec.bv_len);
+ }
ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
if (con->v2.in_cursor.total_resid) {
get_bvec_at(&con->v2.in_cursor, &bv);
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
set_in_bvec(con, &bv);
WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
return;
}
/*
- * We've read all data. Prepare to read data padding (if any)
- * and epilogue.
+ * We've read all data. Prepare to read epilogue.
*/
reset_in_kvecs(con);
- if (con_secure(con)) {
- if (need_padding(data_len(con->in_msg)))
- add_in_kvec(con, DATA_PAD(con->v2.in_buf),
- padding_len(data_len(con->in_msg)));
- add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+}
+
+static int prepare_read_tail_plain(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ if (!front_len(msg) && !middle_len(msg)) {
+ WARN_ON(!data_len(msg));
+ return prepare_read_data(con);
+ }
+
+ reset_in_kvecs(con);
+ if (front_len(msg)) {
+ add_in_kvec(con, msg->front.iov_base, front_len(msg));
+ WARN_ON(msg->front.iov_len != front_len(msg));
+ }
+ if (middle_len(msg)) {
+ add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
+ WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
+ }
+
+ if (data_len(msg)) {
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+ return 0;
+}
+
+static void prepare_read_enc_page(struct ceph_connection *con)
+{
+ struct bio_vec bv;
+
+ dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
+ con->v2.in_enc_resid);
+ WARN_ON(!con->v2.in_enc_resid);
+
+ bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i];
+ bv.bv_offset = 0;
+ bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE);
+
+ set_in_bvec(con, &bv);
+ con->v2.in_enc_i++;
+ con->v2.in_enc_resid -= bv.bv_len;
+
+ if (con->v2.in_enc_resid) {
+ con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
+ return;
+ }
+
+ /*
+ * We are set to read the last piece of ciphertext (ending
+ * with epilogue) + auth tag.
+ */
+ WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_read_tail_secure(struct ceph_connection *con)
+{
+ struct page **enc_pages;
+ int enc_page_cnt;
+ int tail_len;
+
+ tail_len = tail_onwire_len(con->in_msg, true);
+ WARN_ON(!tail_len);
+
+ enc_page_cnt = calc_pages_for(0, tail_len);
+ enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
+ if (IS_ERR(enc_pages))
+ return PTR_ERR(enc_pages);
+
+ WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = enc_pages;
+ con->v2.in_enc_page_cnt = enc_page_cnt;
+ con->v2.in_enc_resid = tail_len;
+ con->v2.in_enc_i = 0;
+
+ prepare_read_enc_page(con);
+ return 0;
+}
+
static void __finish_skip(struct ceph_connection *con)
{
con->in_seq++;
@@ -2589,47 +2708,26 @@ static int __handle_control(struct ceph_connection *con, void *p)
}
msg = con->in_msg; /* set in process_message_header() */
- if (!front_len(msg) && !middle_len(msg)) {
- if (!data_len(msg))
- return process_message(con);
-
- prepare_read_data(con);
- return 0;
- }
-
- reset_in_kvecs(con);
if (front_len(msg)) {
WARN_ON(front_len(msg) > msg->front_alloc_len);
- add_in_kvec(con, msg->front.iov_base, front_len(msg));
msg->front.iov_len = front_len(msg);
-
- if (con_secure(con) && need_padding(front_len(msg)))
- add_in_kvec(con, FRONT_PAD(con->v2.in_buf),
- padding_len(front_len(msg)));
} else {
msg->front.iov_len = 0;
}
if (middle_len(msg)) {
WARN_ON(middle_len(msg) > msg->middle->alloc_len);
- add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
msg->middle->vec.iov_len = middle_len(msg);
-
- if (con_secure(con) && need_padding(middle_len(msg)))
- add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf),
- padding_len(middle_len(msg)));
} else if (msg->middle) {
msg->middle->vec.iov_len = 0;
}
- if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
- } else {
- add_in_kvec(con, con->v2.in_buf,
- con_secure(con) ? CEPH_EPILOGUE_SECURE_LEN :
- CEPH_EPILOGUE_PLAIN_LEN);
- con->v2.in_state = IN_S_HANDLE_EPILOGUE;
- }
- return 0;
+ if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
+ return process_message(con);
+
+ if (con_secure(con))
+ return prepare_read_tail_secure(con);
+
+ return prepare_read_tail_plain(con);
}
static int handle_preamble(struct ceph_connection *con)
@@ -2717,7 +2815,7 @@ static int handle_epilogue(struct ceph_connection *con)
int ret;
if (con_secure(con)) {
- ret = decrypt_message(con);
+ ret = decrypt_tail(con);
if (ret) {
if (ret == -EBADMSG)
con->error_msg = "integrity error, bad epilogue auth tag";
@@ -2785,13 +2883,16 @@ static int populate_in_iter(struct ceph_connection *con)
ret = handle_control_remainder(con);
break;
case IN_S_PREPARE_READ_DATA:
- prepare_read_data(con);
- ret = 0;
+ ret = prepare_read_data(con);
break;
case IN_S_PREPARE_READ_DATA_CONT:
prepare_read_data_cont(con);
ret = 0;
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ prepare_read_enc_page(con);
+ ret = 0;
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
@@ -3326,20 +3427,16 @@ void ceph_con_v2_revoke(struct ceph_connection *con)
static void revoke_at_prepare_read_data(struct ceph_connection *con)
{
- int remaining; /* data + [data padding] + epilogue */
+ int remaining;
int resid;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
- if (con_secure(con))
- remaining = padded_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p resid %d remaining %d\n", __func__, con, resid,
remaining);
con->v2.in_iter.count -= resid;
@@ -3350,8 +3447,9 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con)
static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
{
int recved, resid; /* current piece of data */
- int remaining; /* [data padding] + epilogue */
+ int remaining;
+ WARN_ON(con_secure(con));
WARN_ON(!data_len(con->in_msg));
WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
@@ -3363,12 +3461,7 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
ceph_msg_data_advance(&con->v2.in_cursor, recved);
WARN_ON(resid > con->v2.in_cursor.total_resid);
- if (con_secure(con))
- remaining = padding_len(data_len(con->in_msg)) +
- CEPH_EPILOGUE_SECURE_LEN;
- else
- remaining = CEPH_EPILOGUE_PLAIN_LEN;
-
+ remaining = CEPH_EPILOGUE_PLAIN_LEN;
dout("%s con %p total_resid %zu remaining %d\n", __func__, con,
con->v2.in_cursor.total_resid, remaining);
con->v2.in_iter.count -= resid;
@@ -3376,11 +3469,26 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con)
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_read_enc_page(struct ceph_connection *con)
+{
+ int resid; /* current enc page (not necessarily data) */
+
+ WARN_ON(!con_secure(con));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ WARN_ON(!resid || resid > con->v2.in_bvec.bv_len);
+
+ dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid,
+ con->v2.in_enc_resid);
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + con->v2.in_enc_resid);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
- WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));
resid = iov_iter_count(&con->v2.in_iter);
WARN_ON(!resid);
@@ -3399,6 +3507,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
case IN_S_PREPARE_READ_DATA_CONT:
revoke_at_prepare_read_data_cont(con);
break;
+ case IN_S_PREPARE_READ_ENC_PAGE:
+ revoke_at_prepare_read_enc_page(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
@@ -3432,6 +3543,13 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con)
clear_out_sign_kvecs(con);
free_conn_bufs(con);
+ if (con->v2.in_enc_pages) {
+ WARN_ON(!con->v2.in_enc_page_cnt);
+ ceph_release_page_vector(con->v2.in_enc_pages,
+ con->v2.in_enc_page_cnt);
+ con->v2.in_enc_pages = NULL;
+ con->v2.in_enc_page_cnt = 0;
+ }
if (con->v2.out_enc_pages) {
WARN_ON(!con->v2.out_enc_page_cnt);
ceph_release_page_vector(con->v2.out_enc_pages,
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 6a6898ee4049..db60217f911b 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -222,7 +222,7 @@ static void pick_new_mon(struct ceph_mon_client *monc)
max--;
}
- n = prandom_u32() % max;
+ n = prandom_u32_max(max);
if (o >= 0 && n >= o)
n++;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1c5815530e0d..4e4f1e4bc265 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd_request *req)
target_init(&req->r_t);
}
-/*
- * This is ugly, but it allows us to reuse linger registration and ping
- * requests, keeping the structure of the code around send_linger{_ping}()
- * reasonable. Setting up a min_nr=2 mempool for each linger request
- * and dealing with copying ops (this blasts req only, watch op remains
- * intact) isn't any better.
- */
-static void request_reinit(struct ceph_osd_request *req)
-{
- struct ceph_osd_client *osdc = req->r_osdc;
- bool mempool = req->r_mempool;
- unsigned int num_ops = req->r_num_ops;
- u64 snapid = req->r_snapid;
- struct ceph_snap_context *snapc = req->r_snapc;
- bool linger = req->r_linger;
- struct ceph_msg *request_msg = req->r_request;
- struct ceph_msg *reply_msg = req->r_reply;
-
- dout("%s req %p\n", __func__, req);
- WARN_ON(kref_read(&req->r_kref) != 1);
- request_release_checks(req);
-
- WARN_ON(kref_read(&request_msg->kref) != 1);
- WARN_ON(kref_read(&reply_msg->kref) != 1);
- target_destroy(&req->r_t);
-
- request_init(req);
- req->r_osdc = osdc;
- req->r_mempool = mempool;
- req->r_num_ops = num_ops;
- req->r_snapid = snapid;
- req->r_snapc = snapc;
- req->r_linger = linger;
- req->r_request = request_msg;
- req->r_reply = reply_msg;
-}
-
struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
unsigned int num_ops,
@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init);
* @watch_opcode: CEPH_OSD_WATCH_OP_*
*/
static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
- u64 cookie, u8 watch_opcode)
+ u8 watch_opcode, u64 cookie, u32 gen)
{
struct ceph_osd_req_op *op;
op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0);
op->watch.cookie = cookie;
op->watch.op = watch_opcode;
- op->watch.gen = 0;
+ op->watch.gen = gen;
+}
+
+/*
+ * prot_ver, timeout and notify payload (may be empty) should already be
+ * encoded in @request_pl
+ */
+static void osd_req_op_notify_init(struct ceph_osd_request *req, int which,
+ u64 cookie, struct ceph_pagelist *request_pl)
+{
+ struct ceph_osd_req_op *op;
+
+ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
+ op->notify.cookie = cookie;
+
+ ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl);
+ op->indata_len = request_pl->length;
}
/*
@@ -1500,7 +1479,7 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
static int pick_random_replica(const struct ceph_osds *acting)
{
- int i = prandom_u32() % acting->size;
+ int i = prandom_u32_max(acting->size);
dout("%s picked osd%d, primary osd%d\n", __func__,
acting->osds[i], acting->primary);
@@ -2385,7 +2364,11 @@ again:
if (ceph_test_opt(osdc->client, ABORT_ON_FULL)) {
err = -ENOSPC;
} else {
- pr_warn_ratelimited("FULL or reached pool quota\n");
+ if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL))
+ pr_warn_ratelimited("cluster is full (osdmap FULL)\n");
+ else
+ pr_warn_ratelimited("pool %lld is full or reached quota\n",
+ req->r_t.base_oloc.pool);
req->r_t.paused = true;
maybe_request_map(osdc);
}
@@ -2727,10 +2710,13 @@ static void linger_release(struct kref *kref)
WARN_ON(!list_empty(&lreq->pending_lworks));
WARN_ON(lreq->osd);
- if (lreq->reg_req)
- ceph_osdc_put_request(lreq->reg_req);
- if (lreq->ping_req)
- ceph_osdc_put_request(lreq->ping_req);
+ if (lreq->request_pl)
+ ceph_pagelist_release(lreq->request_pl);
+ if (lreq->notify_id_pages)
+ ceph_release_page_vector(lreq->notify_id_pages, 1);
+
+ ceph_osdc_put_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->ping_req);
target_destroy(&lreq->t);
kfree(lreq);
}
@@ -2999,6 +2985,12 @@ static void linger_commit_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
lreq->linger_id, req->r_result);
linger_reg_commit_complete(lreq, req->r_result);
@@ -3022,6 +3014,7 @@ static void linger_commit_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3044,6 +3037,12 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->reg_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->reg_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__,
lreq, lreq->linger_id, req->r_result, lreq->last_error);
if (req->r_result < 0) {
@@ -3053,46 +3052,64 @@ static void linger_reconnect_cb(struct ceph_osd_request *req)
}
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
static void send_linger(struct ceph_osd_linger_request *lreq)
{
- struct ceph_osd_request *req = lreq->reg_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_client *osdc = lreq->osdc;
+ struct ceph_osd_request *req;
+ int ret;
- verify_osdc_wrlocked(req->r_osdc);
+ verify_osdc_wrlocked(osdc);
+ mutex_lock(&lreq->lock);
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->reg_req) {
+ if (lreq->reg_req->r_osd)
+ cancel_linger_request(lreq->reg_req);
+ ceph_osdc_put_request(lreq->reg_req);
+ }
+
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- request_reinit(req);
target_copy(&req->r_t, &lreq->t);
req->r_mtime = lreq->mtime;
- mutex_lock(&lreq->lock);
if (lreq->is_watch && lreq->committed) {
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id);
- op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT;
- op->watch.gen = ++lreq->register_gen;
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT,
+ lreq->linger_id, ++lreq->register_gen);
dout("lreq %p reconnect register_gen %u\n", lreq,
- op->watch.gen);
+ req->r_ops[0].watch.gen);
req->r_callback = linger_reconnect_cb;
} else {
- if (!lreq->is_watch)
+ if (lreq->is_watch) {
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH,
+ lreq->linger_id, 0);
+ } else {
lreq->notify_id = 0;
- else
- WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH);
+
+ refcount_inc(&lreq->request_pl->refcnt);
+ osd_req_op_notify_init(req, 0, lreq->linger_id,
+ lreq->request_pl);
+ ceph_osd_data_pages_init(
+ osd_req_op_data(req, 0, notify, response_data),
+ lreq->notify_id_pages, PAGE_SIZE, 0, false, false);
+ }
dout("lreq %p register\n", lreq);
req->r_callback = linger_commit_cb;
}
- mutex_unlock(&lreq->lock);
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->reg_req = req;
+ mutex_unlock(&lreq->lock);
submit_request(req, true);
}
@@ -3102,6 +3119,12 @@ static void linger_ping_cb(struct ceph_osd_request *req)
struct ceph_osd_linger_request *lreq = req->r_priv;
mutex_lock(&lreq->lock);
+ if (req != lreq->ping_req) {
+ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n",
+ __func__, lreq, lreq->linger_id, req, lreq->ping_req);
+ goto out;
+ }
+
dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n",
__func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent,
lreq->last_error);
@@ -3117,6 +3140,7 @@ static void linger_ping_cb(struct ceph_osd_request *req)
lreq->register_gen, req->r_ops[0].watch.gen);
}
+out:
mutex_unlock(&lreq->lock);
linger_put(lreq);
}
@@ -3124,8 +3148,8 @@ static void linger_ping_cb(struct ceph_osd_request *req)
static void send_linger_ping(struct ceph_osd_linger_request *lreq)
{
struct ceph_osd_client *osdc = lreq->osdc;
- struct ceph_osd_request *req = lreq->ping_req;
- struct ceph_osd_req_op *op = &req->r_ops[0];
+ struct ceph_osd_request *req;
+ int ret;
if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) {
dout("%s PAUSERD\n", __func__);
@@ -3137,19 +3161,26 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq)
__func__, lreq, lreq->linger_id, lreq->ping_sent,
lreq->register_gen);
- if (req->r_osd)
- cancel_linger_request(req);
+ if (lreq->ping_req) {
+ if (lreq->ping_req->r_osd)
+ cancel_linger_request(lreq->ping_req);
+ ceph_osdc_put_request(lreq->ping_req);
+ }
- request_reinit(req);
- target_copy(&req->r_t, &lreq->t);
+ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO);
+ BUG_ON(!req);
- WARN_ON(op->op != CEPH_OSD_OP_WATCH ||
- op->watch.cookie != lreq->linger_id ||
- op->watch.op != CEPH_OSD_WATCH_OP_PING);
- op->watch.gen = lreq->register_gen;
+ target_copy(&req->r_t, &lreq->t);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id,
+ lreq->register_gen);
req->r_callback = linger_ping_cb;
+
+ ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
+ BUG_ON(ret);
+
req->r_priv = linger_get(lreq);
req->r_linger = true;
+ lreq->ping_req = req;
ceph_osdc_get_request(req);
account_request(req);
@@ -3165,12 +3196,6 @@ static void linger_submit(struct ceph_osd_linger_request *lreq)
down_write(&osdc->lock);
linger_register(lreq);
- if (lreq->is_watch) {
- lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id;
- lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id;
- } else {
- lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id;
- }
calc_target(osdc, &lreq->t, false);
osd = lookup_create_osd(osdc, lreq->t.osd, true);
@@ -3202,9 +3227,9 @@ static void cancel_linger_map_check(struct ceph_osd_linger_request *lreq)
*/
static void __linger_cancel(struct ceph_osd_linger_request *lreq)
{
- if (lreq->is_watch && lreq->ping_req->r_osd)
+ if (lreq->ping_req && lreq->ping_req->r_osd)
cancel_linger_request(lreq->ping_req);
- if (lreq->reg_req->r_osd)
+ if (lreq->reg_req && lreq->reg_req->r_osd)
cancel_linger_request(lreq->reg_req);
cancel_linger_map_check(lreq);
unlink_linger(lreq->osd, lreq);
@@ -4553,21 +4578,23 @@ bad:
/*
* Register request, send initial attempt.
*/
-int ceph_osdc_start_request(struct ceph_osd_client *osdc,
- struct ceph_osd_request *req,
- bool nofail)
+void ceph_osdc_start_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
{
down_read(&osdc->lock);
submit_request(req, false);
up_read(&osdc->lock);
-
- return 0;
}
EXPORT_SYMBOL(ceph_osdc_start_request);
/*
- * Unregister a registered request. The request is not completed:
- * ->r_result isn't set and __complete_request() isn't called.
+ * Unregister request. If @req was registered, it isn't completed:
+ * r_result isn't set and __complete_request() isn't invoked.
+ *
+ * If @req wasn't registered, this call may have raced with
+ * handle_reply(), in which case r_result would already be set and
+ * __complete_request() would be getting invoked, possibly even
+ * concurrently with this call.
*/
void ceph_osdc_cancel_request(struct ceph_osd_request *req)
{
@@ -4653,43 +4680,6 @@ again:
}
EXPORT_SYMBOL(ceph_osdc_sync);
-static struct ceph_osd_request *
-alloc_linger_request(struct ceph_osd_linger_request *lreq)
-{
- struct ceph_osd_request *req;
-
- req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO);
- if (!req)
- return NULL;
-
- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
- return req;
-}
-
-static struct ceph_osd_request *
-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode)
-{
- struct ceph_osd_request *req;
-
- req = alloc_linger_request(lreq);
- if (!req)
- return NULL;
-
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- osd_req_op_watch_init(req, 0, 0, watch_opcode);
-
- if (ceph_osdc_alloc_messages(req, GFP_NOIO)) {
- ceph_osdc_put_request(req);
- return NULL;
- }
-
- return req;
-}
-
/*
* Returns a handle, caller owns a ref.
*/
@@ -4719,18 +4709,6 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
lreq->t.flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&lreq->mtime);
- lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH);
- if (!lreq->reg_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
- lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING);
- if (!lreq->ping_req) {
- ret = -ENOMEM;
- goto err_put_lreq;
- }
-
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (ret) {
@@ -4768,14 +4746,14 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
req->r_flags = CEPH_OSD_FLAG_WRITE;
ktime_get_real_ts64(&req->r_mtime);
- osd_req_op_watch_init(req, 0, lreq->linger_id,
- CEPH_OSD_WATCH_OP_UNWATCH);
+ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH,
+ lreq->linger_id, 0);
ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
linger_cancel(lreq);
linger_put(lreq);
ret = wait_request_timeout(req, opts->mount_timeout);
@@ -4846,7 +4824,7 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
out_put_req:
@@ -4855,35 +4833,6 @@ out_put_req:
}
EXPORT_SYMBOL(ceph_osdc_notify_ack);
-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
- u64 cookie, u32 prot_ver, u32 timeout,
- void *payload, u32 payload_len)
-{
- struct ceph_osd_req_op *op;
- struct ceph_pagelist *pl;
- int ret;
-
- op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
- op->notify.cookie = cookie;
-
- pl = ceph_pagelist_alloc(GFP_NOIO);
- if (!pl)
- return -ENOMEM;
-
- ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
- ret |= ceph_pagelist_encode_32(pl, timeout);
- ret |= ceph_pagelist_encode_32(pl, payload_len);
- ret |= ceph_pagelist_append(pl, payload, payload_len);
- if (ret) {
- ceph_pagelist_release(pl);
- return -ENOMEM;
- }
-
- ceph_osd_data_pagelist_init(&op->notify.request_data, pl);
- op->indata_len = pl->length;
- return 0;
-}
-
/*
* @timeout: in seconds
*
@@ -4902,7 +4851,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
size_t *preply_len)
{
struct ceph_osd_linger_request *lreq;
- struct page **pages;
int ret;
WARN_ON(!timeout);
@@ -4915,41 +4863,35 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
if (!lreq)
return -ENOMEM;
- lreq->preply_pages = preply_pages;
- lreq->preply_len = preply_len;
-
- ceph_oid_copy(&lreq->t.base_oid, oid);
- ceph_oloc_copy(&lreq->t.base_oloc, oloc);
- lreq->t.flags = CEPH_OSD_FLAG_READ;
-
- lreq->reg_req = alloc_linger_request(lreq);
- if (!lreq->reg_req) {
+ lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO);
+ if (!lreq->request_pl) {
ret = -ENOMEM;
goto out_put_lreq;
}
- /*
- * Pass 0 for cookie because we don't know it yet, it will be
- * filled in by linger_submit().
- */
- ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout,
- payload, payload_len);
- if (ret)
+ ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout);
+ ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len);
+ ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len);
+ if (ret) {
+ ret = -ENOMEM;
goto out_put_lreq;
+ }
/* for notify_id */
- pages = ceph_alloc_page_vector(1, GFP_NOIO);
- if (IS_ERR(pages)) {
- ret = PTR_ERR(pages);
+ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO);
+ if (IS_ERR(lreq->notify_id_pages)) {
+ ret = PTR_ERR(lreq->notify_id_pages);
+ lreq->notify_id_pages = NULL;
goto out_put_lreq;
}
- ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify,
- response_data),
- pages, PAGE_SIZE, 0, false, true);
- ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO);
- if (ret)
- goto out_put_lreq;
+ lreq->preply_pages = preply_pages;
+ lreq->preply_len = preply_len;
+
+ ceph_oid_copy(&lreq->t.base_oid, oid);
+ ceph_oloc_copy(&lreq->t.base_oloc, oloc);
+ lreq->t.flags = CEPH_OSD_FLAG_READ;
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
@@ -5098,7 +5040,7 @@ int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
void *p = page_address(pages[0]);
@@ -5175,7 +5117,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
if (ret)
goto out_put_req;
- ceph_osdc_start_request(osdc, req, false);
+ ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
if (ret >= 0) {
ret = req->r_ops[0].rval;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 75b738083523..295098873861 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -11,6 +11,22 @@
#include <linux/crush/hash.h>
#include <linux/crush/mapper.h>
+static __printf(2, 3)
+void osdmap_info(const struct ceph_osdmap *map, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_INFO "%s (%pU e%u): %pV", KBUILD_MODNAME, &map->fsid,
+ map->epoch, &vaf);
+
+ va_end(args);
+}
+
char *ceph_osdmap_state_str(char *str, int len, u32 state)
{
if (!len)
@@ -571,10 +587,10 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
goto bad;
#endif
r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
- c->rules[i] = r;
if (r == NULL)
goto badmem;
dout(" rule %d is at %p\n", i, r);
+ c->rules[i] = r;
r->len = yes;
ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
@@ -980,7 +996,7 @@ static struct crush_work *alloc_workspace(const struct crush_map *c)
work_size = crush_work_size(c, CEPH_PG_MAX_SIZE);
dout("%s work_size %zu bytes\n", __func__, work_size);
- work = ceph_kvmalloc(work_size, GFP_NOIO);
+ work = kvmalloc(work_size, GFP_NOIO);
if (!work)
return NULL;
@@ -1190,9 +1206,9 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
if (max == map->max_osd)
return 0;
- state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
- weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
- addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
+ state = kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
+ weight = kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
+ addr = kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
if (!state || !weight || !addr) {
kvfree(state);
kvfree(weight);
@@ -1222,7 +1238,7 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
if (map->osd_primary_affinity) {
u32 *affinity;
- affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
+ affinity = kvmalloc(array_size(max, sizeof(*affinity)),
GFP_NOFS);
if (!affinity)
return -ENOMEM;
@@ -1503,7 +1519,7 @@ static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
if (!map->osd_primary_affinity) {
int i;
- map->osd_primary_affinity = ceph_kvmalloc(
+ map->osd_primary_affinity = kvmalloc(
array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
GFP_NOFS);
if (!map->osd_primary_affinity)
@@ -1566,7 +1582,7 @@ static int decode_new_primary_affinity(void **p, void *end,
if (ret)
return ret;
- pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
+ osdmap_info(map, "osd%d primary-affinity 0x%x\n", osd, aff);
}
return 0;
@@ -1864,9 +1880,9 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
osd = ceph_decode_32(p);
w = ceph_decode_32(p);
BUG_ON(osd >= map->max_osd);
- pr_info("osd%d weight 0x%x %s\n", osd, w,
- w == CEPH_OSD_IN ? "(in)" :
- (w == CEPH_OSD_OUT ? "(out)" : ""));
+ osdmap_info(map, "osd%d weight 0x%x %s\n", osd, w,
+ w == CEPH_OSD_IN ? "(in)" :
+ (w == CEPH_OSD_OUT ? "(out)" : ""));
map->osd_weight[osd] = w;
/*
@@ -1898,10 +1914,10 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
BUG_ON(osd >= map->max_osd);
if ((map->osd_state[osd] & CEPH_OSD_UP) &&
(xorstate & CEPH_OSD_UP))
- pr_info("osd%d down\n", osd);
+ osdmap_info(map, "osd%d down\n", osd);
if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
(xorstate & CEPH_OSD_EXISTS)) {
- pr_info("osd%d does not exist\n", osd);
+ osdmap_info(map, "osd%d does not exist\n", osd);
ret = set_primary_affinity(map, osd,
CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
if (ret)
@@ -1931,7 +1947,7 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr));
- pr_info("osd%d up\n", osd);
+ osdmap_info(map, "osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr;
}
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 65e34f78b05d..74622b278d57 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -96,7 +96,7 @@ int ceph_pagelist_append(struct ceph_pagelist *pl, const void *buf, size_t len)
EXPORT_SYMBOL(ceph_pagelist_append);
/* Allocate enough pages for a pagelist to append the given amount
- * of data without without allocating.
+ * of data without allocating.
* Returns: 0 on success, -ENOMEM on error.
*/
int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space)