From 0c0308066ca53fdf1423895f3a42838b67b3a5a8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 30 Jul 2011 12:45:35 -0400 Subject: NFS: Fix spurious readdir cookie loop messages If the directory contents change, then we have to accept that the file->f_pos value may shrink if we do a 'search-by-cookie'. In that case, we should turn off the loop detection and let the NFS client try to recover. The patch also fixes a second loop detection bug by ensuring that after turning on the ctx->duped flag, we read at least one new cookie into ctx->dir_cookie before attempting to match with ctx->dup_cookie. Reported-by: Petr Vandrovec Cc: stable@kernel.org [2.6.39+] Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8b579beb6358..b96fb99072ff 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -99,9 +99,10 @@ struct nfs_open_context { struct nfs_open_dir_context { struct rpc_cred *cred; + unsigned long attr_gencount; __u64 dir_cookie; __u64 dup_cookie; - int duped; + signed char duped; }; /* -- cgit v1.2.3-59-g8ed1b From a9bae5666d0510ad69bdb437371c9a3e6b770705 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 30 Jul 2011 20:52:33 -0400 Subject: pnfs: let layoutcommit handle a list of lseg There can be multiple lseg per file, so layoutcommit should be able to handle it. 
[Needed in v3.0] CC: Stable Tree Signed-off-by: Peng Tao Signed-off-by: Boaz Harrosh Signed-off-by: Jim Rees Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 +++++++- fs/nfs/pnfs.c | 32 ++++++++++++++++---------------- fs/nfs/pnfs.h | 2 ++ include/linux/nfs_xdr.h | 2 +- 4 files changed, 26 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 079614deca3f..efa6ae822ca8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5912,9 +5912,15 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) static void nfs4_layoutcommit_release(void *calldata) { struct nfs4_layoutcommit_data *data = calldata; + struct pnfs_layout_segment *lseg, *tmp; /* Matched by references in pnfs_set_layoutcommit */ - put_lseg(data->lseg); + list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { + list_del_init(&lseg->pls_lc_list); + if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, + &lseg->pls_flags)) + put_lseg(lseg); + } put_rpccred(data->cred); kfree(data); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ab2cb04f8a28..a7e5f17f7776 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -225,6 +225,7 @@ static void init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { INIT_LIST_HEAD(&lseg->pls_list); + INIT_LIST_HEAD(&lseg->pls_lc_list); atomic_set(&lseg->pls_refcount, 1); smp_mb(); set_bit(NFS_LSEG_VALID, &lseg->pls_flags); @@ -1356,16 +1357,17 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); /* - * Currently there is only one (whole file) write lseg. + * There can be multiple RW segments. 
*/ -static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode) +static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) { - struct pnfs_layout_segment *lseg, *rv = NULL; + struct pnfs_layout_segment *lseg; - list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) - if (lseg->pls_range.iomode == IOMODE_RW) - rv = lseg; - return rv; + list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { + if (lseg->pls_range.iomode == IOMODE_RW && + test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + list_add(&lseg->pls_lc_list, listp); + } } void @@ -1377,12 +1379,14 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) spin_lock(&nfsi->vfs_inode.i_lock); if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { - /* references matched in nfs4_layoutcommit_release */ - get_lseg(wdata->lseg); mark_as_dirty = true; dprintk("%s: Set layoutcommit for inode %lu ", __func__, wdata->inode->i_ino); } + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { + /* references matched in nfs4_layoutcommit_release */ + get_lseg(wdata->lseg); + } if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; spin_unlock(&nfsi->vfs_inode.i_lock); @@ -1409,7 +1413,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) { struct nfs4_layoutcommit_data *data; struct nfs_inode *nfsi = NFS_I(inode); - struct pnfs_layout_segment *lseg; loff_t end_pos; int status = 0; @@ -1426,17 +1429,15 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) goto out; } + INIT_LIST_HEAD(&data->lseg_list); spin_lock(&inode->i_lock); if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { spin_unlock(&inode->i_lock); kfree(data); goto out; } - /* - * Currently only one (whole file) write lseg which is referenced - * in pnfs_set_layoutcommit and will be found. 
- */ - lseg = pnfs_list_write_lseg(inode); + + pnfs_list_write_lseg(inode, &data->lseg_list); end_pos = nfsi->layout->plh_lwb; nfsi->layout->plh_lwb = 0; @@ -1446,7 +1447,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) spin_unlock(&inode->i_lock); data->args.inode = inode; - data->lseg = lseg; data->cred = get_rpccred(nfsi->layout->plh_lc_cred); nfs_fattr_init(&data->fattr); data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1f5cb47e2a2c..b94f874886ca 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -36,10 +36,12 @@ enum { NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ NFS_LSEG_ROC, /* roc bit received from server */ + NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ }; struct pnfs_layout_segment { struct list_head pls_list; + struct list_head pls_lc_list; struct pnfs_layout_range pls_range; atomic_t pls_refcount; unsigned long pls_flags; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5b115956abac..feb312716c07 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -262,7 +262,7 @@ struct nfs4_layoutcommit_res { struct nfs4_layoutcommit_data { struct rpc_task task; struct nfs_fattr fattr; - struct pnfs_layout_segment *lseg; + struct list_head lseg_list; struct rpc_cred *cred; struct nfs4_layoutcommit_args args; struct nfs4_layoutcommit_res res; -- cgit v1.2.3-59-g8ed1b From 7f11d8d38d64739e190581e015a2a2730ff54e2a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Sat, 30 Jul 2011 20:52:35 -0400 Subject: pnfs: GETDEVICELIST The block driver uses GETDEVICELIST Signed-off-by: Andy Adamson [pass struct nfs_server * to getdevicelist] [get machine creds for getdevicelist] [fix getdevicelist decode sizing] Signed-off-by: Benny Halevy Signed-off-by: Benny Halevy Signed-off-by: Jim Rees Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 48 ++++++++++++++++++ fs/nfs/nfs4xdr.c | 131
++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 12 +++++ include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 11 ++++ 5 files changed, 203 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index efa6ae822ca8..af32d3df0544 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5834,6 +5834,54 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) return status; } +/* + * Retrieve the list of Data Server devices from the MDS. + */ +static int _nfs4_getdevicelist(struct nfs_server *server, + const struct nfs_fh *fh, + struct pnfs_devicelist *devlist) +{ + struct nfs4_getdevicelist_args args = { + .fh = fh, + .layoutclass = server->pnfs_curr_ld->id, + }; + struct nfs4_getdevicelist_res res = { + .devlist = devlist, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + dprintk("--> %s\n", __func__); + status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 0); + dprintk("<-- %s status=%d\n", __func__, status); + return status; +} + +int nfs4_proc_getdevicelist(struct nfs_server *server, + const struct nfs_fh *fh, + struct pnfs_devicelist *devlist) +{ + struct nfs4_exception exception = { }; + int err; + + do { + err = nfs4_handle_exception(server, + _nfs4_getdevicelist(server, fh, devlist), + &exception); + } while (exception.retry); + + dprintk("%s: err=%d, num_devs=%u\n", __func__, + err, devlist->num_devs); + + return err; +} +EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); + static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b851b560a6f8..5f769f8d05b0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -314,6 +314,17 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) #define 
decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) +#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ + encode_verifier_maxsz) +#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ + 2 /* nfs_cookie4 gdlr_cookie */ + \ + decode_verifier_maxsz \ + /* verifier4 gdlr_verifier */ + \ + 1 /* gdlr_deviceid_list count */ + \ + XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ + NFS4_DEVICEID4_SIZE) \ + /* gdlr_deviceid_list */ + \ + 1 /* bool gdlr_eof */) #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ @@ -748,6 +759,14 @@ static int nfs4_stat_to_errno(int); #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_reclaim_complete_maxsz) +#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getdevicelist_maxsz) +#define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getdevicelist_maxsz) #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz +\ encode_getdeviceinfo_maxsz) @@ -1854,6 +1873,26 @@ static void encode_sequence(struct xdr_stream *xdr, } #ifdef CONFIG_NFS_V4_1 +static void +encode_getdevicelist(struct xdr_stream *xdr, + const struct nfs4_getdevicelist_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + nfs4_verifier dummy = { + .data = "dummmmmy", + }; + + p = reserve_space(xdr, 20); + *p++ = cpu_to_be32(OP_GETDEVICELIST); + *p++ = cpu_to_be32(args->layoutclass); + *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); + xdr_encode_hyper(p, 0ULL); /* cookie */ + encode_nfs4_verifier(xdr, &dummy); + hdr->nops++; + hdr->replen += decode_getdevicelist_maxsz; +} + static void encode_getdeviceinfo(struct xdr_stream *xdr, const struct nfs4_getdeviceinfo_args *args, @@ -2774,6 +2813,24 @@ static void 
nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode GETDEVICELIST request + */ +static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_getdevicelist_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_getdevicelist(xdr, args, &hdr); + encode_nops(&hdr); +} + /* * Encode GETDEVICEINFO request */ @@ -5268,6 +5325,53 @@ out_overflow: } #if defined(CONFIG_NFS_V4_1) +/* + * TODO: Need to handle case when EOF != true; + */ +static int decode_getdevicelist(struct xdr_stream *xdr, + struct pnfs_devicelist *res) +{ + __be32 *p; + int status, i; + struct nfs_writeverf verftemp; + + status = decode_op_hdr(xdr, OP_GETDEVICELIST); + if (status) + return status; + + p = xdr_inline_decode(xdr, 8 + 8 + 4); + if (unlikely(!p)) + goto out_overflow; + + /* TODO: Skip cookie for now */ + p += 2; + + /* Read verifier */ + p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); + + res->num_devs = be32_to_cpup(p); + + dprintk("%s: num_dev %d\n", __func__, res->num_devs); + + if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { + printk(KERN_ERR "%s too many result dev_num %u\n", + __func__, res->num_devs); + return -EIO; + } + + p = xdr_inline_decode(xdr, + res->num_devs * NFS4_DEVICEID4_SIZE + 4); + if (unlikely(!p)) + goto out_overflow; + for (i = 0; i < res->num_devs; i++) + p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, + NFS4_DEVICEID4_SIZE); + res->eof = be32_to_cpup(p); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} static int decode_getdeviceinfo(struct xdr_stream *xdr, struct pnfs_device *pdev) @@ -6541,6 +6645,32 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, return status; } +/* + * Decode GETDEVICELIST response + */ +static int 
nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_getdevicelist_res *res) +{ + struct compound_hdr hdr; + int status; + + dprintk("encoding getdevicelist!\n"); + + status = decode_compound_hdr(xdr, &hdr); + if (status != 0) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status != 0) + goto out; + status = decode_putfh(xdr); + if (status != 0) + goto out; + status = decode_getdevicelist(xdr, res->devlist); +out: + return status; +} + /* * Decode GETDEVINFO response */ @@ -6908,6 +7038,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), + PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index b94f874886ca..7074394944a9 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -135,14 +135,26 @@ struct pnfs_device { unsigned int layout_type; unsigned int mincount; struct page **pages; + void *area; unsigned int pgbase; unsigned int pglen; }; +#define NFS4_PNFS_GETDEVLIST_MAXNUM 16 + +struct pnfs_devicelist { + unsigned int eof; + unsigned int num_devs; + struct nfs4_deviceid dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM]; +}; + extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); /* nfs4proc.c */ +extern int nfs4_proc_getdevicelist(struct nfs_server *server, + const struct nfs_fh *fh, + struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index a3c4bc800dce..76f99e8714f3 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -566,6 +566,7 @@ enum { NFSPROC4_CLNT_SECINFO_NO_NAME, 
NFSPROC4_CLNT_TEST_STATEID, NFSPROC4_CLNT_FREE_STATEID, + NFSPROC4_CLNT_GETDEVICELIST, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index feb312716c07..21f333eae3c8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -235,6 +235,17 @@ struct nfs4_layoutget { gfp_t gfp_flags; }; +struct nfs4_getdevicelist_args { + const struct nfs_fh *fh; + u32 layoutclass; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_getdevicelist_res { + struct pnfs_devicelist *devlist; + struct nfs4_sequence_res seq_res; +}; + struct nfs4_getdeviceinfo_args { struct pnfs_device *pdev; struct nfs4_sequence_args seq_args; -- cgit v1.2.3-59-g8ed1b From dae100c2b1b9463996aab9162f2258145c43f7df Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Sat, 30 Jul 2011 20:52:37 -0400 Subject: pnfs: ask for layout_blksize and save it in nfs_server Block layout needs it to determine IO size. Signed-off-by: Fred Isaman Signed-off-by: Tao Guo Signed-off-by: Benny Halevy Signed-off-by: Benny Halevy Signed-off-by: Jim Rees Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 5 ++- fs/nfs/nfs4xdr.c | 99 +++++++++++++++++++++++++++++++++++++++-------- include/linux/nfs_fs_sb.h | 3 +- include/linux/nfs_xdr.h | 3 +- 6 files changed, 91 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a9b18483cb24..de00a373f085 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -936,6 +936,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, if (server->wsize > NFS_MAX_FILE_IO_SIZE) server->wsize = NFS_MAX_FILE_IO_SIZE; server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + server->pnfs_blksize = fsinfo->blksize; set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1909ee8be350..1ec1a85fa71c 100644 --- 
a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -318,7 +318,7 @@ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; extern const u32 nfs4_fattr_bitmap[2]; extern const u32 nfs4_statfs_bitmap[2]; extern const u32 nfs4_pathconf_bitmap[2]; -extern const u32 nfs4_fsinfo_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[3]; extern const u32 nfs4_fs_locations_bitmap[2]; /* nfs4renewd.c */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af32d3df0544..e86de799dd12 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -140,12 +140,13 @@ const u32 nfs4_pathconf_bitmap[2] = { 0 }; -const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE +const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_LEASE_TIME, FATTR4_WORD1_TIME_DELTA - | FATTR4_WORD1_FS_LAYOUT_TYPES + | FATTR4_WORD1_FS_LAYOUT_TYPES, + FATTR4_WORD2_LAYOUT_BLKSIZE }; const u32 nfs4_fs_locations_bitmap[2] = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5f769f8d05b0..026166993d11 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -113,7 +113,11 @@ static int nfs4_stat_to_errno(int); #define encode_restorefh_maxsz (op_encode_hdr_maxsz) #define decode_restorefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (encode_getattr_maxsz) -#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15) +/* The 5 accounts for the PNFS attributes, and assumes that at most three + * layout types will be returned. 
+ */ +#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 4 + 8 + 5) #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) #define decode_renew_maxsz (op_decode_hdr_maxsz) #define encode_setclientid_maxsz \ @@ -1123,6 +1127,35 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm hdr->replen += decode_getattr_maxsz; } +static void +encode_getattr_three(struct xdr_stream *xdr, + uint32_t bm0, uint32_t bm1, uint32_t bm2, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(OP_GETATTR); + if (bm2) { + p = reserve_space(xdr, 16); + *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bm0); + *p++ = cpu_to_be32(bm1); + *p = cpu_to_be32(bm2); + } else if (bm1) { + p = reserve_space(xdr, 12); + *p++ = cpu_to_be32(2); + *p++ = cpu_to_be32(bm0); + *p = cpu_to_be32(bm1); + } else { + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(1); + *p = cpu_to_be32(bm0); + } + hdr->nops++; + hdr->replen += decode_getattr_maxsz; +} + static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], @@ -1131,8 +1164,11 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) { - encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], - bitmask[1] & nfs4_fsinfo_bitmap[1], hdr); + encode_getattr_three(xdr, + bitmask[0] & nfs4_fsinfo_bitmap[0], + bitmask[1] & nfs4_fsinfo_bitmap[1], + bitmask[2] & nfs4_fsinfo_bitmap[2], + hdr); } static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) @@ -2643,7 +2679,7 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, struct compound_hdr hdr = { .nops = 0, }; - const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; 
encode_compound_hdr(xdr, req, &hdr); encode_setclientid_confirm(xdr, arg, &hdr); @@ -2787,7 +2823,7 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), }; - const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->la_seq_args, &hdr); @@ -3068,14 +3104,17 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) goto out_overflow; bmlen = be32_to_cpup(p); - bitmap[0] = bitmap[1] = 0; + bitmap[0] = bitmap[1] = bitmap[2] = 0; p = xdr_inline_decode(xdr, (bmlen << 2)); if (unlikely(!p)) goto out_overflow; if (bmlen > 0) { bitmap[0] = be32_to_cpup(p++); - if (bmlen > 1) - bitmap[1] = be32_to_cpup(p); + if (bmlen > 1) { + bitmap[1] = be32_to_cpup(p++); + if (bmlen > 2) + bitmap[2] = be32_to_cpup(p); + } } return 0; out_overflow: @@ -3107,8 +3146,9 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3 return ret; bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; } else - bitmask[0] = bitmask[1] = 0; - dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]); + bitmask[0] = bitmask[1] = bitmask[2] = 0; + dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__, + bitmask[0], bitmask[1], bitmask[2]); return 0; } @@ -4162,7 +4202,7 @@ out_overflow: static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) { __be32 *savep; - uint32_t attrlen, bitmap[2] = {0}; + uint32_t attrlen, bitmap[3] = {0}; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -4188,7 +4228,7 @@ xdr_error: static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) { __be32 *savep; - uint32_t attrlen, bitmap[2] = {0}; + uint32_t attrlen, bitmap[3] = {0}; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -4220,7 +4260,7 @@ xdr_error: static int 
decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) { __be32 *savep; - uint32_t attrlen, bitmap[2] = {0}; + uint32_t attrlen, bitmap[3] = {0}; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -4360,7 +4400,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat { __be32 *savep; uint32_t attrlen, - bitmap[2] = {0}; + bitmap[3] = {0}; int status; status = decode_op_hdr(xdr, OP_GETATTR); @@ -4446,10 +4486,32 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, return status; } +/* + * The preferred block size for layout directed io + */ +static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + *res = 0; + if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { __be32 *savep; - uint32_t attrlen, bitmap[2]; + uint32_t attrlen, bitmap[3]; int status; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) @@ -4477,6 +4539,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); if (status != 0) goto xdr_error; + status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); + if (status) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: @@ -4896,7 +4961,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, { __be32 *savep; uint32_t attrlen, - bitmap[2] = {0}; + bitmap[3] = {0}; struct kvec *iov = req->rq_rcv_buf.head; int status; @@ -6852,7 +6917,7 @@ out: int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, int plus) { - uint32_t bitmap[2] = {0}; + 
uint32_t bitmap[3] = {0}; uint32_t len; __be32 *p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4faeac8f448a..b2ea8b82d2cb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -132,7 +132,7 @@ struct nfs_server { #endif #ifdef CONFIG_NFS_V4 - u32 attr_bitmask[2];/* V4 bitmask representing the set + u32 attr_bitmask[3];/* V4 bitmask representing the set of attributes supported on this filesystem */ u32 cache_consistency_bitmask[2]; @@ -145,6 +145,7 @@ struct nfs_server { filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; + u32 pnfs_blksize; /* layout_blksize attr */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 21f333eae3c8..94f27e56df9c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -122,6 +122,7 @@ struct nfs_fsinfo { struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ __u32 layouttype; /* supported pnfs layout driver */ + __u32 blksize; /* preferred pnfs io block size */ }; struct nfs_fsstat { @@ -954,7 +955,7 @@ struct nfs4_server_caps_arg { }; struct nfs4_server_caps_res { - u32 attr_bitmask[2]; + u32 attr_bitmask[3]; u32 acl_bitmask; u32 has_links; u32 has_symlinks; -- cgit v1.2.3-59-g8ed1b From db29c089094b2e686ebc9ed9f002be4a4f94b1f8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Sat, 30 Jul 2011 20:52:38 -0400 Subject: pnfs: cleanup_layoutcommit This gives layout driver a chance to cleanup structures they put in at encode_layoutcommit. 
Signed-off-by: Andy Adamson [fixup layout header pointer for layoutcommit] Signed-off-by: Benny Halevy Signed-off-by: Benny Halevy [rm inode and pnfs_layout_hdr args from cleanup_layoutcommit()] Signed-off-by: Jim Rees Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 1 + fs/nfs/pnfs.c | 8 ++++++++ fs/nfs/pnfs.h | 3 +++ include/linux/nfs_xdr.h | 1 + 5 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e86de799dd12..8c77039e7a81 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5963,6 +5963,7 @@ static void nfs4_layoutcommit_release(void *calldata) struct nfs4_layoutcommit_data *data = calldata; struct pnfs_layout_segment *lseg, *tmp; + pnfs_cleanup_layoutcommit(data); /* Matched by references in pnfs_set_layoutcommit */ list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { list_del_init(&lseg->pls_lc_list); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 026166993d11..1dce12f41a4f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5599,6 +5599,7 @@ static int decode_layoutcommit(struct xdr_stream *xdr, int status; status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); + res->status = status; if (status) return status; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3a47f7ce1e90..e550e8836c37 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1411,6 +1411,14 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); +void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) +{ + struct nfs_server *nfss = NFS_SERVER(data->args.inode); + + if (nfss->pnfs_curr_ld->cleanup_layoutcommit) + nfss->pnfs_curr_ld->cleanup_layoutcommit(data); +} + /* * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bddd8b997e18..606fbde2e757 100644 --- a/fs/nfs/pnfs.h +++ 
b/fs/nfs/pnfs.h @@ -113,6 +113,8 @@ struct pnfs_layoutdriver_type { struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args); + void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); + void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); @@ -196,6 +198,7 @@ void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); +void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_ld_write_done(struct nfs_write_data *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 94f27e56df9c..569ea5b76fda 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -269,6 +269,7 @@ struct nfs4_layoutcommit_res { struct nfs_fattr *fattr; const struct nfs_server *server; struct nfs4_sequence_res seq_res; + int status; }; struct nfs4_layoutcommit_data { -- cgit v1.2.3-59-g8ed1b From fe0a9b740881d181e3c96c1f6f6043e252692ffe Mon Sep 17 00:00:00 2001 From: Jim Rees Date: Sat, 30 Jul 2011 20:52:42 -0400 Subject: pnfsblock: add device operations Signed-off-by: Jim Rees Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Benny Halevy [upcall bugfixes] Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/Makefile | 2 +- fs/nfs/blocklayout/blocklayout.c | 42 ++++++++ fs/nfs/blocklayout/blocklayout.h | 40 ++++++++ fs/nfs/blocklayout/blocklayoutdev.c | 191 ++++++++++++++++++++++++++++++++++++ fs/nfs/client.c | 2 +- include/linux/nfs.h | 2 + 6 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 fs/nfs/blocklayout/blocklayoutdev.c (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile index 
5cfadf6ebc90..5bf3409084d2 100644 --- a/fs/nfs/blocklayout/Makefile +++ b/fs/nfs/blocklayout/Makefile @@ -2,4 +2,4 @@ # Makefile for the pNFS block layout driver kernel module # obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o -blocklayoutdriver-objs := blocklayout.o extents.o +blocklayoutdriver-objs := blocklayout.o extents.o blocklayoutdev.o diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 8dde3723482e..c83878441047 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -31,6 +31,8 @@ */ #include #include +#include +#include #include "blocklayout.h" @@ -40,6 +42,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andy Adamson "); MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); +struct dentry *bl_device_pipe; +wait_queue_head_t bl_wq; + static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { @@ -176,13 +181,49 @@ static struct pnfs_layoutdriver_type blocklayout_type = { .pg_write_ops = &bl_pg_write_ops, }; +static const struct rpc_pipe_ops bl_upcall_ops = { + .upcall = bl_pipe_upcall, + .downcall = bl_pipe_downcall, + .destroy_msg = bl_pipe_destroy_msg, +}; + static int __init nfs4blocklayout_init(void) { + struct vfsmount *mnt; + struct path path; int ret; dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); ret = pnfs_register_layoutdriver(&blocklayout_type); + if (ret) + goto out; + + init_waitqueue_head(&bl_wq); + + mnt = rpc_get_mount(); + if (IS_ERR(mnt)) { + ret = PTR_ERR(mnt); + goto out_remove; + } + + ret = vfs_path_lookup(mnt->mnt_root, + mnt, + NFS_PIPE_DIRNAME, 0, &path); + if (ret) + goto out_remove; + + bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, + &bl_upcall_ops, 0); + if (IS_ERR(bl_device_pipe)) { + ret = PTR_ERR(bl_device_pipe); + goto out_remove; + } +out: + return ret; + +out_remove: + pnfs_unregister_layoutdriver(&blocklayout_type); return ret; } @@ -192,6 +233,7 @@ static void __exit nfs4blocklayout_exit(void) 
__func__); pnfs_unregister_layoutdriver(&blocklayout_type); + rpc_unlink(bl_device_pipe); } MODULE_ALIAS("nfs-layouttype4-3"); diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 98e2f60c2143..dd25f1b3fe1e 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -34,8 +34,16 @@ #include #include +#include + #include "../pnfs.h" +struct pnfs_block_dev { + struct list_head bm_node; + struct nfs4_deviceid bm_mdevid; /* associated devid */ + struct block_device *bm_mdev; /* meta device itself */ +}; + enum exstate4 { PNFS_BLOCK_READWRITE_DATA = 0, PNFS_BLOCK_READ_DATA = 1, @@ -88,5 +96,37 @@ static inline struct pnfs_block_layout *BLK_LO2EXT(struct pnfs_layout_hdr *lo) return container_of(lo, struct pnfs_block_layout, bl_layout); } +struct bl_dev_msg { + int status; + uint32_t major, minor; +}; + +struct bl_msg_hdr { + u8 type; + u16 totallen; /* length of entire message, including hdr itself */ +}; + +extern struct dentry *bl_device_pipe; +extern wait_queue_head_t bl_wq; + +#define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ +#define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ +#define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ +#define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ +#define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ + +/* blocklayoutdev.c */ +ssize_t bl_pipe_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); +ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); +void bl_pipe_destroy_msg(struct rpc_pipe_msg *); +struct block_device *nfs4_blkdev_get(dev_t dev); +int nfs4_blkdev_put(struct block_device *bdev); +struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, + struct pnfs_device *dev, + struct list_head *sdlist); +int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, + struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); + void bl_put_extent(struct pnfs_block_extent *be); #endif /* 
FS_NFS_NFS4BLOCKLAYOUT_H */ diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c new file mode 100644 index 000000000000..7e1377fcfdce --- /dev/null +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -0,0 +1,191 @@ +/* + * linux/fs/nfs/blocklayout/blocklayoutdev.c + * + * Device operations for the pnfs nfs4 file layout driver. + * + * Copyright (c) 2006 The Regents of the University of Michigan. + * All rights reserved. + * + * Andy Adamson + * Fred Isaman + * + * permission is granted to use, copy, create derivative works and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the university of michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. if + * the above copyright notice or any other identification of the + * university of michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * this software is provided as is, without representation from the + * university of michigan as to its fitness for any purpose, and without + * warranty by the university of michigan of any kind, either express + * or implied, including without limitation the implied warranties of + * merchantability and fitness for a particular purpose. the regents + * of the university of michigan shall not be liable for any damages, + * including special, indirect, incidental, or consequential damages, + * with respect to any claim arising out or in connection with the use + * of the software, even if it has been or is hereafter advised of the + * possibility of such damages. + */ +#include +#include /* __bread */ + +#include +#include +#include + +#include "blocklayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +/* Open a block_device by device number. 
*/ +struct block_device *nfs4_blkdev_get(dev_t dev) +{ + struct block_device *bd; + + dprintk("%s enter\n", __func__); + bd = blkdev_get_by_dev(dev, FMODE_READ, NULL); + if (IS_ERR(bd)) + goto fail; + return bd; +fail: + dprintk("%s failed to open device : %ld\n", + __func__, PTR_ERR(bd)); + return NULL; +} + +/* + * Release the block device + */ +int nfs4_blkdev_put(struct block_device *bdev) +{ + dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev), + MINOR(bdev->bd_dev)); + return blkdev_put(bdev, FMODE_READ); +} + +/* + * Shouldn't there be a rpc_generic_upcall() to do this for us? + */ +ssize_t bl_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, + char __user *dst, size_t buflen) +{ + char *data = (char *)msg->data + msg->copied; + size_t mlen = min(msg->len - msg->copied, buflen); + unsigned long left; + + left = copy_to_user(dst, data, mlen); + if (left == mlen) { + msg->errno = -EFAULT; + return -EFAULT; + } + + mlen -= left; + msg->copied += mlen; + msg->errno = 0; + return mlen; +} + +static struct bl_dev_msg bl_mount_reply; + +ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, + size_t mlen) +{ + if (mlen != sizeof (struct bl_dev_msg)) + return -EINVAL; + + if (copy_from_user(&bl_mount_reply, src, mlen) != 0) + return -EFAULT; + + wake_up(&bl_wq); + + return mlen; +} + +void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + if (msg->errno >= 0) + return; + wake_up(&bl_wq); +} + +/* + * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. 
+ */ +struct pnfs_block_dev * +nfs4_blk_decode_device(struct nfs_server *server, + struct pnfs_device *dev, + struct list_head *sdlist) +{ + struct pnfs_block_dev *rv = NULL; + struct block_device *bd = NULL; + struct rpc_pipe_msg msg; + struct bl_msg_hdr bl_msg = { + .type = BL_DEVICE_MOUNT, + .totallen = dev->mincount, + }; + uint8_t *dataptr; + DECLARE_WAITQUEUE(wq, current); + struct bl_dev_msg *reply = &bl_mount_reply; + + dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); + dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, + dev->mincount); + + memset(&msg, 0, sizeof(msg)); + msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); + if (!msg.data) { + rv = ERR_PTR(-ENOMEM); + goto out; + } + + memcpy(msg.data, &bl_msg, sizeof(bl_msg)); + dataptr = (uint8_t *) msg.data; + memcpy(&dataptr[sizeof(bl_msg)], dev->area, dev->mincount); + msg.len = sizeof(bl_msg) + dev->mincount; + + dprintk("%s CALLING USERSPACE DAEMON\n", __func__); + add_wait_queue(&bl_wq, &wq); + if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { + remove_wait_queue(&bl_wq, &wq); + goto out; + } + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&bl_wq, &wq); + + if (reply->status != BL_DEVICE_REQUEST_PROC) { + dprintk("%s failed to open device: %d\n", + __func__, reply->status); + rv = ERR_PTR(-EINVAL); + goto out; + } + + bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); + if (IS_ERR(bd)) { + dprintk("%s failed to open device : %ld\n", + __func__, PTR_ERR(bd)); + goto out; + } + + rv = kzalloc(sizeof(*rv), GFP_NOFS); + if (!rv) { + rv = ERR_PTR(-ENOMEM); + goto out; + } + + rv->bm_mdev = bd; + memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); + dprintk("%s Created device %s with bd_block_size %u\n", + __func__, + bd->bd_disk->disk_name, + bd->bd_block_size); + +out: + kfree(msg.data); + return rv; +} diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 
de00a373f085..5833fbbf59b0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -105,7 +105,7 @@ struct rpc_program nfs_program = { .nrvers = ARRAY_SIZE(nfs_version), .version = nfs_version, .stats = &nfs_rpcstat, - .pipe_dir_name = "/nfs", + .pipe_dir_name = NFS_PIPE_DIRNAME, }; struct rpc_stat nfs_rpcstat = { diff --git a/include/linux/nfs.h b/include/linux/nfs.h index f387919bbc59..8c6ee44914cb 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -29,6 +29,8 @@ #define NFS_MNT_VERSION 1 #define NFS_MNT3_VERSION 3 +#define NFS_PIPE_DIRNAME "/nfs" + /* * NFS stats. The good thing with these values is that NFSv3 errors are * a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which -- cgit v1.2.3-59-g8ed1b From 2f9fd182607e7b3bdca35f6ed7f2fae539f7c46b Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Sat, 30 Jul 2011 20:52:46 -0400 Subject: pnfsblock: call and parse getdevicelist Call GETDEVICELIST during mount, then call and parse GETDEVICEINFO for each device returned. 
[pnfsblock: get rid of deprecated xdr macros] Signed-off-by: Jim Rees [pnfsblock: fix pnfs_deviceid references] Signed-off-by: Fred Isaman [pnfsblock: fix print format warnings for sector_t and size_t] [pnfs-block: #include ] [pnfsblock: no PNFS_NFS_SERVER] Signed-off-by: Benny Halevy [pnfsblock: fix bug determining size of striped volume] [pnfsblock: fix oops when using multiple devices] Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Benny Halevy [pnfsblock: get rid of vmap and deviceid->area structure] Signed-off-by: Peng Tao Signed-off-by: Jim Rees Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 138 +++++++++++++++++++++++++++++++++++- fs/nfs/blocklayout/blocklayout.h | 13 +++- fs/nfs/blocklayout/blocklayoutdev.c | 13 +++- fs/nfs/pnfs.h | 1 - include/linux/nfs_fs_sb.h | 1 + 5 files changed, 158 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index e7bc7a57b3bb..6cd7f4f3acdb 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -157,17 +157,153 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) { } +static void free_blk_mountid(struct block_mount_id *mid) +{ + if (mid) { + struct pnfs_block_dev *dev; + spin_lock(&mid->bm_lock); + while (!list_empty(&mid->bm_devlist)) { + dev = list_first_entry(&mid->bm_devlist, + struct pnfs_block_dev, + bm_node); + list_del(&dev->bm_node); + bl_free_block_dev(dev); + } + spin_unlock(&mid->bm_lock); + kfree(mid); + } +} + +/* This is mostly copied from the filelayout's get_device_info function. + * It seems much of this should be at the generic pnfs level. 
+ */ +static struct pnfs_block_dev * +nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, + struct nfs4_deviceid *d_id) +{ + struct pnfs_device *dev; + struct pnfs_block_dev *rv = NULL; + u32 max_resp_sz; + int max_pages; + struct page **pages = NULL; + int i, rc; + + /* + * Use the session max response size as the basis for setting + * GETDEVICEINFO's maxcount + */ + max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + max_pages = max_resp_sz >> PAGE_SHIFT; + dprintk("%s max_resp_sz %u max_pages %d\n", + __func__, max_resp_sz, max_pages); + + dev = kmalloc(sizeof(*dev), GFP_NOFS); + if (!dev) { + dprintk("%s kmalloc failed\n", __func__); + return NULL; + } + + pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); + if (pages == NULL) { + kfree(dev); + return NULL; + } + for (i = 0; i < max_pages; i++) { + pages[i] = alloc_page(GFP_NOFS); + if (!pages[i]) + goto out_free; + } + + memcpy(&dev->dev_id, d_id, sizeof(*d_id)); + dev->layout_type = LAYOUT_BLOCK_VOLUME; + dev->pages = pages; + dev->pgbase = 0; + dev->pglen = PAGE_SIZE * max_pages; + dev->mincount = 0; + + dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); + rc = nfs4_proc_getdeviceinfo(server, dev); + dprintk("%s getdevice info returns %d\n", __func__, rc); + if (rc) + goto out_free; + + rv = nfs4_blk_decode_device(server, dev); + out_free: + for (i = 0; i < max_pages; i++) + __free_page(pages[i]); + kfree(pages); + kfree(dev); + return rv; +} + static int bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) { + struct block_mount_id *b_mt_id = NULL; + struct pnfs_devicelist *dlist = NULL; + struct pnfs_block_dev *bdev; + LIST_HEAD(block_disklist); + int status = 0, i; + dprintk("%s enter\n", __func__); - return 0; + + if (server->pnfs_blksize == 0) { + dprintk("%s Server did not return blksize\n", __func__); + return -EINVAL; + } + b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS); + if (!b_mt_id) { + status = -ENOMEM; + 
goto out_error; + } + /* Initialize nfs4 block layout mount id */ + spin_lock_init(&b_mt_id->bm_lock); + INIT_LIST_HEAD(&b_mt_id->bm_devlist); + + dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); + if (!dlist) { + status = -ENOMEM; + goto out_error; + } + dlist->eof = 0; + while (!dlist->eof) { + status = nfs4_proc_getdevicelist(server, fh, dlist); + if (status) + goto out_error; + dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n", + __func__, dlist->num_devs, dlist->eof); + for (i = 0; i < dlist->num_devs; i++) { + bdev = nfs4_blk_get_deviceinfo(server, fh, + &dlist->dev_id[i]); + if (!bdev) { + status = -ENODEV; + goto out_error; + } + spin_lock(&b_mt_id->bm_lock); + list_add(&bdev->bm_node, &b_mt_id->bm_devlist); + spin_unlock(&b_mt_id->bm_lock); + } + } + dprintk("%s SUCCESS\n", __func__); + server->pnfs_ld_data = b_mt_id; + + out_return: + kfree(dlist); + return status; + + out_error: + free_blk_mountid(b_mt_id); + goto out_return; } static int bl_clear_layoutdriver(struct nfs_server *server) { + struct block_mount_id *b_mt_id = server->pnfs_ld_data; + dprintk("%s enter\n", __func__); + free_blk_mountid(b_mt_id); + dprintk("%s RETURNS\n", __func__); return 0; } diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 581d8f47a723..d645880f61a0 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -38,6 +38,11 @@ #include "../pnfs.h" +struct block_mount_id { + spinlock_t bm_lock; /* protects list */ + struct list_head bm_devlist; /* holds pnfs_block_dev */ +}; + struct pnfs_block_dev { struct list_head bm_node; struct nfs4_deviceid bm_mdevid; /* associated devid */ @@ -99,7 +104,10 @@ struct pnfs_block_layout { sector_t bl_blocksize; /* Server blocksize in sectors */ }; -static inline struct pnfs_block_layout *BLK_LO2EXT(struct pnfs_layout_hdr *lo) +#define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data)) + +static inline struct pnfs_block_layout * 
+BLK_LO2EXT(struct pnfs_layout_hdr *lo) { return container_of(lo, struct pnfs_block_layout, bl_layout); } @@ -137,8 +145,7 @@ void bl_pipe_destroy_msg(struct rpc_pipe_msg *); struct block_device *nfs4_blkdev_get(dev_t dev); int nfs4_blkdev_put(struct block_device *bdev); struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, - struct pnfs_device *dev, - struct list_head *sdlist); + struct pnfs_device *dev); int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index 64da33a40eaf..b23fe601d1c9 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -116,8 +116,7 @@ void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) */ struct pnfs_block_dev * nfs4_blk_decode_device(struct nfs_server *server, - struct pnfs_device *dev, - struct list_head *sdlist) + struct pnfs_device *dev) { struct pnfs_block_dev *rv = NULL; struct block_device *bd = NULL; @@ -129,6 +128,7 @@ nfs4_blk_decode_device(struct nfs_server *server, uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); struct bl_dev_msg *reply = &bl_mount_reply; + int offset, len, i; dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, @@ -143,7 +143,14 @@ nfs4_blk_decode_device(struct nfs_server *server, memcpy(msg.data, &bl_msg, sizeof(bl_msg)); dataptr = (uint8_t *) msg.data; - memcpy(&dataptr[sizeof(bl_msg)], dev->area, dev->mincount); + len = dev->mincount; + offset = sizeof(bl_msg); + for (i = 0; len > 0; i++) { + memcpy(&dataptr[offset], page_address(dev->pages[i]), + len < PAGE_CACHE_SIZE ? 
len : PAGE_CACHE_SIZE); + len -= PAGE_CACHE_SIZE; + offset += PAGE_CACHE_SIZE; + } msg.len = sizeof(bl_msg) + dev->mincount; dprintk("%s CALLING USERSPACE DAEMON\n", __func__); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 606fbde2e757..e0b5d80a43f6 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -140,7 +140,6 @@ struct pnfs_device { unsigned int layout_type; unsigned int mincount; struct page **pages; - void *area; unsigned int pgbase; unsigned int pglen; }; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b2ea8b82d2cb..cc03fc1dfb72 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -146,6 +146,7 @@ struct nfs_server { struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; u32 pnfs_blksize; /* layout_blksize attr */ + void *pnfs_ld_data; /* per mount point data */ /* the following fields are protected by nfs_client->cl_lock */ struct rb_root state_owners; -- cgit v1.2.3-59-g8ed1b