aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/nfs/knfsd-stats.txt44
-rw-r--r--fs/nfsd/nfs3xdr.c12
-rw-r--r--fs/nfsd/nfs4acl.c18
-rw-r--r--fs/nfsd/nfs4callback.c15
-rw-r--r--fs/nfsd/nfs4proc.c43
-rw-r--r--fs/nfsd/nfs4state.c148
-rw-r--r--fs/nfsd/nfs4xdr.c75
-rw-r--r--fs/nfsd/nfsproc.c52
-rw-r--r--fs/nfsd/state.h7
-rw-r--r--fs/nfsd/vfs.c128
-rw-r--r--fs/nfsd/vfs.h11
-rw-r--r--fs/nfsd/xdr4.h1
-rw-r--r--include/linux/sunrpc/svc_rdma.h11
-rw-r--r--include/uapi/linux/nfs4.h7
-rw-r--r--net/sunrpc/Kconfig28
-rw-r--r--net/sunrpc/Makefile3
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c8
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/xprtrdma/Makefile14
-rw-r--r--net/sunrpc/xprtrdma/module.c46
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c140
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c16
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c36
-rw-r--r--net/sunrpc/xprtrdma/transport.c13
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h11
27 files changed, 407 insertions, 492 deletions
diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt
index 64ced5149d37..1a5d82180b84 100644
--- a/Documentation/filesystems/nfs/knfsd-stats.txt
+++ b/Documentation/filesystems/nfs/knfsd-stats.txt
@@ -68,16 +68,10 @@ sockets-enqueued
rate of change for this counter is zero; significantly non-zero
values may indicate a performance limitation.
- This can happen either because there are too few nfsd threads in the
- thread pool for the NFS workload (the workload is thread-limited),
- or because the NFS workload needs more CPU time than is available in
- the thread pool (the workload is CPU-limited). In the former case,
- configuring more nfsd threads will probably improve the performance
- of the NFS workload. In the latter case, the sunrpc server layer is
- already choosing not to wake idle nfsd threads because there are too
- many nfsd threads which want to run but cannot, so configuring more
- nfsd threads will make no difference whatsoever. The overloads-avoided
- statistic (see below) can be used to distinguish these cases.
+ This can happen because there are too few nfsd threads in the thread
+ pool for the NFS workload (the workload is thread-limited), in which
+ case configuring more nfsd threads will probably improve the
+ performance of the NFS workload.
threads-woken
Counts how many times an idle nfsd thread is woken to try to
@@ -88,36 +82,6 @@ threads-woken
thing. The ideal rate of change for this counter will be close
to but less than the rate of change of the packets-arrived counter.
-overloads-avoided
- Counts how many times the sunrpc server layer chose not to wake an
- nfsd thread, despite the presence of idle nfsd threads, because
- too many nfsd threads had been recently woken but could not get
- enough CPU time to actually run.
-
- This statistic counts a circumstance where the sunrpc layer
- heuristically avoids overloading the CPU scheduler with too many
- runnable nfsd threads. The ideal rate of change for this counter
- is zero. Significant non-zero values indicate that the workload
- is CPU limited. Usually this is associated with heavy CPU usage
- on all the CPUs in the nfsd thread pool.
-
- If a sustained large overloads-avoided rate is detected on a pool,
- the top(1) utility should be used to check for the following
- pattern of CPU usage on all the CPUs associated with the given
- nfsd thread pool.
-
- - %us ~= 0 (as you're *NOT* running applications on your NFS server)
-
- - %wa ~= 0
-
- - %id ~= 0
-
- - %sy + %hi + %si ~= 100
-
- If this pattern is seen, configuring more nfsd threads will *not*
- improve the performance of the workload. If this patten is not
- seen, then something more subtle is wrong.
-
threads-timedout
Counts how many times an nfsd thread triggered an idle timeout,
i.e. was not woken to handle any incoming network packets for
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index e4b2b4322553..f6e7cbabac5a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -805,7 +805,7 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
static __be32
compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
- const char *name, int namlen)
+ const char *name, int namlen, u64 ino)
{
struct svc_export *exp;
struct dentry *dparent, *dchild;
@@ -830,19 +830,21 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
goto out;
if (d_really_is_negative(dchild))
goto out;
+ if (dchild->d_inode->i_ino != ino)
+ goto out;
rv = fh_compose(fhp, exp, dchild, &cd->fh);
out:
dput(dchild);
return rv;
}
-static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
+static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino)
{
struct svc_fh *fh = &cd->scratch;
__be32 err;
fh_init(fh, NFS3_FHSIZE);
- err = compose_entry_fh(cd, fh, name, namlen);
+ err = compose_entry_fh(cd, fh, name, namlen, ino);
if (err) {
*p++ = 0;
*p++ = 0;
@@ -927,7 +929,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
p = encode_entry_baggage(cd, p, name, namlen, ino);
if (plus)
- p = encode_entryplus_baggage(cd, p, name, namlen);
+ p = encode_entryplus_baggage(cd, p, name, namlen, ino);
num_entry_words = p - cd->buffer;
} else if (*(page+1) != NULL) {
/* temporarily encode entry into next page, then move back to
@@ -941,7 +943,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
if (plus)
- p1 = encode_entryplus_baggage(cd, p1, name, namlen);
+ p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino);
/* determine entry word length and lengths to go in pages */
num_entry_words = p1 - tmp;
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 67242bf7c6cc..eb5accf1b37f 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -52,10 +52,6 @@
#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE)
#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
-/* We don't support these bits; insist they be neither allowed nor denied */
-#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \
- | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS)
-
/* flags used to simulate posix default ACLs */
#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
| NFS4_ACE_DIRECTORY_INHERIT_ACE)
@@ -64,9 +60,6 @@
| NFS4_ACE_INHERIT_ONLY_ACE \
| NFS4_ACE_IDENTIFIER_GROUP)
-#define MASK_EQUAL(mask1, mask2) \
- ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
-
static u32
mask_from_posix(unsigned short perm, unsigned int flags)
{
@@ -126,11 +119,6 @@ low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
*mode |= ACL_EXECUTE;
}
-struct ace_container {
- struct nfs4_ace *ace;
- struct list_head ace_l;
-};
-
static short ace2type(struct nfs4_ace *);
static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
unsigned int);
@@ -384,7 +372,6 @@ pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2)
static void
sort_pacl_range(struct posix_acl *pacl, int start, int end) {
int sorted = 0, i;
- struct posix_acl_entry tmp;
/* We just do a bubble sort; easy to do in place, and we're not
* expecting acl's to be long enough to justify anything more. */
@@ -394,9 +381,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) {
if (pace_gt(&pacl->a_entries[i],
&pacl->a_entries[i+1])) {
sorted = 0;
- tmp = pacl->a_entries[i];
- pacl->a_entries[i] = pacl->a_entries[i+1];
- pacl->a_entries[i+1] = tmp;
+ swap(pacl->a_entries[i],
+ pacl->a_entries[i + 1]);
}
}
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 5694cfb7a47b..a49201835a97 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -455,6 +455,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
if (unlikely(status || cb->cb_status))
return status;
+ cb->cb_update_seq_nr = true;
return decode_cb_sequence4resok(xdr, cb);
}
@@ -875,6 +876,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
u32 minorversion = clp->cl_minorversion;
cb->cb_minorversion = minorversion;
+ cb->cb_update_seq_nr = false;
+ cb->cb_status = 0;
if (minorversion) {
if (!nfsd41_cb_get_slot(clp, task))
return;
@@ -891,9 +894,16 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
clp->cl_minorversion);
if (clp->cl_minorversion) {
- /* No need for lock, access serialized in nfsd4_cb_prepare */
- if (!task->tk_status)
+ /*
+ * No need for lock, access serialized in nfsd4_cb_prepare
+ *
+ * RFC5661 20.9.3
+ * If CB_SEQUENCE returns an error, then the state of the slot
+ * (sequence ID, cached reply) MUST NOT change.
+ */
+ if (cb->cb_update_seq_nr)
++clp->cl_cb_session->se_cb_seq_nr;
+
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1090,6 +1100,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
+ cb->cb_update_seq_nr = false;
cb->cb_need_restart = false;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 864e2003e8de..90cfda75313c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -760,8 +760,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
__be32 status;
- /* no need to check permission - this will be done in nfsd_read() */
-
read->rd_filp = NULL;
if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval;
@@ -778,9 +776,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
- if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
- cstate, &read->rd_stateid,
- RD_STATE, &read->rd_filp))) {
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
+ RD_STATE, &read->rd_filp, &read->rd_tmp_file);
+ if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
}
@@ -924,8 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
- status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
- &setattr->sa_stateid, WR_STATE, NULL);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ &setattr->sa_stateid, WR_STATE, NULL, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
@@ -986,13 +984,11 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
unsigned long cnt;
int nvecs;
- /* no need to check permission - this will be done in nfsd_write() */
-
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;
- status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
- cstate, stateid, WR_STATE, &filp);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
+ &filp, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
@@ -1005,11 +1001,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nvecs = fill_in_write_vector(rqstp->rq_vec, write);
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
- status = nfsd_write(rqstp, &cstate->current_fh, filp,
- write->wr_offset, rqstp->rq_vec, nvecs,
- &cnt, &write->wr_how_written);
- if (filp)
- fput(filp);
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
+ write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
+ &write->wr_how_written);
+ fput(filp);
write->wr_bytes_written = cnt;
@@ -1023,15 +1018,13 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status = nfserr_notsupp;
struct file *file;
- status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate,
&fallocate->falloc_stateid,
- WR_STATE, &file);
+ WR_STATE, &file, NULL);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
}
- if (!file)
- return nfserr_bad_stateid;
status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
fallocate->falloc_offset,
@@ -1064,15 +1057,13 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct file *file;
- status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate,
&seek->seek_stateid,
- RD_STATE, &file);
+ RD_STATE, &file, NULL);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
}
- if (!file)
- return nfserr_bad_stateid;
switch (seek->seek_whence) {
case NFS4_CONTENT_DATA:
@@ -1732,10 +1723,6 @@ encode_op:
be32_to_cpu(status));
nfsd4_cstate_clear_replay(cstate);
- /* XXX Ugh, we need to get rid of this kind of special case: */
- if (op->opnum == OP_READ && op->u.read.rd_filp)
- fput(op->u.read.rd_filp);
-
nfsd4_increment_op_stats(op->opnum);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 039f9c8a95e8..61dfb33f0559 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3861,7 +3861,7 @@ static __be32
nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
{
__be32 status;
- unsigned char old_deny_bmap;
+ unsigned char old_deny_bmap = stp->st_deny_bmap;
if (!test_access(open->op_share_access, stp))
return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
@@ -3870,7 +3870,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
spin_lock(&fp->fi_lock);
status = nfs4_file_check_deny(fp, open->op_share_deny);
if (status == nfs_ok) {
- old_deny_bmap = stp->st_deny_bmap;
set_deny(open->op_share_deny, stp);
fp->fi_share_deny |=
(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
@@ -4574,85 +4573,130 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
return nfs_ok;
}
+static struct file *
+nfs4_find_file(struct nfs4_stid *s, int flags)
+{
+ if (!s)
+ return NULL;
+
+ switch (s->sc_type) {
+ case NFS4_DELEG_STID:
+ if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
+ return NULL;
+ return get_file(s->sc_file->fi_deleg_file);
+ case NFS4_OPEN_STID:
+ case NFS4_LOCK_STID:
+ if (flags & RD_STATE)
+ return find_readable_file(s->sc_file);
+ else
+ return find_writeable_file(s->sc_file);
+ break;
+ }
+
+ return NULL;
+}
+
+static __be32
+nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
+{
+ __be32 status;
+
+ status = nfs4_check_fh(fhp, ols);
+ if (status)
+ return status;
+ status = nfsd4_check_openowner_confirmed(ols);
+ if (status)
+ return status;
+ return nfs4_check_openmode(ols, flags);
+}
+
+static __be32
+nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
+ struct file **filpp, bool *tmp_file, int flags)
+{
+ int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
+ struct file *file;
+ __be32 status;
+
+ file = nfs4_find_file(s, flags);
+ if (file) {
+ status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ acc | NFSD_MAY_OWNER_OVERRIDE);
+ if (status) {
+ fput(file);
+ return status;
+ }
+
+ *filpp = file;
+ } else {
+ status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+ if (status)
+ return status;
+
+ if (tmp_file)
+ *tmp_file = true;
+ }
+
+ return 0;
+}
+
/*
-* Checks for stateid operations
-*/
+ * Checks for stateid operations
+ */
__be32
-nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
- stateid_t *stateid, int flags, struct file **filpp)
+nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate, stateid_t *stateid,
+ int flags, struct file **filpp, bool *tmp_file)
{
- struct nfs4_stid *s;
- struct nfs4_ol_stateid *stp = NULL;
- struct nfs4_delegation *dp = NULL;
- struct svc_fh *current_fh = &cstate->current_fh;
- struct inode *ino = d_inode(current_fh->fh_dentry);
+ struct svc_fh *fhp = &cstate->current_fh;
+ struct inode *ino = d_inode(fhp->fh_dentry);
+ struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct file *file = NULL;
+ struct nfs4_stid *s = NULL;
__be32 status;
if (filpp)
*filpp = NULL;
+ if (tmp_file)
+ *tmp_file = false;
if (grace_disallows_io(net, ino))
return nfserr_grace;
- if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
- return check_special_stateids(net, current_fh, stateid, flags);
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+ status = check_special_stateids(net, fhp, stateid, flags);
+ goto done;
+ }
status = nfsd4_lookup_stateid(cstate, stateid,
NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
&s, nn);
if (status)
return status;
- status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
+ status = check_stateid_generation(stateid, &s->sc_stateid,
+ nfsd4_has_session(cstate));
if (status)
goto out;
+
switch (s->sc_type) {
case NFS4_DELEG_STID:
- dp = delegstateid(s);
- status = nfs4_check_delegmode(dp, flags);
- if (status)
- goto out;
- if (filpp) {
- file = dp->dl_stid.sc_file->fi_deleg_file;
- if (!file) {
- WARN_ON_ONCE(1);
- status = nfserr_serverfault;
- goto out;
- }
- get_file(file);
- }
+ status = nfs4_check_delegmode(delegstateid(s), flags);
break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
- stp = openlockstateid(s);
- status = nfs4_check_fh(current_fh, stp);
- if (status)
- goto out;
- status = nfsd4_check_openowner_confirmed(stp);
- if (status)
- goto out;
- status = nfs4_check_openmode(stp, flags);
- if (status)
- goto out;
- if (filpp) {
- struct nfs4_file *fp = stp->st_stid.sc_file;
-
- if (flags & RD_STATE)
- file = find_readable_file(fp);
- else
- file = find_writeable_file(fp);
- }
+ status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
break;
default:
status = nfserr_bad_stateid;
- goto out;
+ break;
}
- status = nfs_ok;
- if (file)
- *filpp = file;
+
+done:
+ if (!status && filpp)
+ status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
out:
- nfs4_put_stid(s);
+ if (s)
+ nfs4_put_stid(s);
return status;
}
@@ -5505,7 +5549,7 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (!err) {
err = nfserrno(vfs_test_lock(file, lock));
- nfsd_close(file);
+ fput(file);
}
return err;
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 158badf945df..54633858733a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -33,6 +33,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/statfs.h>
@@ -2227,7 +2228,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
u32 rdattr_err = 0;
__be32 status;
int err;
- int aclsupport = 0;
struct nfs4_acl *acl = NULL;
void *context = NULL;
int contextlen;
@@ -2274,19 +2274,15 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
goto out;
fhp = tempfh;
}
- if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
- | FATTR4_WORD0_SUPPORTED_ATTRS)) {
+ if (bmval0 & FATTR4_WORD0_ACL) {
err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
- aclsupport = (err == 0);
- if (bmval0 & FATTR4_WORD0_ACL) {
- if (err == -EOPNOTSUPP)
- bmval0 &= ~FATTR4_WORD0_ACL;
- else if (err == -EINVAL) {
- status = nfserr_attrnotsupp;
- goto out;
- } else if (err != 0)
- goto out_nfserr;
- }
+ if (err == -EOPNOTSUPP)
+ bmval0 &= ~FATTR4_WORD0_ACL;
+ else if (err == -EINVAL) {
+ status = nfserr_attrnotsupp;
+ goto out;
+ } else if (err != 0)
+ goto out_nfserr;
}
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
@@ -2338,7 +2334,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
u32 word1 = nfsd_suppattrs1(minorversion);
u32 word2 = nfsd_suppattrs2(minorversion);
- if (!aclsupport)
+ if (!IS_POSIXACL(dentry->d_inode))
word0 &= ~FATTR4_WORD0_ACL;
if (!contextsupport)
word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
@@ -2486,7 +2482,7 @@ out_acl:
p = xdr_reserve_space(xdr, 4);
if (!p)
goto out_resource;
- *p++ = cpu_to_be32(aclsupport ?
+ *p++ = cpu_to_be32(IS_POSIXACL(dentry->d_inode) ?
ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
}
if (bmval0 & FATTR4_WORD0_CANSETTIME) {
@@ -3422,52 +3418,51 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
unsigned long maxcount;
struct xdr_stream *xdr = &resp->xdr;
struct file *file = read->rd_filp;
- struct svc_fh *fhp = read->rd_fhp;
int starting_len = xdr->buf->len;
- struct raparms *ra;
+ struct raparms *ra = NULL;
__be32 *p;
- __be32 err;
if (nfserr)
- return nfserr;
+ goto out;
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
- return nfserr_resource;
+ nfserr = nfserr_resource;
+ goto out;
}
- if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
+ if (resp->xdr.buf->page_len &&
+ test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
WARN_ON_ONCE(1);
- return nfserr_resource;
+ nfserr = nfserr_resource;
+ goto out;
}
xdr_commit_encode(xdr);
maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
+ maxcount = min_t(unsigned long, maxcount,
+ (xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
- if (read->rd_filp)
- err = nfsd_permission(resp->rqstp, fhp->fh_export,
- fhp->fh_dentry,
- NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
- else
- err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
- &file, &ra);
- if (err)
- goto err_truncate;
+ if (read->rd_tmp_file)
+ ra = nfsd_init_raparms(file);
- if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
- err = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ if (file->f_op->splice_read &&
+ test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
+ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
- err = nfsd4_encode_readv(resp, read, file, maxcount);
+ nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
- if (!read->rd_filp)
- nfsd_put_tmp_read_open(file, ra);
+ if (ra)
+ nfsd_put_raparams(file, ra);
-err_truncate:
- if (err)
+ if (nfserr)
xdr_truncate_encode(xdr, starting_len);
- return err;
+
+out:
+ if (file)
+ fput(file);
+ return nfserr;
}
static __be32
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index aecbcd34d336..4cd78ef4c95c 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -59,13 +59,61 @@ static __be32
nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
struct nfsd_attrstat *resp)
{
+ struct iattr *iap = &argp->attrs;
+ struct svc_fh *fhp;
__be32 nfserr;
+
dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n",
SVCFH_fmt(&argp->fh),
argp->attrs.ia_valid, (long) argp->attrs.ia_size);
- fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
+ fhp = fh_copy(&resp->fh, &argp->fh);
+
+ /*
+ * NFSv2 does not differentiate between "set-[ac]time-to-now"
+ * which only requires access, and "set-[ac]time-to-X" which
+ * requires ownership.
+ * So if it looks like it might be "set both to the same time which
+ * is close to now", and if inode_change_ok fails, then we
+ * convert to "set to now" instead of "set to explicit time"
+ *
+ * We only call inode_change_ok as the last test as technically
+ * it is not an interface that we should be using.
+ */
+#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
+#define MAX_TOUCH_TIME_ERROR (30*60)
+ if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
+ iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
+ /*
+ * Looks probable.
+ *
+ * Now just make sure time is in the right ballpark.
+ * Solaris, at least, doesn't seem to care what the time
+ * request is. We require it be within 30 minutes of now.
+ */
+ time_t delta = iap->ia_atime.tv_sec - get_seconds();
+ struct inode *inode;
+
+ nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
+ if (nfserr)
+ goto done;
+ inode = d_inode(fhp->fh_dentry);
+
+ if (delta < 0)
+ delta = -delta;
+ if (delta < MAX_TOUCH_TIME_ERROR &&
+ inode_change_ok(inode, iap) != 0) {
+ /*
+ * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
+ * This will cause notify_change to set these times
+ * to "now"
+ */
+ iap->ia_valid &= ~BOTH_TIME_SET;
+ }
+ }
+
+ nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
+done:
return nfsd_return_attrs(nfserr, resp);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index dbc4f85a5008..4874ce515fc1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -68,6 +68,7 @@ struct nfsd4_callback {
struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
int cb_status;
+ bool cb_update_seq_nr;
bool cb_need_restart;
};
@@ -582,9 +583,9 @@ enum nfsd4_cb_op {
struct nfsd4_compound_state;
struct nfsd_net;
-extern __be32 nfs4_preprocess_stateid_op(struct net *net,
- struct nfsd4_compound_state *cstate,
- stateid_t *stateid, int flags, struct file **filp);
+extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate, stateid_t *stateid,
+ int flags, struct file **filp, bool *tmp_file);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 84d770be056e..b5e077a6e7d4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -302,42 +302,6 @@ commit_metadata(struct svc_fh *fhp)
static void
nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
- /*
- * NFSv2 does not differentiate between "set-[ac]time-to-now"
- * which only requires access, and "set-[ac]time-to-X" which
- * requires ownership.
- * So if it looks like it might be "set both to the same time which
- * is close to now", and if inode_change_ok fails, then we
- * convert to "set to now" instead of "set to explicit time"
- *
- * We only call inode_change_ok as the last test as technically
- * it is not an interface that we should be using.
- */
-#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
-#define MAX_TOUCH_TIME_ERROR (30*60)
- if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
- iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
- /*
- * Looks probable.
- *
- * Now just make sure time is in the right ballpark.
- * Solaris, at least, doesn't seem to care what the time
- * request is. We require it be within 30 minutes of now.
- */
- time_t delta = iap->ia_atime.tv_sec - get_seconds();
- if (delta < 0)
- delta = -delta;
- if (delta < MAX_TOUCH_TIME_ERROR &&
- inode_change_ok(inode, iap) != 0) {
- /*
- * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
- * This will cause notify_change to set these times
- * to "now"
- */
- iap->ia_valid &= ~BOTH_TIME_SET;
- }
- }
-
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
iap->ia_mode &= S_IALLUGO;
@@ -538,16 +502,11 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, loff_t len,
int flags)
{
- __be32 err;
int error;
if (!S_ISREG(file_inode(file)->i_mode))
return nfserr_inval;
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE);
- if (err)
- return err;
-
error = vfs_fallocate(file, flags, offset, len);
if (!error)
error = commit_metadata(fhp);
@@ -744,7 +703,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
host_err = ima_file_check(file, may_flags, 0);
if (host_err) {
- nfsd_close(file);
+ fput(file);
goto out_nfserr;
}
@@ -761,23 +720,12 @@ out:
return err;
}
-/*
- * Close a file.
- */
-void
-nfsd_close(struct file *filp)
-{
- fput(filp);
-}
-
-/*
- * Obtain the readahead parameters for the file
- * specified by (dev, ino).
- */
-
-static inline struct raparms *
-nfsd_get_raparms(dev_t dev, ino_t ino)
+struct raparms *
+nfsd_init_raparms(struct file *file)
{
+ struct inode *inode = file_inode(file);
+ dev_t dev = inode->i_sb->s_dev;
+ ino_t ino = inode->i_ino;
struct raparms *ra, **rap, **frap = NULL;
int depth = 0;
unsigned int hash;
@@ -814,9 +762,23 @@ found:
ra->p_count++;
nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
spin_unlock(&rab->pb_lock);
+
+ if (ra->p_set)
+ file->f_ra = ra->p_ra;
return ra;
}
+void nfsd_put_raparams(struct file *file, struct raparms *ra)
+{
+ struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+
+ spin_lock(&rab->pb_lock);
+ ra->p_ra = file->f_ra;
+ ra->p_set = 1;
+ ra->p_count--;
+ spin_unlock(&rab->pb_lock);
+}
+
/*
* Grab and keep cached pages associated with a file in the svc_rqst
* so that they can be passed to the network sendmsg/sendpage routines
@@ -945,7 +907,7 @@ static int wait_for_concurrent_writes(struct file *file)
return err;
}
-static __be32
+__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
unsigned long *cnt, int *stablep)
@@ -1009,40 +971,6 @@ out_nfserr:
return err;
}
-__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file **file, struct raparms **ra)
-{
- struct inode *inode;
- __be32 err;
-
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
- if (err)
- return err;
-
- inode = file_inode(*file);
-
- /* Get readahead parameters */
- *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
-
- if (*ra && (*ra)->p_set)
- (*file)->f_ra = (*ra)->p_ra;
- return nfs_ok;
-}
-
-void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
-{
- /* Write back readahead params */
- if (ra) {
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
- spin_lock(&rab->pb_lock);
- ra->p_ra = file->f_ra;
- ra->p_set = 1;
- ra->p_count--;
- spin_unlock(&rab->pb_lock);
- }
- nfsd_close(file);
-}
-
/*
* Read data from a file. count must contain the requested read count
* on entry. On return, *count contains the number of bytes actually read.
@@ -1055,13 +983,15 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct raparms *ra;
__be32 err;
- err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra);
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
return err;
+ ra = nfsd_init_raparms(file);
err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
-
- nfsd_put_tmp_read_open(file, ra);
+ if (ra)
+ nfsd_put_raparams(file, ra);
+ fput(file);
return err;
}
@@ -1093,7 +1023,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (cnt)
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
cnt, stablep);
- nfsd_close(file);
+ fput(file);
}
out:
return err;
@@ -1138,7 +1068,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserr_notsupp;
}
- nfsd_close(file);
+ fput(file);
out:
return err;
}
@@ -1977,7 +1907,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */
out_close:
- nfsd_close(file);
+ fput(file);
out:
return err;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 2050cb016998..5be875e3e638 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -71,11 +71,7 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
#endif /* CONFIG_NFSD_V3 */
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-void nfsd_close(struct file *);
struct raparms;
-__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
- struct file **, struct raparms **);
-void nfsd_put_tmp_read_open(struct file *, struct raparms *);
__be32 nfsd_splice_read(struct svc_rqst *,
struct file *, loff_t, unsigned long *);
__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
@@ -84,6 +80,10 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
loff_t, struct kvec *,int, unsigned long *, int *);
+__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file *file, loff_t offset,
+ struct kvec *vec, int vlen, unsigned long *cnt,
+ int *stablep);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
@@ -104,6 +104,9 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
+struct raparms *nfsd_init_raparms(struct file *file);
+void nfsd_put_raparams(struct file *file, struct raparms *ra);
+
static inline int fh_want_write(struct svc_fh *fh)
{
int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 2f8c092be2b3..9f991007a578 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -273,6 +273,7 @@ struct nfsd4_read {
u32 rd_length; /* request */
int rd_vlen;
struct file *rd_filp;
+ bool rd_tmp_file;
struct svc_rqst *rd_rqstp; /* response */
struct svc_fh * rd_fhp; /* response */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index df8edf8ec914..cb94ee4181d4 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -172,6 +172,13 @@ struct svcxprt_rdma {
#define RDMAXPRT_SQ_PENDING 2
#define RDMAXPRT_CONN_PENDING 3
+#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */
+#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
+#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD
+#else
+#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
+#endif
+
#define RPCRDMA_LISTEN_BACKLOG 10
/* The default ORD value is based on two outstanding full-size writes with a
* page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
@@ -182,10 +189,9 @@ struct svcxprt_rdma {
/* svc_rdma_marshal.c */
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
-extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
struct rpcrdma_msg *,
- enum rpcrdma_errcode, u32 *);
+ enum rpcrdma_errcode, __be32 *);
extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
@@ -212,7 +218,6 @@ extern int svc_rdma_sendto(struct svc_rqst *);
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
enum rpcrdma_errcode);
-struct page *svc_rdma_get_page(void);
extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index adc0aff83fbb..2119c7c274d7 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -86,6 +86,10 @@
#define ACL4_SUPPORT_AUDIT_ACL 0x04
#define ACL4_SUPPORT_ALARM_ACL 0x08
+#define NFS4_ACL_AUTO_INHERIT 0x00000001
+#define NFS4_ACL_PROTECTED 0x00000002
+#define NFS4_ACL_DEFAULTED 0x00000004
+
#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001
#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002
#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004
@@ -93,6 +97,7 @@
#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
+#define NFS4_ACE_INHERITED_ACE 0x00000080
#define NFS4_ACE_READ_DATA 0x00000001
#define NFS4_ACE_LIST_DIRECTORY 0x00000001
@@ -106,6 +111,8 @@
#define NFS4_ACE_DELETE_CHILD 0x00000040
#define NFS4_ACE_READ_ATTRIBUTES 0x00000080
#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100
+#define NFS4_ACE_WRITE_RETENTION 0x00000200
+#define NFS4_ACE_WRITE_RETENTION_HOLD 0x00000400
#define NFS4_ACE_DELETE 0x00010000
#define NFS4_ACE_READ_ACL 0x00020000
#define NFS4_ACE_WRITE_ACL 0x00040000
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 9068e72aa73c..04ce2c0b660e 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -48,28 +48,16 @@ config SUNRPC_DEBUG
If unsure, say Y.
-config SUNRPC_XPRT_RDMA_CLIENT
- tristate "RPC over RDMA Client Support"
+config SUNRPC_XPRT_RDMA
+ tristate "RPC-over-RDMA transport"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
- This option allows the NFS client to support an RDMA-enabled
- transport.
+ This option allows the NFS client and server to use RDMA
+ transports (InfiniBand, iWARP, or RoCE).
- To compile RPC client RDMA transport support as a module,
- choose M here: the module will be called xprtrdma.
+ To compile this support as a module, choose M. The module
+ will be called rpcrdma.ko.
- If unsure, say N.
-
-config SUNRPC_XPRT_RDMA_SERVER
- tristate "RPC over RDMA Server Support"
- depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
- default SUNRPC && INFINIBAND
- help
- This option allows the NFS server to support an RDMA-enabled
- transport.
-
- To compile RPC server RDMA transport support as a module,
- choose M here: the module will be called svcrdma.
-
- If unsure, say N.
+ If unsure, or you know there is no RDMA capability on your
+ hardware platform, say N.
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 15e6f6c23c5d..936ad0a15371 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,8 +5,7 @@
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
-
-obj-y += xprtrdma/
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o auth_generic.o \
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index b5408e8a37f2..fee3c15a4b52 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -881,9 +881,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
if (err)
goto out_err;
- sg_init_table(sg, 1);
- sg_set_buf(sg, &zeroconstant, 4);
-
+ sg_init_one(sg, &zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kseq);
if (err)
goto out_err;
@@ -951,9 +949,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
if (err)
goto out_err;
- sg_init_table(sg, 1);
- sg_set_buf(sg, zeroconstant, 4);
-
+ sg_init_one(sg, zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
if (err)
goto out_err;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 78974e4d9ad2..852ae606b02a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1290,7 +1290,6 @@ err_bad:
svc_putnl(resv, ntohl(rpc_stat));
goto sendit;
}
-EXPORT_SYMBOL_GPL(svc_process);
/*
* Process the RPC request.
@@ -1338,6 +1337,7 @@ out_drop:
svc_drop(rqstp);
return 0;
}
+EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 579f72bbcf4b..48913de240bd 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,9 +1,7 @@
-obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
-xprtrdma-y := transport.o rpc_rdma.o verbs.o \
- fmr_ops.o frwr_ops.o physical_ops.o
-
-obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
-
-svcrdma-y := svc_rdma.o svc_rdma_transport.o \
- svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
+rpcrdma-y := transport.o rpc_rdma.o verbs.o \
+ fmr_ops.o frwr_ops.o physical_ops.o \
+ svc_rdma.o svc_rdma_transport.o \
+ svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
+ module.o
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
new file mode 100644
index 000000000000..560712bd9fa2
--- /dev/null
+++ b/net/sunrpc/xprtrdma/module.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2015 Oracle. All rights reserved.
+ */
+
+/* rpcrdma.ko module initialization
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
+MODULE_DESCRIPTION("RPC/RDMA Transport");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("svcrdma");
+MODULE_ALIAS("xprtrdma");
+
+static void __exit rpc_rdma_cleanup(void)
+{
+ xprt_rdma_cleanup();
+ svc_rdma_cleanup();
+}
+
+static int __init rpc_rdma_init(void)
+{
+ int rc;
+
+ rc = svc_rdma_init();
+ if (rc)
+ goto out;
+
+ rc = xprt_rdma_init();
+ if (rc)
+ svc_rdma_cleanup();
+
+out:
+ return rc;
+}
+
+module_init(rpc_rdma_init);
+module_exit(rpc_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index c1b6270262c2..2cd252f023a5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -38,8 +38,7 @@
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
-#include <linux/module.h>
-#include <linux/init.h>
+
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
@@ -295,8 +294,3 @@ int svc_rdma_init(void)
destroy_workqueue(svc_rdma_wq);
return -ENOMEM;
}
-MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
-MODULE_DESCRIPTION("SVC RDMA Transport");
-MODULE_LICENSE("Dual BSD/GPL");
-module_init(svc_rdma_init);
-module_exit(svc_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index b681855cf970..e2fca7617242 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -50,12 +50,12 @@
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
- * position : u32 offset into XDR stream
- * handle : u32 RKEY
+ * position : __be32 offset into XDR stream
+ * handle : __be32 RKEY
* . . .
* end-of-list: xdr_zero
*/
-static u32 *decode_read_list(u32 *va, u32 *vaend)
+static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
@@ -67,20 +67,20 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
}
ch++;
}
- return (u32 *)&ch->rc_position;
+ return &ch->rc_position;
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
- * handle : u32 RKEY ---+
- * length : u32 <len of segment> |
+ * handle : __be32 RKEY ---+
+ * length : __be32 <len of segment> |
* offset : remove va + <count>
* . . . |
* ---+
*/
-static u32 *decode_write_list(u32 *va, u32 *vaend)
+static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
{
unsigned long start, end;
int nchunks;
@@ -90,14 +90,14 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
- return (u32 *)&ary->wc_nchunks;
+ return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
- nchunks = ntohl(ary->wc_nchunks);
+ nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
@@ -112,10 +112,10 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
- return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
+ return &ary->wc_array[nchunks].wc_target.rs_length;
}
-static u32 *decode_reply_array(u32 *va, u32 *vaend)
+static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
{
unsigned long start, end;
int nchunks;
@@ -124,14 +124,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
- return (u32 *)&ary->wc_nchunks;
+ return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
- nchunks = ntohl(ary->wc_nchunks);
+ nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
@@ -142,15 +142,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
ary, nchunks, vaend);
return NULL;
}
- return (u32 *)&ary->wc_array[nchunks];
+ return (__be32 *)&ary->wc_array[nchunks];
}
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
- u32 *va;
- u32 *vaend;
+ __be32 *va, *vaend;
u32 hdr_len;
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
@@ -162,22 +161,17 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
return -EINVAL;
}
- /* Decode the header */
- rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
- rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
- rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
- rmsgp->rm_type = ntohl(rmsgp->rm_type);
-
- if (rmsgp->rm_vers != RPCRDMA_VERSION)
+ if (rmsgp->rm_vers != rpcrdma_version)
return -ENOSYS;
/* Pull in the extra for the padded case and bump our pointer */
- if (rmsgp->rm_type == RDMA_MSGP) {
+ if (rmsgp->rm_type == rdma_msgp) {
int hdrlen;
+
rmsgp->rm_body.rm_padded.rm_align =
- ntohl(rmsgp->rm_body.rm_padded.rm_align);
+ be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
- ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
+ be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
@@ -192,7 +186,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
- vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
+ vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
va = decode_read_list(va, vaend);
if (!va)
return -EINVAL;
@@ -211,76 +205,20 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
return hdr_len;
}
-int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
-{
- struct rpcrdma_msg *rmsgp = NULL;
- struct rpcrdma_read_chunk *ch;
- struct rpcrdma_write_array *ary;
- u32 *va;
- u32 hdrlen;
-
- dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
- rqstp);
- rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
-
- /* Pull in the extra for the padded case and bump our pointer */
- if (rmsgp->rm_type == RDMA_MSGP) {
- va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
- rqstp->rq_arg.head[0].iov_base = va;
- hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
- rqstp->rq_arg.head[0].iov_len -= hdrlen;
- return hdrlen;
- }
-
- /*
- * Skip all chunks to find RPC msg. These were previously processed
- */
- va = &rmsgp->rm_body.rm_chunks[0];
-
- /* Skip read-list */
- for (ch = (struct rpcrdma_read_chunk *)va;
- ch->rc_discrim != xdr_zero; ch++);
- va = (u32 *)&ch->rc_position;
-
- /* Skip write-list */
- ary = (struct rpcrdma_write_array *)va;
- if (ary->wc_discrim == xdr_zero)
- va = (u32 *)&ary->wc_nchunks;
- else
- /*
- * rs_length is the 2nd 4B field in wc_target and taking its
- * address skips the list terminator
- */
- va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
-
- /* Skip reply-array */
- ary = (struct rpcrdma_write_array *)va;
- if (ary->wc_discrim == xdr_zero)
- va = (u32 *)&ary->wc_nchunks;
- else
- va = (u32 *)&ary->wc_array[ary->wc_nchunks];
-
- rqstp->rq_arg.head[0].iov_base = va;
- hdrlen = (unsigned long)va - (unsigned long)rmsgp;
- rqstp->rq_arg.head[0].iov_len -= hdrlen;
-
- return hdrlen;
-}
-
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
- enum rpcrdma_errcode err, u32 *va)
+ enum rpcrdma_errcode err, __be32 *va)
{
- u32 *startp = va;
+ __be32 *startp = va;
- *va++ = htonl(rmsgp->rm_xid);
- *va++ = htonl(rmsgp->rm_vers);
- *va++ = htonl(xprt->sc_max_requests);
- *va++ = htonl(RDMA_ERROR);
- *va++ = htonl(err);
+ *va++ = rmsgp->rm_xid;
+ *va++ = rmsgp->rm_vers;
+ *va++ = cpu_to_be32(xprt->sc_max_requests);
+ *va++ = rdma_error;
+ *va++ = cpu_to_be32(err);
if (err == ERR_VERS) {
- *va++ = htonl(RPCRDMA_VERSION);
- *va++ = htonl(RPCRDMA_VERSION);
+ *va++ = rpcrdma_version;
+ *va++ = rpcrdma_version;
}
return (int)((unsigned long)va - (unsigned long)startp);
@@ -297,7 +235,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
+ &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
@@ -306,7 +244,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
+ &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
@@ -325,7 +263,7 @@ void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
- ary->wc_nchunks = htonl(chunks);
+ ary->wc_nchunks = cpu_to_be32(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
@@ -338,7 +276,7 @@ void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
int chunks)
{
ary->wc_discrim = xdr_one;
- ary->wc_nchunks = htonl(chunks);
+ ary->wc_nchunks = cpu_to_be32(chunks);
}
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
@@ -350,7 +288,7 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
seg->rs_handle = rs_handle;
seg->rs_offset = rs_offset;
- seg->rs_length = htonl(write_len);
+ seg->rs_length = cpu_to_be32(write_len);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
@@ -358,10 +296,10 @@ void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
- rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
- rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
- rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
- rdma_resp->rm_type = htonl(rdma_type);
+ rdma_resp->rm_xid = rdma_argp->rm_xid;
+ rdma_resp->rm_vers = rdma_argp->rm_vers;
+ rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
+ rdma_resp->rm_type = cpu_to_be32(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 86b44164172b..2e1348bde325 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -85,7 +85,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
- if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
+ if (rmsgp->rm_type == rdma_nomsg)
rqstp->rq_arg.pages = &rqstp->rq_pages[0];
else
rqstp->rq_arg.pages = &rqstp->rq_pages[1];
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7de33d1af9b6..d25cd430f9ff 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -240,6 +240,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
u32 xdr_off;
int chunk_off;
int chunk_no;
+ int nchunks;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
@@ -251,14 +252,15 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
&rdma_resp->rm_body.rm_chunks[1];
/* Write chunks start at the pagelist */
+ nchunks = be32_to_cpu(arg_ary->wc_nchunks);
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
- xfer_len && chunk_no < arg_ary->wc_nchunks;
+ xfer_len && chunk_no < nchunks;
chunk_no++) {
struct rpcrdma_segment *arg_ch;
u64 rs_offset;
arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
- write_len = min(xfer_len, ntohl(arg_ch->rs_length));
+ write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
/* Prepare the response chunk given the length actually
* written */
@@ -270,7 +272,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
- ntohl(arg_ch->rs_handle),
+ be32_to_cpu(arg_ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
@@ -318,13 +320,13 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
&rdma_resp->rm_body.rm_chunks[2];
/* xdr offset starts at RPC message */
- nchunks = ntohl(arg_ary->wc_nchunks);
+ nchunks = be32_to_cpu(arg_ary->wc_nchunks);
for (xdr_off = 0, chunk_no = 0;
xfer_len && chunk_no < nchunks;
chunk_no++) {
u64 rs_offset;
ch = &arg_ary->wc_array[chunk_no].wc_target;
- write_len = min(xfer_len, htonl(ch->rs_length));
+ write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
/* Prepare the reply chunk given the length actually
* written */
@@ -335,7 +337,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
- ntohl(ch->rs_handle),
+ be32_to_cpu(ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
@@ -515,7 +517,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
- res_page = svc_rdma_get_page();
+ res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index f4cfa764d76f..6b36279e4288 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_name = "rdma",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
- .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
+ .xcl_max_payload = RPCRDMA_MAXPAYLOAD,
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
@@ -99,12 +99,8 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt;
- while (1) {
- ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
- if (ctxt)
- break;
- schedule_timeout_uninterruptible(msecs_to_jiffies(500));
- }
+ ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
+ GFP_KERNEL | __GFP_NOFAIL);
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q);
ctxt->count = 0;
@@ -156,12 +152,8 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
struct svc_rdma_req_map *svc_rdma_get_req_map(void)
{
struct svc_rdma_req_map *map;
- while (1) {
- map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
- if (map)
- break;
- schedule_timeout_uninterruptible(msecs_to_jiffies(500));
- }
+ map = kmem_cache_alloc(svc_rdma_map_cachep,
+ GFP_KERNEL | __GFP_NOFAIL);
map->count = 0;
return map;
}
@@ -493,18 +485,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return cma_xprt;
}
-struct page *svc_rdma_get_page(void)
-{
- struct page *page;
-
- while ((page = alloc_page(GFP_KERNEL)) == NULL) {
- /* If we can't get memory, wait a bit and try again */
- printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
- schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
- }
- return page;
-}
-
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
struct ib_recv_wr recv_wr, *bad_recv_wr;
@@ -523,7 +503,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err_put_ctxt;
}
- page = svc_rdma_get_page();
+ page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
@@ -1318,11 +1298,11 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
struct ib_send_wr err_wr;
struct page *p;
struct svc_rdma_op_ctxt *ctxt;
- u32 *va;
+ __be32 *va;
int length;
int ret;
- p = svc_rdma_get_page();
+ p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
va = page_address(p);
/* XDR encode error */
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 54f23b1be986..436da2caec95 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -48,7 +48,6 @@
*/
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
@@ -59,11 +58,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-MODULE_LICENSE("Dual BSD/GPL");
-
-MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
-MODULE_AUTHOR("Network Appliance, Inc.");
-
/*
* tunables
*/
@@ -711,7 +705,7 @@ static struct xprt_class xprt_rdma = {
.setup = xprt_setup_rdma,
};
-static void __exit xprt_rdma_cleanup(void)
+void xprt_rdma_cleanup(void)
{
int rc;
@@ -728,7 +722,7 @@ static void __exit xprt_rdma_cleanup(void)
__func__, rc);
}
-static int __init xprt_rdma_init(void)
+int xprt_rdma_init(void)
{
int rc;
@@ -753,6 +747,3 @@ static int __init xprt_rdma_init(void)
#endif
return 0;
}
-
-module_init(xprt_rdma_init);
-module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 78e0b8beaa36..58163b88738c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -480,6 +480,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
*/
int rpcrdma_marshal_req(struct rpc_rqst *);
+/* RPC/RDMA module init - xprtrdma/transport.c
+ */
+int xprt_rdma_init(void);
+void xprt_rdma_cleanup(void);
+
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/* WR context cache. Created in svc_rdma.c */
@@ -487,10 +492,4 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
-#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
-#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
-#else
-#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
-#endif
-
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */