From ee1a2c564f67407947e89f1dac75ac0af0ba88c7 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Wed, 27 Feb 2008 15:04:26 -0500 Subject: SUNRPC: Fix a nfs4 over rdma transport oops Prevent an RPC oops when freeing a dynamically allocated RDMA buffer, used in certain special-case large metadata operations. Signed-off-by: Tom Talpey Signed-off-by: James Lentini Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/transport.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 02c522c17de5..a564c1a39ec5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -614,7 +614,11 @@ xprt_rdma_free(void *buffer) return; req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); - r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); + if (req->rl_iov.length == 0) { /* see allocate above */ + r_xprt = container_of(((struct rpcrdma_req *) req->rl_buffer)->rl_buffer, + struct rpcrdma_xprt, rx_buf); + } else + r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); rep = req->rl_reply; dprintk("RPC: %s: called on 0x%p%s\n", -- cgit v1.2.3-59-g8ed1b From c48cbb405c4f338ce3263c44d621eff41d9a95fc Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Tue, 11 Mar 2008 14:31:39 -0400 Subject: SVCRDMA: Add xprt refs to fix close/unmount crash RDMA connection shutdown on an SMP machine can cause a kernel crash due to the transport close path racing with the I/O tasklet. Additional transport references were added as follows: - A reference when on the DTO Q to avoid having the transport deleted while queued for I/O. - A reference while there is a QP able to generate events. - A reference until the DISCONNECTED event is received on the CM ID Signed-off-by: Tom Tucker Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 96 +++++++++++++++++++------------- 1 file changed, 58 insertions(+), 38 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f09444c451bc..16fd3f6718ff 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -54,7 +54,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, int flags); static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); static void svc_rdma_release_rqst(struct svc_rqst *); -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt); static void dto_tasklet_func(unsigned long data); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); @@ -247,6 +246,7 @@ static void dto_tasklet_func(unsigned long data) sq_cq_reap(xprt); } + svc_xprt_put(&xprt->sc_xprt); spin_lock_irqsave(&dto_lock, flags); } spin_unlock_irqrestore(&dto_lock, flags); @@ -275,8 +275,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) * add it */ spin_lock_irqsave(&dto_lock, flags); - if (list_empty(&xprt->sc_dto_q)) + if (list_empty(&xprt->sc_dto_q)) { + svc_xprt_get(&xprt->sc_xprt); list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); + } spin_unlock_irqrestore(&dto_lock, flags); /* Tasklet does all the work to avoid irqsave locks. */ @@ -386,8 +388,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context) * add it */ spin_lock_irqsave(&dto_lock, flags); - if (list_empty(&xprt->sc_dto_q)) + if (list_empty(&xprt->sc_dto_q)) { + svc_xprt_get(&xprt->sc_xprt); list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); + } spin_unlock_irqrestore(&dto_lock, flags); /* Tasklet does all the work to avoid irqsave locks. */ @@ -611,6 +615,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, switch (event->event) { case RDMA_CM_EVENT_ESTABLISHED: /* Accept complete */ + svc_xprt_get(xprt); dprintk("svcrdma: Connection completed on DTO xprt=%p, " "cm_id=%p\n", xprt, cma_id); clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); @@ -661,15 +666,15 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); if (IS_ERR(listen_id)) { - rdma_destroy_xprt(cma_xprt); + svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_create_id failed = %ld\n", PTR_ERR(listen_id)); return (void *)listen_id; } ret = rdma_bind_addr(listen_id, sa); if (ret) { - rdma_destroy_xprt(cma_xprt); rdma_destroy_id(listen_id); + svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); return ERR_PTR(ret); } @@ -678,8 +683,9 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); if (ret) { rdma_destroy_id(listen_id); - rdma_destroy_xprt(cma_xprt); + svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_listen failed = %d\n", ret); + return ERR_PTR(ret); } /* @@ -820,6 +826,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; } + svc_xprt_get(&newxprt->sc_xprt); newxprt->sc_qp = newxprt->sc_cm_id->qp; /* Register all of physical memory */ @@ -891,8 +898,15 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) errout: dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); + /* Take a reference in case the DTO handler runs */ + svc_xprt_get(&newxprt->sc_xprt); + if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) { + ib_destroy_qp(newxprt->sc_qp); + svc_xprt_put(&newxprt->sc_xprt); + } rdma_destroy_id(newxprt->sc_cm_id); - rdma_destroy_xprt(newxprt); + /* This call to put will destroy the transport */ + svc_xprt_put(&newxprt->sc_xprt); return NULL; } @@ -919,54 +933,60 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; } -/* Disable data ready events for this connection */ +/* + * When connected, an svc_xprt has at least three references: + * + * - A reference held by the QP. We still hold that here because this + * code deletes the QP and puts the reference. + * + * - A reference held by the cm_id between the ESTABLISHED and + * DISCONNECTED events. If the remote peer disconnected first, this + * reference could be gone. + * + * - A reference held by the svc_recv code that called this function + * as part of close processing. + * + * At a minimum two references should still be held. + */ static void svc_rdma_detach(struct svc_xprt *xprt) { struct svcxprt_rdma *rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); - unsigned long flags; - dprintk("svc: svc_rdma_detach(%p)\n", xprt); - /* - * Shutdown the connection. This will ensure we don't get any - * more events from the provider. - */ + + /* Disconnect and flush posted WQE */ rdma_disconnect(rdma->sc_cm_id); - rdma_destroy_id(rdma->sc_cm_id); - /* We may already be on the DTO list */ - spin_lock_irqsave(&dto_lock, flags); - if (!list_empty(&rdma->sc_dto_q)) - list_del_init(&rdma->sc_dto_q); - spin_unlock_irqrestore(&dto_lock, flags); + /* Destroy the QP if present (not a listener) */ + if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) { + ib_destroy_qp(rdma->sc_qp); + svc_xprt_put(xprt); + } + + /* Destroy the CM ID */ + rdma_destroy_id(rdma->sc_cm_id); } static void svc_rdma_free(struct svc_xprt *xprt) { struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); - rdma_destroy_xprt(rdma); - kfree(rdma); -} - -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt) -{ - if (xprt->sc_qp && !IS_ERR(xprt->sc_qp)) - ib_destroy_qp(xprt->sc_qp); - - if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq)) - ib_destroy_cq(xprt->sc_sq_cq); + /* We should only be called from kref_put */ + BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0); + if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) + ib_destroy_cq(rdma->sc_sq_cq); - if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq)) - ib_destroy_cq(xprt->sc_rq_cq); + if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) + ib_destroy_cq(rdma->sc_rq_cq); - if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr)) - ib_dereg_mr(xprt->sc_phys_mr); + if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr)) + ib_dereg_mr(rdma->sc_phys_mr); - if (xprt->sc_pd && !IS_ERR(xprt->sc_pd)) - ib_dealloc_pd(xprt->sc_pd); + if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) + ib_dealloc_pd(rdma->sc_pd); - destroy_context_cache(xprt->sc_ctxt_head); + destroy_context_cache(rdma->sc_ctxt_head); + kfree(rdma); } static int svc_rdma_has_wspace(struct svc_xprt *xprt) -- cgit v1.2.3-59-g8ed1b From 3fedb3c5a80595d94f7cbe47a6dba9184d869eb8 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Tue, 11 Mar 2008 14:31:40 -0400 Subject: SVCRDMA: Fix erroneous BUG_ON in send_write The assertion that checks for sge context overflow is incorrectly hard-coded to 32. This causes a kernel bug check when using big-data mounts. Changed the BUG_ON to use the computed value RPCSVC_MAXPAGES. Signed-off-by: Tom Tucker Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 0598b229c11d..981f190c1b39 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -156,7 +156,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *ctxt; int ret = 0; - BUG_ON(sge_count >= 32); + BUG_ON(sge_count > RPCSVC_MAXPAGES); dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " "write_len=%d, xdr_sge=%p, sge_count=%d\n", rmr, (unsigned long long)to, xdr_off, -- cgit v1.2.3-59-g8ed1b From e6f1cebf71c4e7aae7dfa43414ce2631291def9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 17 Mar 2008 22:44:53 -0700 Subject: [NET] endianness noise: INADDR_ANY Signed-off-by: Al Viro Signed-off-by: David S. Miller --- arch/sparc/kernel/sys_sunos.c | 2 +- arch/sparc64/kernel/sys_sunos32.c | 2 +- fs/nfs/super.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/ipconfig.c | 9 +++++---- net/sctp/protocol.c | 10 +++++----- net/sunrpc/svc_xprt.c | 2 +- 8 files changed, 16 insertions(+), 15 deletions(-) (limited to 'net/sunrpc') diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c index 28c187c5d9fd..f5b608bbe8af 100644 --- a/arch/sparc/kernel/sys_sunos.c +++ b/arch/sparc/kernel/sys_sunos.c @@ -659,7 +659,7 @@ sunos_nfs_get_server_fd (int fd, struct sockaddr_in *addr) socket = SOCKET_I(inode); local.sin_family = AF_INET; - local.sin_addr.s_addr = INADDR_ANY; + local.sin_addr.s_addr = htonl(INADDR_ANY); /* IPPORT_RESERVED = 1024, can't find the definition in the kernel */ try_port = 1024; diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c index cfc22d3fe54c..e91194fe39d7 100644 --- a/arch/sparc64/kernel/sys_sunos32.c +++ b/arch/sparc64/kernel/sys_sunos32.c @@ -618,7 +618,7 @@ sunos_nfs_get_server_fd (int fd, struct sockaddr_in *addr) socket = SOCKET_I(inode); local.sin_family = AF_INET; - local.sin_addr.s_addr = INADDR_ANY; + local.sin_addr.s_addr = htonl(INADDR_ANY); /* IPPORT_RESERVED = 1024, can't find the definition in the kernel */ try_port = 1024; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1fb381843650..bdcf6d3ef62f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -632,7 +632,7 @@ static int nfs_verify_server_address(struct sockaddr *addr) switch (addr->sa_family) { case AF_INET: { struct sockaddr_in *sa = (struct sockaddr_in *)addr; - return sa->sin_addr.s_addr != INADDR_ANY; + return sa->sin_addr.s_addr != htonl(INADDR_ANY); } case AF_INET6: { struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 09ca5293d08f..0d109504ed86 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -458,7 +458,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && !inet->freebind && - addr->sin_addr.s_addr != INADDR_ANY && + addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index de0572c88859..f72457b4b0a7 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -583,7 +583,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, } if (!mreq.imr_ifindex) { - if (mreq.imr_address.s_addr == INADDR_ANY) { + if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) { inet->mc_index = 0; inet->mc_addr = 0; err = 0; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 5dd938579eeb..7c992fbbc2c3 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -103,6 +103,7 @@ - '3' from resolv.h */ #define NONE __constant_htonl(INADDR_NONE) +#define ANY __constant_htonl(INADDR_ANY) /* * Public IP configuration @@ -1479,19 +1480,19 @@ static int __init ip_auto_config_setup(char *addrs) DBG(("IP-Config: Parameter #%d: `%s'\n", num, ip)); switch (num) { case 0: - if ((ic_myaddr = in_aton(ip)) == INADDR_ANY) + if ((ic_myaddr = in_aton(ip)) == ANY) ic_myaddr = NONE; break; case 1: - if ((ic_servaddr = in_aton(ip)) == INADDR_ANY) + if ((ic_servaddr = in_aton(ip)) == ANY) ic_servaddr = NONE; break; case 2: - if ((ic_gateway = in_aton(ip)) == INADDR_ANY) + if ((ic_gateway = in_aton(ip)) == ANY) ic_gateway = NONE; break; case 3: - if ((ic_netmask = in_aton(ip)) == INADDR_ANY) + if ((ic_netmask = in_aton(ip)) == ANY) ic_netmask = NONE; break; case 4: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index ad0a4069b95b..7a7646a9565c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -337,14 +337,14 @@ static int sctp_v4_cmp_addr(const union sctp_addr *addr1, static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port) { addr->v4.sin_family = AF_INET; - addr->v4.sin_addr.s_addr = INADDR_ANY; + addr->v4.sin_addr.s_addr = htonl(INADDR_ANY); addr->v4.sin_port = port; } /* Is this a wildcard address? */ static int sctp_v4_is_any(const union sctp_addr *addr) { - return INADDR_ANY == addr->v4.sin_addr.s_addr; + return htonl(INADDR_ANY) == addr->v4.sin_addr.s_addr; } /* This function checks if the address is a valid address to be used for @@ -375,7 +375,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr); - if (addr->v4.sin_addr.s_addr != INADDR_ANY && + if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) && ret != RTN_LOCAL && !sp->inet.freebind && !sysctl_ip_nonlocal_bind) @@ -785,8 +785,8 @@ static int sctp_inet_cmp_addr(const union sctp_addr *addr1, /* PF_INET only supports AF_INET addresses. */ if (addr1->sa.sa_family != addr2->sa.sa_family) return 0; - if (INADDR_ANY == addr1->v4.sin_addr.s_addr || - INADDR_ANY == addr2->v4.sin_addr.s_addr) + if (htonl(INADDR_ANY) == addr1->v4.sin_addr.s_addr || + htonl(INADDR_ANY) == addr2->v4.sin_addr.s_addr) return 1; if (addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr) return 1; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ea377e06afae..332eb47539e1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -185,7 +185,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, struct svc_xprt_class *xcl; struct sockaddr_in sin = { .sin_family = AF_INET, - .sin_addr.s_addr = INADDR_ANY, + .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; dprintk("svc: creating transport %s[%d]\n", xprt_name, port); -- cgit v1.2.3-59-g8ed1b From 27724426a9000086993a8107a11cff276c4bd4d4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 17 Mar 2008 22:48:03 -0700 Subject: [SUNRPC]: net/* NULL noise Signed-off-by: Al Viro Signed-off-by: David S. Miller --- net/sunrpc/auth_gss/gss_mech_switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 61801a069ff0..bce9d527af08 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -317,7 +317,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle) if (!*context_handle) return(GSS_S_NO_CONTEXT); - if ((*context_handle)->internal_ctx_id != 0) + if ((*context_handle)->internal_ctx_id) (*context_handle)->mech_type->gm_ops ->gss_delete_sec_context((*context_handle) ->internal_ctx_id); -- cgit v1.2.3-59-g8ed1b