From 745cbccac3fe8cead529a1b3358e1e86a1505bfa Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Apr 2009 08:20:19 +0000 Subject: RDS: Rewrite connection cleanup, fixing oops on rmmod This fixes a bug where a connection was unexpectedly not on *any* list while being destroyed. It also cleans up some code duplication and regularizes some function names. * Grab appropriate lock in conn_free() and explain in comment * Ensure via locking that a conn is never not on either a dev's list or the nodev list * Add rds_xx_remove_conn() to match rds_xx_add_conn() * Make rds_xx_add_conn() return void * Rename remove_{,nodev_}conns() to destroy_{,nodev_}conns() and unify their implementation in a helper function * Document lock ordering as nodev conn_lock before dev_conn_lock Reported-by: Yosef Etigin Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/iw_cm.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'net/rds/iw_cm.c') diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 57ecb3d4b8a5..0ffaa3e97ad6 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); if (err) printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); - err = rds_iw_add_conn(rds_iwdev, conn); - if (err) - printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err); + rds_iw_add_conn(rds_iwdev, conn); /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ @@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) * Move connection back to the nodev list. * Remove cm_id from the device cm_id list. */ - if (ic->rds_iwdev) { - - spin_lock_irq(&ic->rds_iwdev->spinlock); - BUG_ON(list_empty(&ic->iw_node)); - list_del(&ic->iw_node); - spin_unlock_irq(&ic->rds_iwdev->spinlock); - - spin_lock_irq(&iw_nodev_conns_lock); - list_add_tail(&ic->iw_node, &iw_nodev_conns); - spin_unlock_irq(&iw_nodev_conns_lock); - rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); - ic->rds_iwdev = NULL; - } + if (ic->rds_iwdev) + rds_iw_remove_conn(ic->rds_iwdev, conn); rdma_destroy_id(ic->i_cm_id); @@ -726,11 +713,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) return 0; } +/* + * Free a connection. Connection must be shut down and not set for reconnect. + */ void rds_iw_conn_free(void *arg) { struct rds_iw_connection *ic = arg; + spinlock_t *lock_ptr; + rdsdebug("ic %p\n", ic); + + /* + * Conn is either on a dev's list or on the nodev list. + * A race with shutdown() or connect() would cause problems + * (since rds_iwdev would change) but that should never happen. + */ + lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock; + + spin_lock_irq(lock_ptr); list_del(&ic->iw_node); + spin_unlock_irq(lock_ptr); + kfree(ic); } -- cgit v1.2.3-59-g8ed1b From 8cbd9606a6367c221a7bbcc47f3ab1a8c31b6437 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Apr 2009 08:20:20 +0000 Subject: RDS: Use spinlock to protect 64b value update on 32b archs We have a 64bit value that needs to be set atomically. This is easy and quick on all 64bit archs, and can also be done on x86/32 with set_64bit() (uses cmpxchg8b). However other 32b archs don't have this. I actually changed this to the current state in preparation for mainline because the old way (using a spinlock on 32b) resulted in unsightly #ifdefs in the code. But obviously, being correct takes precedence. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.h | 14 +++++--------- net/rds/ib_cm.c | 9 ++++++++- net/rds/ib_recv.c | 37 +++++++++++++++++++++++++++++++++++-- net/rds/iw.h | 14 +++++--------- net/rds/iw_cm.c | 9 ++++++++- net/rds/iw_recv.c | 37 +++++++++++++++++++++++++++++++++++-- net/rds/rds.h | 4 ++++ 7 files changed, 100 insertions(+), 24 deletions(-) (limited to 'net/rds/iw_cm.c') diff --git a/net/rds/ib.h b/net/rds/ib.h index c08ffffb3164..069206cae733 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -108,7 +108,12 @@ struct rds_ib_connection { /* sending acks */ unsigned long i_ack_flags; +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_t i_ack_next; /* next ACK to send */ +#else + spinlock_t i_ack_lock; /* protect i_ack_next */ u64 i_ack_next; /* next ACK to send */ +#endif struct rds_header *i_ack; struct ib_send_wr i_ack_wr; struct ib_sge i_ack_sge; @@ -363,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge) return &sge[1]; } -static inline void rds_ib_set_64bit(u64 *ptr, u64 val) -{ -#if BITS_PER_LONG == 64 - *ptr = val; -#else - set_64bit(ptr, val); -#endif -} - #endif diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 889ab0441359..f8e40e1a6038 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -636,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) /* Clear the ACK state */ clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); - rds_ib_set_64bit(&ic->i_ack_next, 0); +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_set(&ic->i_ack_next, 0); +#else + ic->i_ack_next = 0; +#endif ic->i_ack_recv = 0; /* Clear flow control state */ @@ -669,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) INIT_LIST_HEAD(&ic->ib_node); mutex_init(&ic->i_recv_mutex); +#ifndef KERNEL_HAS_ATOMIC64 + spin_lock_init(&ic->i_ack_lock); +#endif /* * rds_ib_conn_shutdown() waits for these to be emptied so they diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 5061b5502162..36d931573ff4 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic) * room for it beyond the ring size. Send completion notices its special * wr_id and avoids working with the ring in that case. */ +#ifndef KERNEL_HAS_ATOMIC64 static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) { - rds_ib_set_64bit(&ic->i_ack_next, seq); + unsigned long flags; + + spin_lock_irqsave(&ic->i_ack_lock, flags); + ic->i_ack_next = seq; + if (ack_required) + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + spin_unlock_irqrestore(&ic->i_ack_lock, flags); +} + +static u64 rds_ib_get_ack(struct rds_ib_connection *ic) +{ + unsigned long flags; + u64 seq; + + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + + spin_lock_irqsave(&ic->i_ack_lock, flags); + seq = ic->i_ack_next; + spin_unlock_irqrestore(&ic->i_ack_lock, flags); + + return seq; +} +#else +static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, + int ack_required) +{ + atomic64_set(&ic->i_ack_next, seq); if (ack_required) { smp_mb__before_clear_bit(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); @@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic) clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); smp_mb__after_clear_bit(); - return ic->i_ack_next; + return atomic64_read(&ic->i_ack_next); } +#endif + static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) { @@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi * - i_ack_next, which is the last sequence number we received * * Potentially, send queue and receive queue handlers can run concurrently. + * It would be nice to not have to use a spinlock to synchronize things, + * but the one problem that rules this out is that 64bit updates are + * not atomic on all platforms. Things would be a lot simpler if + * we had atomic64 or maybe cmpxchg64 everywhere. * * Reconnecting complicates this picture just slightly. When we * reconnect, we may be seeing duplicate packets. The peer diff --git a/net/rds/iw.h b/net/rds/iw.h index 70eb948f42f4..b4fb27252895 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -131,7 +131,12 @@ struct rds_iw_connection { /* sending acks */ unsigned long i_ack_flags; +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_t i_ack_next; /* next ACK to send */ +#else + spinlock_t i_ack_lock; /* protect i_ack_next */ u64 i_ack_next; /* next ACK to send */ +#endif struct rds_header *i_ack; struct ib_send_wr i_ack_wr; struct ib_sge i_ack_sge; @@ -391,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge) return &sge[1]; } -static inline void rds_iw_set_64bit(u64 *ptr, u64 val) -{ -#if BITS_PER_LONG == 64 - *ptr = val; -#else - set_64bit(ptr, val); -#endif -} - #endif diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 0ffaa3e97ad6..a416b0d492b1 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -659,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) /* Clear the ACK state */ clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); - rds_iw_set_64bit(&ic->i_ack_next, 0); +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_set(&ic->i_ack_next, 0); +#else + ic->i_ack_next = 0; +#endif ic->i_ack_recv = 0; /* Clear flow control state */ @@ -693,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) INIT_LIST_HEAD(&ic->iw_node); mutex_init(&ic->i_recv_mutex); +#ifndef KERNEL_HAS_ATOMIC64 + spin_lock_init(&ic->i_ack_lock); +#endif /* * rds_iw_conn_shutdown() waits for these to be emptied so they diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index a1931f0027a2..fde470fa50d5 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic) * room for it beyond the ring size. Send completion notices its special * wr_id and avoids working with the ring in that case. */ +#ifndef KERNEL_HAS_ATOMIC64 static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, int ack_required) { - rds_iw_set_64bit(&ic->i_ack_next, seq); + unsigned long flags; + + spin_lock_irqsave(&ic->i_ack_lock, flags); + ic->i_ack_next = seq; + if (ack_required) + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + spin_unlock_irqrestore(&ic->i_ack_lock, flags); +} + +static u64 rds_iw_get_ack(struct rds_iw_connection *ic) +{ + unsigned long flags; + u64 seq; + + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + + spin_lock_irqsave(&ic->i_ack_lock, flags); + seq = ic->i_ack_next; + spin_unlock_irqrestore(&ic->i_ack_lock, flags); + + return seq; +} +#else +static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, + int ack_required) +{ + atomic64_set(&ic->i_ack_next, seq); if (ack_required) { smp_mb__before_clear_bit(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); @@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic) clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); smp_mb__after_clear_bit(); - return ic->i_ack_next; + return atomic64_read(&ic->i_ack_next); } +#endif + static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) { @@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi * - i_ack_next, which is the last sequence number we received * * Potentially, send queue and receive queue handlers can run concurrently. + * It would be nice to not have to use a spinlock to synchronize things, + * but the one problem that rules this out is that 64bit updates are + * not atomic on all platforms. Things would be a lot simpler if + * we had atomic64 or maybe cmpxchg64 everywhere. * * Reconnecting complicates this picture just slightly. When we * reconnect, we may be seeing duplicate packets. The peer diff --git a/net/rds/rds.h b/net/rds/rds.h index 060400704979..619f0a30a4e5 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -28,6 +28,10 @@ */ #define RDS_PORT 18634 +#ifdef ATOMIC64_INIT +#define KERNEL_HAS_ATOMIC64 +#endif + #ifdef DEBUG #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else -- cgit v1.2.3-59-g8ed1b