From 88a944eef8a4f9a2ca647eb16202a2b63f8ba7cd Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Mon, 6 Oct 2008 12:48:29 -0700
Subject: Revert "ax25: Fix std timer socket destroy handling."

This reverts commit 30902dc3cb0ea1cfc7ac2b17bcf478ff98420d74.

It causes all kinds of problems, based upon a report by Bernard (f6bvp)
and analysis by Jarek Poplawski.

Signed-off-by: David S. Miller
---
 net/ax25/ax25_std_timer.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index cdc7e751ef36..96e4b9273250 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -39,9 +39,11 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 
 	switch (ax25->state) {
 	case AX25_STATE_0:
-		if (!sk ||
-		    sock_flag(sk, SOCK_DESTROY) ||
-		    sock_flag(sk, SOCK_DEAD)) {
+		/* Magic here: If we listen() and a new link dies before it
+		   is accepted() it isn't 'dead' so doesn't get removed. */
+		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
+		    (sk->sk_state == TCP_LISTEN &&
+		     sock_flag(sk, SOCK_DEAD))) {
 			if (sk) {
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
--
cgit v1.2.3-59-g8ed1b


From 33d1d2c52c3befa6c4df33b4ba58137d1c48894b Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Mon, 6 Oct 2008 12:53:50 -0700
Subject: ax25: Quick fix for making sure unaccepted sockets get destroyed.

Since we reverted 30902dc3cb0ea1cfc7ac2b17bcf478ff98420d74 ("ax25: Fix
std timer socket destroy handling.") we have to put some kind of fix in
to cure the issue whereby unaccepted connections do not get destroyed.

The approach used here is from Tihomir Heidelberg - 9a4gl

Signed-off-by: David S. Miller
---
 net/ax25/af_ax25.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 01c83e2a4c19..28c71574a781 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -317,6 +317,9 @@ void ax25_destroy_socket(ax25_cb *ax25)
 				/* Queue the unaccepted socket for death */
 				sock_orphan(skb->sk);
 
+				/* 9A4GL: hack to release unaccepted sockets */
+				skb->sk->sk_state = TCP_LISTEN;
+
 				ax25_start_heartbeat(sax25);
 				sax25->state = AX25_STATE_0;
 			}
--
cgit v1.2.3-59-g8ed1b


From 859f4c74d8de4dc344b3a115367d5e22a79bddaf Mon Sep 17 00:00:00 2001
From: Jarek Poplawski
Date: Mon, 6 Oct 2008 12:54:57 -0700
Subject: netrom: Fix sock_orphan() use in nr_release

While debugging another bug it was found that NetRom socks are
sometimes seen unorphaned in sk_free(). This patch moves sock_orphan()
in nr_release() to the beginning (like in ax25, or rose).

Reported-and-tested-by: Bernard Pidoux f6bvp
Signed-off-by: Jarek Poplawski
Signed-off-by: David S. Miller
---
 net/netrom/af_netrom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 532e4faa29f7..9f1ea4a27b35 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -525,6 +525,7 @@ static int nr_release(struct socket *sock)
 	if (sk == NULL) return 0;
 
 	sock_hold(sk);
+	sock_orphan(sk);
 	lock_sock(sk);
 	nr = nr_sk(sk);
 
@@ -548,7 +549,6 @@ static int nr_release(struct socket *sock)
 		sk->sk_state    = TCP_CLOSE;
 		sk->sk_shutdown |= SEND_SHUTDOWN;
 		sk->sk_state_change(sk);
-		sock_orphan(sk);
 		sock_set_flag(sk, SOCK_DESTROY);
 		break;
--
cgit v1.2.3-59-g8ed1b
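[Editor's note] The revert and the 9A4GL quick fix above only make sense read as a pair:
ax25_destroy_socket() now forces unaccepted sockets into TCP_LISTEN so that the restored
AX25_STATE_0 heartbeat test will reap them once they are orphaned. Below is a minimal
user-space sketch of that decision, using toy struct and flag names (not the kernel's) and
assuming only the condition shown in the first diff above.

	#include <stdbool.h>
	#include <stdio.h>

	/* Toy stand-in for the socket flags the heartbeat check looks at. */
	struct toy_sock { bool dead; bool destroy; bool listen; };

	/* Mirrors the restored AX25_STATE_0 test in ax25_std_heartbeat_expiry(). */
	static bool heartbeat_destroys(const struct toy_sock *sk)
	{
		if (!sk)
			return true;
		return sk->destroy || (sk->listen && sk->dead);
	}

	int main(void)
	{
		/* Unaccepted connection after ax25_destroy_socket(): orphaned (dead)
		 * and forced to TCP_LISTEN by the 9A4GL hack, so it gets reaped. */
		struct toy_sock unaccepted = { .dead = true, .destroy = false, .listen = true };

		/* A dead socket that is neither listening nor marked SOCK_DESTROY
		 * is left alone by this check. */
		struct toy_sock other = { .dead = true, .destroy = false, .listen = false };

		printf("unaccepted reaped: %d\n", heartbeat_destroys(&unaccepted));
		printf("other reaped:      %d\n", heartbeat_destroys(&other));
		return 0;
	}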
From b6c40d68ff6498b7f63ddf97cf0aa818d748dee7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy
Date: Tue, 7 Oct 2008 15:26:48 -0700
Subject: net: only invoke dev->change_rx_flags when device is UP

Jesper Dangaard Brouer reported a bug when setting a VLAN device down
that is in promiscuous mode:

When the VLAN device is set down, the promiscuous count on the real
device is decremented by one by vlan_dev_stop(). When removing the
promiscuous flag from the VLAN device afterwards, the promiscuous
count on the real device is decremented a second time by the
vlan_change_rx_flags() callback.

The root cause for this is that the ->change_rx_flags() callback is
invoked while the device is down. The synchronization is meant to
mirror the behaviour of the ->set_rx_mode callbacks, meaning the
->open function is responsible for doing a full sync on open, the
->close() function is responsible for doing full cleanup on ->stop()
and ->change_rx_flags() is meant to do incremental changes while the
device is UP.

Only invoke ->change_rx_flags() while the device is UP to provide the
intended behaviour.

Tested-by: Jesper Dangaard Brouer
Signed-off-by: Patrick McHardy
Signed-off-by: David S. Miller
---
 net/core/dev.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index e8eb2b478344..fd992c0f2717 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2918,6 +2918,12 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 	return 0;
 }
 
+static void dev_change_rx_flags(struct net_device *dev, int flags)
+{
+	if (dev->flags & IFF_UP && dev->change_rx_flags)
+		dev->change_rx_flags(dev, flags);
+}
+
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
 {
 	unsigned short old_flags = dev->flags;
@@ -2955,8 +2961,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
 			current->uid, current->gid,
 			audit_get_sessionid(current));
 
-		if (dev->change_rx_flags)
-			dev->change_rx_flags(dev, IFF_PROMISC);
+		dev_change_rx_flags(dev, IFF_PROMISC);
 	}
 	return 0;
 }
@@ -3022,8 +3027,7 @@ int dev_set_allmulti(struct net_device *dev, int inc)
 		}
 	}
 	if (dev->flags ^ old_flags) {
-		if (dev->change_rx_flags)
-			dev->change_rx_flags(dev, IFF_ALLMULTI);
+		dev_change_rx_flags(dev, IFF_ALLMULTI);
 		dev_set_rx_mode(dev);
 	}
 	return 0;
@@ -3347,8 +3351,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 	 *	Load in the correct multicast list now the flags have changed.
 	 */
 
-	if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
-		dev->change_rx_flags(dev, IFF_MULTICAST);
+	if ((old_flags ^ flags) & IFF_MULTICAST)
+		dev_change_rx_flags(dev, IFF_MULTICAST);
 
 	dev_set_rx_mode(dev);
--
cgit v1.2.3-59-g8ed1b


From 53240c208776d557dba9d7afedbcdbf512774c16 Mon Sep 17 00:00:00 2001
From: Ali Saidi
Date: Tue, 7 Oct 2008 15:31:19 -0700
Subject: tcp: Fix possible double-ack w/ user dma

From: Ali Saidi

When TCP receive copy offload is enabled it's possible that
tcp_rcv_established() will cause two acks to be sent for a single
packet. In the case that a tcp_dma_early_copy() is successful,
copied_early is set to true which causes tcp_cleanup_rbuf() to be
called early which can send an ack. Further along in
tcp_rcv_established(), __tcp_ack_snd_check() is called and will
schedule a delayed ACK. If no packets are processed before the delayed
ack timer expires the packet will be acked twice.

Signed-off-by: David S. Miller
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 67ccce2a96bd..7abc6b80d47d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4879,7 +4879,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				goto no_ack;
 			}
 
-			__tcp_ack_snd_check(sk, 0);
+			if (!copied_early || tp->rcv_nxt != tp->rcv_wup)
+				__tcp_ack_snd_check(sk, 0);
 no_ack:
 #ifdef CONFIG_NET_DMA
 			if (copied_early)
--
cgit v1.2.3-59-g8ed1b
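[Editor's note] A sketch of why the added guard suppresses the duplicate ACK. This is a
user-space toy, not kernel code; it assumes rcv_wup records the last rcv_nxt value that was
acknowledged to the peer, and the struct and helper names are invented for illustration.

	#include <stdbool.h>
	#include <stdio.h>

	/* Toy receive-side bookkeeping: next expected byte vs. last byte acked. */
	struct toy_tp { unsigned int rcv_nxt, rcv_wup; };

	static void send_ack(struct toy_tp *tp)
	{
		tp->rcv_wup = tp->rcv_nxt;	/* remember what we acknowledged */
		printf("ACK sent up to %u\n", tp->rcv_nxt);
	}

	int main(void)
	{
		struct toy_tp tp = { .rcv_nxt = 1000, .rcv_wup = 1000 };
		bool copied_early = true;	/* DMA early-copy path taken */

		tp.rcv_nxt += 1460;		/* one segment received */
		send_ack(&tp);			/* early tcp_cleanup_rbuf() acks it */

		/* Pre-patch: a delayed ACK was scheduled unconditionally here and
		 * acked the same data again when the timer fired.  Post-patch:
		 * everything received is already acked, so nothing is scheduled. */
		if (!copied_early || tp.rcv_nxt != tp.rcv_wup)
			printf("schedule delayed ACK\n");
		else
			printf("no second ACK needed\n");
		return 0;
	}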
From 58ec3b4db9eb5a28e3aec5f407a54e28f7039c19 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Tue, 7 Oct 2008 15:50:03 -0700
Subject: net: Fix netdev_run_todo dead-lock

Benjamin Thery tracked down a bug that explains many instances of the
error

	unregister_netdevice: waiting for %s to become free. Usage count = %d

It turns out that netdev_run_todo can dead-lock with itself if a
second instance of it is run in a thread that will then free a
reference to the device waited on by the first instance.

The problem is really quite silly. We were trying to create
parallelism where none was required. As netdev_run_todo always follows
a RTNL section, and todo tasks can only be added with the RTNL held,
by definition you should only need to wait for the very ones that
you've added and be done with it.

There is no need for a second mutex or spinlock.

This is exactly what the following patch does.

Signed-off-by: Herbert Xu
Signed-off-by: David S. Miller
---
 net/core/dev.c       | 27 ++++++---------------------
 net/core/rtnetlink.c |  2 +-
 2 files changed, 7 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index fd992c0f2717..0ae08d3f57e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3812,14 +3812,11 @@ static int dev_new_index(struct net *net)
 }
 
 /* Delayed registration/unregisteration */
-static DEFINE_SPINLOCK(net_todo_list_lock);
 static LIST_HEAD(net_todo_list);
 
 static void net_set_todo(struct net_device *dev)
 {
-	spin_lock(&net_todo_list_lock);
 	list_add_tail(&dev->todo_list, &net_todo_list);
-	spin_unlock(&net_todo_list_lock);
 }
 
 static void rollback_registered(struct net_device *dev)
@@ -4146,33 +4143,24 @@ static void netdev_wait_allrefs(struct net_device *dev)
  *	free_netdev(y1);
  *	free_netdev(y2);
  *
- *	We are invoked by rtnl_unlock() after it drops the semaphore.
+ *	We are invoked by rtnl_unlock().
  *	This allows us to deal with problems:
  *	1) We can delete sysfs objects which invoke hotplug
  *	   without deadlocking with linkwatch via keventd.
  *	2) Since we run with the RTNL semaphore not held, we can sleep
  *	   safely in order to wait for the netdev refcnt to drop to zero.
+ *
+ *	We must not return until all unregister events added during
+ *	the interval the lock was held have been completed.
  */
-static DEFINE_MUTEX(net_todo_run_mutex);
 void netdev_run_todo(void)
 {
 	struct list_head list;
 
-	/* Need to guard against multiple cpu's getting out of order. */
-	mutex_lock(&net_todo_run_mutex);
-
-	/* Not safe to do outside the semaphore. We must not return
-	 * until all unregister events invoked by the local processor
-	 * have been completed (either by this todo run, or one on
-	 * another cpu).
-	 */
-	if (list_empty(&net_todo_list))
-		goto out;
-
 	/* Snapshot list, allow later requests */
-	spin_lock(&net_todo_list_lock);
 	list_replace_init(&net_todo_list, &list);
-	spin_unlock(&net_todo_list_lock);
+
+	__rtnl_unlock();
 
 	while (!list_empty(&list)) {
 		struct net_device *dev
@@ -4204,9 +4192,6 @@ void netdev_run_todo(void)
 		/* Free network device */
 		kobject_put(&dev->dev.kobj);
 	}
-
-out:
-	mutex_unlock(&net_todo_run_mutex);
 }
 
 static struct net_device_stats *internal_stats(struct net_device *dev)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 71edb8b36341..d6381c2a4693 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -73,7 +73,7 @@ void __rtnl_unlock(void)
 
 void rtnl_unlock(void)
 {
-	mutex_unlock(&rtnl_mutex);
+	/* This fellow will unlock it for us. */
 	netdev_run_todo();
 }
--
cgit v1.2.3-59-g8ed1b


From 9d2c27e17b7574023b5adb5c6a50d7aaeb915543 Mon Sep 17 00:00:00 2001
From: Daniele Lacamera
Date: Tue, 7 Oct 2008 15:58:17 -0700
Subject: tcp: Fix tcp_hybla zero congestion window growth with small rho and large cwnd.

Because of rounding, in certain conditions, i.e. when in congestion
avoidance state rho is smaller than 1/128 of the current cwnd, TCP
Hybla congestion control starves and the cwnd is kept constant
forever.

This patch forces an increment by one segment after #send_cwnd calls
without increments (newreno behavior).

Signed-off-by: Daniele Lacamera
Signed-off-by: David S. Miller
---
 net/ipv4/tcp_hybla.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index bfcbd148a89d..c209e054a634 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -150,7 +150,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 			ca->snd_cwnd_cents -= 128;
 			tp->snd_cwnd_cnt = 0;
 		}
-
+		/* check when cwnd has not been incremented for a while */
+		if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+			tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		}
 		/* clamp down slowstart cwnd to ssthresh value. */
 		if (is_slowstart)
 			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
--
cgit v1.2.3-59-g8ed1b
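[Editor's note] A sketch of the starvation the Hybla patch fixes. This is a user-space toy
with a deliberately simplified increment formula (the real fixed-point math in tcp_hybla.c
differs), and the cwnd and rho values are assumed purely for illustration.

	#include <stdio.h>

	int main(void)
	{
		unsigned int cwnd = 300;	/* assumed example value, in segments */
		unsigned int rho = 1;		/* small round-trip ratio */
		unsigned int cwnd_cnt = 0;	/* ACKs seen without any growth */

		for (unsigned int acks = 0; acks < 1000; acks++) {
			/* simplified stand-in for the per-ACK credit in 1/128ths of a segment */
			unsigned int increment = rho * rho * 128 / cwnd;

			if (increment == 0) {
				/* without the patch nothing ever happens here, cwnd is stuck */
				if (++cwnd_cnt >= cwnd) {
					cwnd++;		/* the patched NewReno-style fallback */
					cwnd_cnt = 0;
				}
			}
		}
		printf("cwnd after 1000 ACKs: %u\n", cwnd);	/* grows only with the fix */
		return 0;
	}

With the fallback the window creeps up by roughly one segment per cwnd ACKs, matching
NewReno's congestion-avoidance rate; without it the integer division rounds every credit
to zero and the loop never changes cwnd.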