From f291196979ca80cdef199ca2b55e2758e8c23a0d Mon Sep 17 00:00:00 2001
From: Herbert Xu ~{PmVHI~} <herbert@gondor.apana.org.au>
Date: Mon, 5 Jun 2006 15:03:37 -0700
Subject: [TCP]: Avoid skb_pull if possible when trimming head

Trimming the head of an skb by calling skb_pull can cause the packet
to become unaligned if the length pulled is odd.  Since the length is
entirely arbitrary for a FIN packet carrying data, this is actually
quite common.

Unaligned data is not the end of the world, but we should avoid it if
it's easily done.  In this case it is trivial.  Since we're discarding
all of the head data it doesn't matter whether we move skb->data forward
or back.

However, it is still possible to have unaligned skb->data in general.
So network drivers should be prepared to handle it instead of crashing.

This patch also adds an unlikely marking on len < headlen since partial
ACKs on head data are extremely rare in the wild.  As the return value
of __pskb_trim_head is no longer ever NULL that has been removed.

Signed-off-by: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 743016baa048..f33c9dddaa12 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -642,7 +642,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
  * eventually). The difference is that pulled data not copied, but
  * immediately discarded.
  */
-static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len)
+static void __pskb_trim_head(struct sk_buff *skb, int len)
 {
 	int i, k, eat;
 
@@ -667,7 +667,6 @@ static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len)
 	skb->tail = skb->data;
 	skb->data_len -= len;
 	skb->len = skb->data_len;
-	return skb->tail;
 }
 
 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
@@ -676,12 +675,11 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		return -ENOMEM;
 
-	if (len <= skb_headlen(skb)) {
+	/* If len == headlen, we avoid __skb_pull to preserve alignment. */
+	if (unlikely(len < skb_headlen(skb)))
 		__skb_pull(skb, len);
-	} else {
-		if (__pskb_trim_head(skb, len-skb_headlen(skb)) == NULL)
-			return -ENOMEM;
-	}
+	else
+		__pskb_trim_head(skb, len - skb_headlen(skb));
 
 	TCP_SKB_CB(skb)->seq += len;
 	skb->ip_summed = CHECKSUM_HW;
-- 
cgit v1.2.3-59-g8ed1b


From 6569a351da7e58d6f0fbc92fcf0bef5d4a4bc0a4 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Mon, 5 Jun 2006 15:34:11 -0700
Subject: [NET]: Eliminate unused /proc/sys/net/ethernet

The /proc/sys/net/ethernet directory has been sitting empty for more than
10 years!  Time to eliminate it!

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/Makefile           |  1 -
 net/ethernet/sysctl_net_ether.c | 14 --------------
 net/sysctl_net.c                |  8 --------
 3 files changed, 23 deletions(-)
 delete mode 100644 net/ethernet/sysctl_net_ether.c

(limited to 'net')

diff --git a/net/ethernet/Makefile b/net/ethernet/Makefile
index 69b74a9a0fc3..7cef1d8ace27 100644
--- a/net/ethernet/Makefile
+++ b/net/ethernet/Makefile
@@ -3,6 +3,5 @@
 #
 
 obj-y					+= eth.o
-obj-$(CONFIG_SYSCTL)			+= sysctl_net_ether.o
 obj-$(subst m,y,$(CONFIG_IPX))		+= pe2.o
 obj-$(subst m,y,$(CONFIG_ATALK))	+= pe2.o
diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c
deleted file mode 100644
index 66b39fc342d2..000000000000
--- a/net/ethernet/sysctl_net_ether.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/* -*- linux-c -*-
- * sysctl_net_ether.c: sysctl interface to net Ethernet subsystem.
- *
- * Begun April 1, 1996, Mike Shaver.
- * Added /proc/sys/net/ether directory entry (empty =) ). [MS]
- */
-
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <linux/if_ether.h>
-
-ctl_table ether_table[] = {
-	{0}
-};
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 55538f6b60ff..58a1b6b42ddd 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -37,14 +37,6 @@ struct ctl_table net_table[] = {
 		.mode		= 0555,
 		.child		= core_table,
 	},
-#ifdef CONFIG_NET
-	{
-		.ctl_name	= NET_ETHER,
-		.procname	= "ethernet",
-		.mode		= 0555,
-		.child		= ether_table,
-	},
-#endif
 #ifdef CONFIG_INET
 	{
 		.ctl_name	= NET_IPV4,
-- 
cgit v1.2.3-59-g8ed1b


From 8c893ff6abbac0c7c05b1cb9bfb6e2dfc4538c75 Mon Sep 17 00:00:00 2001
From: Florin Malita <fmalita@gmail.com>
Date: Mon, 5 Jun 2006 15:34:52 -0700
Subject: [IRDA]: Missing allocation result check in irlap_change_speed().

The skb allocation may fail, which can result in a NULL pointer dereference
in irlap_queue_xmit().

Coverity CID: 434.

Signed-off-by: Florin Malita <fmalita@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irlap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/irda/irlap.c b/net/irda/irlap.c
index 7029618f5719..a16528657b4c 100644
--- a/net/irda/irlap.c
+++ b/net/irda/irlap.c
@@ -884,7 +884,8 @@ static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now)
 	if (now) {
 		/* Send down empty frame to trigger speed change */
 		skb = dev_alloc_skb(0);
-		irlap_queue_xmit(self, skb);
+		if (skb)
+			irlap_queue_xmit(self, skb);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 36485707bbd9729e0c52315b173aeed9bc2303dd Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@suse.cz>
Date: Mon, 5 Jun 2006 16:39:34 -0700
Subject: [BRIDGE]: fix locking and memory leak in br_add_bridge

There are several bugs in error handling in br_add_bridge:
- when dev_alloc_name fails, allocated net_device is not freed
- unregister_netdev is called when rtnl lock is held
- free_netdev is called before netdev_run_todo has a chance to be run after
  unregistering net_device

Signed-off-by: Jiri Benc <jbenc@suse.cz>
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ad1c7af65ec8..f5d47bf4f967 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -300,25 +300,20 @@ int br_add_bridge(const char *name)
 	rtnl_lock();
 	if (strchr(dev->name, '%')) {
 		ret = dev_alloc_name(dev, dev->name);
-		if (ret < 0)
-			goto err1;
+		if (ret < 0) {
+			free_netdev(dev);
+			goto out;
+		}
 	}
 
 	ret = register_netdevice(dev);
 	if (ret)
-		goto err2;
+		goto out;
 
 	ret = br_sysfs_addbr(dev);
 	if (ret)
-		goto err3;
-	rtnl_unlock();
-	return 0;
-
- err3:
-	unregister_netdev(dev);
- err2:
-	free_netdev(dev);
- err1:
+		unregister_netdevice(dev);
+ out:
 	rtnl_unlock();
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From afec35e3fee900b3016519d0b5512064e4625b2c Mon Sep 17 00:00:00 2001
From: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Date: Sun, 11 Jun 2006 20:58:33 -0700
Subject: [DCCP] Ackvec: fix soft lockup in ackvec handling code

A soft lockup existed in the handling of ack vector records.
Specifically, when a tail of the list of ack vector records was
removed, it was possible to end up iterating infinitely on an element
of the tail.

Signed-off-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index b5981e5f6b00..8c211c58893b 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -452,6 +452,7 @@ found:
 					      (unsigned long long)
 					      avr->dccpavr_ack_ackno);
 				dccp_ackvec_throw_record(av, avr);
+				break;
 			}
 			/*
 			 * If it wasn't received, continue scanning... we might
-- 
cgit v1.2.3-59-g8ed1b


From 79320d7e14900c549c3520791a297328f53ff71e Mon Sep 17 00:00:00 2001
From: Aki M Nyrhinen <anyrhine@cs.helsinki.fi>
Date: Sun, 11 Jun 2006 21:18:56 -0700
Subject: [TCP]: continued: reno sacked_out count fix

From: Aki M Nyrhinen <anyrhine@cs.helsinki.fi>

IMHO the current fix to the problem (in_flight underflow in reno)
is incorrect.  it treats the symptons but ignores the problem. the
problem is timing out packets other than the head packet when we
don't have sack. i try to explain (sorry if explaining the obvious).

with sack, scanning the retransmit queue for timed out packets is
fine because we know which packets in our retransmit queue have been
acked by the receiver.

without sack, we know only how many packets in our retransmit queue the
receiver has acknowledged, but no idea which packets.

think of a "typical" slow-start overshoot case, where for example
every third packet in a window get lost because a router buffer gets
full.

with sack, we check for timeouts on those every third packet (as the
rest have been sacked). the packet counting works out and if there
is no reordering, we'll retransmit exactly the packets that were
lost.

without sack, however, we check for timeout on every packet and end up
retransmitting consecutive packets in the retransmit queue. in our
slow-start example, 2/3 of those retransmissions are unnecessary. these
unnecessary retransmissions eat the congestion window and evetually
prevent fast recovery from continuing, if enough packets were lost.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4a538bc1683d..b5521a9d3dc1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1649,7 +1649,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 	 * Hence, we can detect timed out packets during fast
 	 * retransmit without falling to slow start.
 	 */
-	if (tcp_head_timedout(sk, tp)) {
+	if (!IsReno(tp) && tcp_head_timedout(sk, tp)) {
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
@@ -1662,8 +1662,6 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 				tp->lost_out += tcp_skb_pcount(skb);
-				if (IsReno(tp))
-					tcp_remove_reno_sacks(sk, tp, tcp_skb_pcount(skb) + 1);
 
 				/* clear xmit_retrans hint */
 				if (tp->retransmit_skb_hint &&
-- 
cgit v1.2.3-59-g8ed1b


From 42d1d52e695d87475846e9a09964cae1209eeecb Mon Sep 17 00:00:00 2001
From: Weidong <weid@nanjing-fnst.com>
Date: Mon, 12 Jun 2006 13:09:59 -0700
Subject: [IPV4]: Increment ipInHdrErrors when TTL expires.

Signed-off-by: Weidong <weid@nanjing-fnst.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_forward.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 0923add122b4..9f0bb529ab70 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -116,6 +116,7 @@ sr_failed:
 
 too_many_hops:
         /* Tell the sender its packet died... */
+        IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
         icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
 drop:
 	kfree_skb(skb);
-- 
cgit v1.2.3-59-g8ed1b