From b3de7559afbb7a8a35b4be975a6adf6c5e3cdca0 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Fri, 24 Sep 2010 13:22:06 +0000
Subject: tcp: fix TSO FACK loss marking in tcp_mark_head_lost
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When TCP uses FACK algorithm to mark lost packets in
tcp_mark_head_lost(), if the number of packets in the (TSO) skb is
greater than the number of packets that should be marked lost, TCP
incorrectly exits the loop and marks no packets lost in the skb. This
underestimates tp->lost_out and affects the recovery/retransmission.
This patch fargments the skb and marks the correct amount of packets
lost.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 149e79ac2891..b55f60f6fcbe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2545,7 +2545,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 			cnt += tcp_skb_pcount(skb);
 
 		if (cnt > packets) {
-			if (tcp_is_sack(tp) || (oldcnt >= packets))
+			if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) ||
+			    (oldcnt >= packets))
 				break;
 
 			mss = skb_shinfo(skb)->gso_size;
-- 
cgit v1.2.3-59-g8ed1b


From 7e1b33e5ea392dfc984fc63b76ca75acbf249dcd Mon Sep 17 00:00:00 2001
From: Ulrich Weber <uweber@astaro.com>
Date: Mon, 27 Sep 2010 15:02:18 -0700
Subject: ipv6: add IPv6 to neighbour table overflow warning

IPv4 and IPv6 have separate neighbour tables, so
the warning messages should be distinguishable.

[ Add a suitable message prefix on the ipv4 side as well -DaveM ]

Signed-off-by: Ulrich Weber <uweber@astaro.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 net/ipv6/route.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6298f75d5e93..ac6559cb54f9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1231,7 +1231,7 @@ restart:
 			}
 
 			if (net_ratelimit())
-				printk(KERN_WARNING "Neighbour table overflow.\n");
+				printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
 			rt_drop(rt);
 			return -ENOBUFS;
 		}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d126365ac046..8323136bdc54 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -670,7 +670,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 
 			if (net_ratelimit())
 				printk(KERN_WARNING
-				       "Neighbour table overflow.\n");
+				       "ipv6: Neighbour table overflow.\n");
 			dst_free(&rt->dst);
 			return NULL;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 0b20406cda621c2495d10baab1e87127ceb43337 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven.eckelmann@gmx.de>
Date: Mon, 27 Sep 2010 15:54:44 -0700
Subject: net/9p: Mount only matching virtio channels

p9_virtio_create will only compare the the channel's tag characters
against the device name till the end of the channel's tag but not till
the end of the device name. This means that if a user defines channels
with the tags foo and foobar then he would mount foo when he requested
foonot and may mount foo when he requested foobar.

Thus it is necessary to check both string lengths against each other in
case of a successful partial string match.

Signed-off-by: Sven Eckelmann <sven.eckelmann@gmx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/trans_virtio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index dcfbe99ff81c..b88515936e4b 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -329,7 +329,8 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 
 	mutex_lock(&virtio_9p_lock);
 	list_for_each_entry(chan, &virtio_chan_list, chan_list) {
-		if (!strncmp(devname, chan->tag, chan->tag_len)) {
+		if (!strncmp(devname, chan->tag, chan->tag_len) &&
+		    strlen(devname) == chan->tag_len) {
 			if (!chan->inuse) {
 				chan->inuse = true;
 				found = 1;
-- 
cgit v1.2.3-59-g8ed1b


From 01db403cf99f739f86903314a489fb420e0e254f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 27 Sep 2010 20:24:54 -0700
Subject: tcp: Fix >4GB writes on 64-bit.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes kernel bugzilla #16603

tcp_sendmsg() truncates iov_len to an 'int' which a 4GB write to write
zero bytes, for example.

There is also the problem higher up of how verify_iovec() works.  It
wants to prevent the total length from looking like an error return
value.

However it does this using 'int', but syscalls return 'long' (and
thus signed 64-bit on 64-bit machines).  So it could trigger
false-positives on 64-bit as written.  So fix it to use 'long'.

Reported-by: Olaf Bonorden <bono@onlinehome.de>
Reported-by: Daniel Büse <dbuese@gmx.de>
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 2 +-
 net/core/iovec.c       | 5 +++--
 net/ipv4/tcp.c         | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index a2fada9becb6..a8f56e1ec760 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -322,7 +322,7 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata,
 					  int offset, 
 					  unsigned int len, __wsum *csump);
 
-extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode);
+extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode);
 extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len);
 extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
 			     int offset, int len);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 1cd98df412df..e6b133b77ccb 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -35,9 +35,10 @@
  *	in any case.
  */
 
-int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
+long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
 {
-	int size, err, ct;
+	int size, ct;
+	long err;
 
 	if (m->msg_namelen) {
 		if (mode == VERIFY_READ) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 95d75d443927..f115ea68a4ef 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -943,7 +943,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	sg = sk->sk_route_caps & NETIF_F_SG;
 
 	while (--iovlen >= 0) {
-		int seglen = iov->iov_len;
+		size_t seglen = iov->iov_len;
 		unsigned char __user *from = iov->iov_base;
 
 		iov++;
-- 
cgit v1.2.3-59-g8ed1b


From 4d22f7d372f5769c6c0149e427ed6353e2dcfe61 Mon Sep 17 00:00:00 2001
From: Damian Lukowski <damian@tvk.rwth-aachen.de>
Date: Tue, 28 Sep 2010 13:08:32 -0700
Subject: net-2.6: SYN retransmits: Add new parameter to
 retransmits_timed_out()

Fixes kernel Bugzilla Bug 18952

This patch adds a syn_set parameter to the retransmits_timed_out()
routine and updates its callers. If not set, TCP_RTO_MIN is taken
as the calculation basis as before. If set, TCP_TIMEOUT_INIT is
used instead, so that sysctl_syn_retries represents the actual
amount of SYN retransmissions in case no SYNACKs are received when
establishing a new connection.

Signed-off-by: Damian Lukowski <damian@tvk.rwth-aachen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c35b469e851c..74c54b30600f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -135,13 +135,16 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 
 /* This function calculates a "timeout" which is equivalent to the timeout of a
  * TCP connection after "boundary" unsuccessful, exponentially backed-off
- * retransmissions with an initial RTO of TCP_RTO_MIN.
+ * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if
+ * syn_set flag is set.
  */
 static bool retransmits_timed_out(struct sock *sk,
-				  unsigned int boundary)
+				  unsigned int boundary,
+				  bool syn_set)
 {
 	unsigned int timeout, linear_backoff_thresh;
 	unsigned int start_ts;
+	unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
 
 	if (!inet_csk(sk)->icsk_retransmits)
 		return false;
@@ -151,12 +154,12 @@ static bool retransmits_timed_out(struct sock *sk,
 	else
 		start_ts = tcp_sk(sk)->retrans_stamp;
 
-	linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
+	linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
 
 	if (boundary <= linear_backoff_thresh)
-		timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
+		timeout = ((2 << boundary) - 1) * rto_base;
 	else
-		timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
+		timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
 			  (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
 
 	return (tcp_time_stamp - start_ts) >= timeout;
@@ -167,14 +170,15 @@ static int tcp_write_timeout(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int retry_until;
-	bool do_reset;
+	bool do_reset, syn_set = 0;
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits)
 			dst_negative_advice(sk);
 		retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
+		syn_set = 1;
 	} else {
-		if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
+		if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) {
 			/* Black hole detection */
 			tcp_mtu_probing(icsk, sk);
 
@@ -187,14 +191,14 @@ static int tcp_write_timeout(struct sock *sk)
 
 			retry_until = tcp_orphan_retries(sk, alive);
 			do_reset = alive ||
-				   !retransmits_timed_out(sk, retry_until);
+				   !retransmits_timed_out(sk, retry_until, 0);
 
 			if (tcp_out_of_resources(sk, do_reset))
 				return 1;
 		}
 	}
 
-	if (retransmits_timed_out(sk, retry_until)) {
+	if (retransmits_timed_out(sk, retry_until, syn_set)) {
 		/* Has it gone just too far? */
 		tcp_write_err(sk);
 		return 1;
@@ -436,7 +440,7 @@ out_reset_timer:
 		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
 	}
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
-	if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
+	if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0))
 		__sk_dst_reset(sk);
 
 out:;
-- 
cgit v1.2.3-59-g8ed1b


From 68c1f3a96c32a4fe15ebadae45c8145a5e5a66d2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 28 Sep 2010 22:37:56 -0700
Subject: ip_gre: Fix dependencies wrt. ipv6.

The GRE tunnel driver needs to invoke icmpv6 helpers in the
ipv6 stack when ipv6 support is enabled.

Therefore if IPV6 is enabled, we have to enforce that GRE's
enabling (modular or static) matches that of ipv6.

Reported-by: Patrick McHardy <kaber@trash.net>
Reported-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 571f8950ed06..72380a30d1c8 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -217,6 +217,7 @@ config NET_IPIP
 
 config NET_IPGRE
 	tristate "IP: GRE tunnels over IP"
+	depends on IPV6 || IPV6=n
 	help
 	  Tunneling means encapsulating data of one protocol type within
 	  another protocol and sending it over a channel that understands the
-- 
cgit v1.2.3-59-g8ed1b


From a91e7d471e2e384035b9746ea707ccdcd353f5dd Mon Sep 17 00:00:00 2001
From: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
Date: Mon, 27 Sep 2010 23:10:42 +0000
Subject: Phonet: Correct header retrieval after pskb_may_pull
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Retrieve the header after doing pskb_may_pull since, pskb_may_pull
could change the buffer structure.

This is based on the comment given by Eric Dumazet on Phonet
Pipe controller patch for a similar problem.

Signed-off-by: Kumar Sanghvi <kumar.sanghvi@stericsson.com>
Acked-by: Linus Walleij <linus.walleij@stericsson.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pep.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index b2a3ae6cad78..15003021f4f0 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -225,12 +225,13 @@ static void pipe_grant_credits(struct sock *sk)
 static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 {
 	struct pep_sock *pn = pep_sk(sk);
-	struct pnpipehdr *hdr = pnp_hdr(skb);
+	struct pnpipehdr *hdr;
 	int wake = 0;
 
 	if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
 		return -EINVAL;
 
+	hdr = pnp_hdr(skb);
 	if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
 		LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n",
 				(unsigned)hdr->data[0]);
-- 
cgit v1.2.3-59-g8ed1b


From 173e79fb70a98b5b223f8dc09c22990d777bdd78 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 30 Sep 2010 02:16:44 +0000
Subject: vlan: dont drop packets from unknown vlans in promiscuous mode

Roger Luethi noticed packets for unknown VLANs getting silently dropped
even in promiscuous mode.

Check for promiscuous mode in __vlan_hwaccel_rx() and vlan_gro_common()
before drops.

As suggested by Patrick, mark such packets to have skb->pkt_type set to
PACKET_OTHERHOST to make sure they are dropped by IP stack.

Reported-by: Roger Luethi <rl@hellgate.ch>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_core.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 01ddb0472f86..0eb96f7e44be 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -24,8 +24,11 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 
 	if (vlan_dev)
 		skb->dev = vlan_dev;
-	else if (vlan_id)
-		goto drop;
+	else if (vlan_id) {
+		if (!(skb->dev->flags & IFF_PROMISC))
+			goto drop;
+		skb->pkt_type = PACKET_OTHERHOST;
+	}
 
 	return (polling ? netif_receive_skb(skb) : netif_rx(skb));
 
@@ -102,8 +105,11 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 
 	if (vlan_dev)
 		skb->dev = vlan_dev;
-	else if (vlan_id)
-		goto drop;
+	else if (vlan_id) {
+		if (!(skb->dev->flags & IFF_PROMISC))
+			goto drop;
+		skb->pkt_type = PACKET_OTHERHOST;
+	}
 
 	for (p = napi->gro_list; p; p = p->next) {
 		NAPI_GRO_CB(p)->same_flow =
-- 
cgit v1.2.3-59-g8ed1b


From ae878ae280bea286ff2b1e1cb6e609dd8cb4501d Mon Sep 17 00:00:00 2001
From: Maciej Żenczykowski <maze@google.com>
Date: Sun, 3 Oct 2010 14:49:00 -0700
Subject: net: Fix IPv6 PMTU disc. w/ asymmetric routes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8323136bdc54..a275c6e1e25c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1556,14 +1556,13 @@ out:
  *	i.e. Path MTU discovery
  */
 
-void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
-			struct net_device *dev, u32 pmtu)
+static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
+			     struct net *net, u32 pmtu, int ifindex)
 {
 	struct rt6_info *rt, *nrt;
-	struct net *net = dev_net(dev);
 	int allfrag = 0;
 
-	rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
+	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
 	if (rt == NULL)
 		return;
 
@@ -1631,6 +1630,27 @@ out:
 	dst_release(&rt->dst);
 }
 
+void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
+			struct net_device *dev, u32 pmtu)
+{
+	struct net *net = dev_net(dev);
+
+	/*
+	 * RFC 1981 states that a node "MUST reduce the size of the packets it
+	 * is sending along the path" that caused the Packet Too Big message.
+	 * Since it's not possible in the general case to determine which
+	 * interface was used to send the original packet, we update the MTU
+	 * on the interface that will be used to send future packets. We also
+	 * update the MTU on the interface that received the Packet Too Big in
+	 * case the original packet was forced out that interface with
+	 * SO_BINDTODEVICE or similar. This is the next best thing to the
+	 * correct behaviour, which would be to update the MTU on all
+	 * interfaces.
+	 */
+	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
+	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
+}
+
 /*
  *	Misc support functions
  */
-- 
cgit v1.2.3-59-g8ed1b


From 482964e56e1320cb7952faa1932d8ecf59c4bf75 Mon Sep 17 00:00:00 2001
From: Nagendra Tomar <tomer_iisc@yahoo.com>
Date: Sat, 2 Oct 2010 23:45:06 +0000
Subject: net: Fix the condition passed to sk_wait_event()

This patch fixes the condition (3rd arg) passed to sk_wait_event() in
sk_stream_wait_memory(). The incorrect check in sk_stream_wait_memory()
causes the following soft lockup in tcp_sendmsg() when the global tcp
memory pool has exhausted.

>>> snip <<<

localhost kernel: BUG: soft lockup - CPU#3 stuck for 11s! [sshd:6429]
localhost kernel: CPU 3:
localhost kernel: RIP: 0010:[sk_stream_wait_memory+0xcd/0x200]  [sk_stream_wait_memory+0xcd/0x200] sk_stream_wait_memory+0xcd/0x200
localhost kernel:
localhost kernel: Call Trace:
localhost kernel:  [sk_stream_wait_memory+0x1b1/0x200] sk_stream_wait_memory+0x1b1/0x200
localhost kernel:  [<ffffffff802557c0>] autoremove_wake_function+0x0/0x40
localhost kernel:  [ipv6:tcp_sendmsg+0x6e6/0xe90] tcp_sendmsg+0x6e6/0xce0
localhost kernel:  [sock_aio_write+0x126/0x140] sock_aio_write+0x126/0x140
localhost kernel:  [xfs:do_sync_write+0xf1/0x130] do_sync_write+0xf1/0x130
localhost kernel:  [<ffffffff802557c0>] autoremove_wake_function+0x0/0x40
localhost kernel:  [hrtimer_start+0xe3/0x170] hrtimer_start+0xe3/0x170
localhost kernel:  [vfs_write+0x185/0x190] vfs_write+0x185/0x190
localhost kernel:  [sys_write+0x50/0x90] sys_write+0x50/0x90
localhost kernel:  [system_call+0x7e/0x83] system_call+0x7e/0x83

>>> snip <<<

What is happening is, that the sk_wait_event() condition passed from
sk_stream_wait_memory() evaluates to true for the case of tcp global memory
exhaustion. This is because both sk_stream_memory_free() and vm_wait are true
which causes sk_wait_event() to *not* call schedule_timeout().
Hence sk_stream_wait_memory() returns immediately to the caller w/o sleeping.
This causes the caller to again try allocation, which again fails and again
calls sk_stream_wait_memory(), and so on.

[ Bug introduced by commit c1cbe4b7ad0bc4b1d98ea708a3fecb7362aa4088
  ("[NET]: Avoid atomic xchg() for non-error case") -DaveM ]

Signed-off-by: Nagendra Singh Tomar <tomer_iisc@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/stream.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/stream.c b/net/core/stream.c
index d959e0f41528..f5df85dcd20b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -141,10 +141,10 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		sk->sk_write_pending++;
-		sk_wait_event(sk, &current_timeo, !sk->sk_err &&
-						  !(sk->sk_shutdown & SEND_SHUTDOWN) &&
-						  sk_stream_memory_free(sk) &&
-						  vm_wait);
+		sk_wait_event(sk, &current_timeo, sk->sk_err ||
+						  (sk->sk_shutdown & SEND_SHUTDOWN) ||
+						  (sk_stream_memory_free(sk) &&
+						  !vm_wait));
 		sk->sk_write_pending--;
 
 		if (vm_wait) {
-- 
cgit v1.2.3-59-g8ed1b


From c5d3557103f8bef81d7a150ab9cc970099cd58a2 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 3 Oct 2010 15:37:42 +0000
Subject: Revert "ipv4: Make INET_LRO a bool instead of tristate."

This reverts commit e81963b180ac502fda0326edf059b1e29cdef1a2.

LRO is now deprecated in favour of GRO, and only a few drivers use it,
so it is desirable to build it as a module in distribution kernels.

The original change to prevent building it as a module was made in an
attempt to avoid the case where some dependents are set to y and some
to m, and INET_LRO can be set to m rather than y.  However, the
Kconfig system will reliably set INET_LRO=y in this case.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 72380a30d1c8..7cd7760144f7 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -413,7 +413,7 @@ config INET_XFRM_MODE_BEET
 	  If unsure, say Y.
 
 config INET_LRO
-	bool "Large Receive Offload (ipv4/tcp)"
+	tristate "Large Receive Offload (ipv4/tcp)"
 	default y
 	---help---
 	  Support for Large Receive Offload (ipv4/tcp).
-- 
cgit v1.2.3-59-g8ed1b


From 5b7c84066733c5dfb0e4016d939757b38de189e4 Mon Sep 17 00:00:00 2001
From: David Stevens <dlstevens@us.ibm.com>
Date: Thu, 30 Sep 2010 14:29:40 +0000
Subject: ipv4: correct IGMP behavior on v3 query during v2-compatibility mode

A recent patch to allow IGMPv2 responses to IGMPv3 queries
bypasses length checks for valid query lengths, incorrectly
resets the v2_seen timer, and does not support IGMPv1.

The following patch responds with a v2 report as required
by IGMPv2 while correcting the other problems introduced
by the patch.

Signed-Off-By: David L Stevens <dlstevens@us.ibm.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1fdcacd36ce7..2a4bb76f2132 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -834,7 +834,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	int			mark = 0;
 
 
-	if (len == 8 || IGMP_V2_SEEN(in_dev)) {
+	if (len == 8) {
 		if (ih->code == 0) {
 			/* Alas, old v1 router presents here. */
 
@@ -856,6 +856,18 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		igmpv3_clear_delrec(in_dev);
 	} else if (len < 12) {
 		return;	/* ignore bogus packet; freed by caller */
+	} else if (IGMP_V1_SEEN(in_dev)) {
+		/* This is a v3 query with v1 queriers present */
+		max_delay = IGMP_Query_Response_Interval;
+		group = 0;
+	} else if (IGMP_V2_SEEN(in_dev)) {
+		/* this is a v3 query with v2 queriers present;
+		 * Interpretation of the max_delay code is problematic here.
+		 * A real v2 host would use ih_code directly, while v3 has a
+		 * different encoding. We use the v3 encoding as more likely
+		 * to be intended in a v3 query.
+		 */
+		max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE);
 	} else { /* v3 */
 		if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
 			return;
-- 
cgit v1.2.3-59-g8ed1b


From d7e0d19aa0fdd22819d35db551bd54c1bcf9c2aa Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Date: Fri, 1 Oct 2010 11:16:58 +0000
Subject: sctp: prevent reading out-of-bounds memory

Two user-controlled allocations in SCTP are subsequently dereferenced as
sockaddr structs, without checking if the dereferenced struct members fall
beyond the end of the allocated chunk.  There doesn't appear to be any
information leakage here based on how these members are used and
additional checking, but it's still worth fixing.

[akpm@linux-foundation.org: remove unfashionable newlines, fix gmail tab->space conversion]
Signed-off-by: Dan Rosenberg <dan.j.rosenberg@gmail.com>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ca44917872d2..fbb70770ad05 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -916,6 +916,11 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
 	/* Walk through the addrs buffer and count the number of addresses. */
 	addr_buf = kaddrs;
 	while (walk_size < addrs_size) {
+		if (walk_size + sizeof(sa_family_t) > addrs_size) {
+			kfree(kaddrs);
+			return -EINVAL;
+		}
+
 		sa_addr = (struct sockaddr *)addr_buf;
 		af = sctp_get_af_specific(sa_addr->sa_family);
 
@@ -1002,9 +1007,13 @@ static int __sctp_connect(struct sock* sk,
 	/* Walk through the addrs buffer and count the number of addresses. */
 	addr_buf = kaddrs;
 	while (walk_size < addrs_size) {
+		if (walk_size + sizeof(sa_family_t) > addrs_size) {
+			err = -EINVAL;
+			goto out_free;
+		}
+
 		sa_addr = (union sctp_addr *)addr_buf;
 		af = sctp_get_af_specific(sa_addr->sa.sa_family);
-		port = ntohs(sa_addr->v4.sin_port);
 
 		/* If the address family is not supported or if this address
 		 * causes the address buffer to overflow return EINVAL.
@@ -1014,6 +1023,8 @@ static int __sctp_connect(struct sock* sk,
 			goto out_free;
 		}
 
+		port = ntohs(sa_addr->v4.sin_port);
+
 		/* Save current address so we can work with it */
 		memcpy(&to, sa_addr, af->sockaddr_len);
 
-- 
cgit v1.2.3-59-g8ed1b


From 51e97a12bef19b7e43199fc153cf9bd5f2140362 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Fri, 1 Oct 2010 11:51:47 +0000
Subject: sctp: Fix out-of-bounds reading in sctp_asoc_get_hmac()

The sctp_asoc_get_hmac() function iterates through a peer's hmac_ids
array and attempts to ensure that only a supported hmac entry is
returned.  The current code fails to do this properly - if the last id
in the array is out of range (greater than SCTP_AUTH_HMAC_ID_MAX), the
id integer remains set after exiting the loop, and the address of an
out-of-bounds entry will be returned and subsequently used in the parent
function, causing potentially ugly memory corruption.  This patch resets
the id integer to 0 on encountering an invalid id so that NULL will be
returned after finishing the loop if no valid ids are found.

Signed-off-by: Dan Rosenberg <drosenberg@vsecurity.com>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/auth.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 86366390038a..ddbbf7c81fa1 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -543,16 +543,20 @@ struct sctp_hmac *sctp_auth_asoc_get_hmac(const struct sctp_association *asoc)
 		id = ntohs(hmacs->hmac_ids[i]);
 
 		/* Check the id is in the supported range */
-		if (id > SCTP_AUTH_HMAC_ID_MAX)
+		if (id > SCTP_AUTH_HMAC_ID_MAX) {
+			id = 0;
 			continue;
+		}
 
 		/* See is we support the id.  Supported IDs have name and
 		 * length fields set, so that we can allocated and use
 		 * them.  We can safely just check for name, for without the
 		 * name, we can't allocate the TFM.
 		 */
-		if (!sctp_hmac_list[id].hmac_name)
+		if (!sctp_hmac_list[id].hmac_name) {
+			id = 0;
 			continue;
+		}
 
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b