From a1889c0d2039a53ae04abb9f20c62500bd312bf3 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 28 Jul 2011 02:46:01 +0000 Subject: net: adjust array index Convert array index from the loop bound to the loop index. A simplified version of the semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression e1,e2,ar; @@ for(e1 = 0; e1 < e2; e1++) { <... ar[ - e2 + e1 ] ...> } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f1d27f6c9351..283c0a26e03f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1718,7 +1718,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]--; for (j=0; jsfcount[MCAST_EXCLUDE] != 0)) { #ifdef CONFIG_IP_MULTICAST struct ip_sf_list *psf; -- cgit v1.2.3-59-g8ed1b From e1738bd9cecc5c867b0e2996470c1ff20f66ba79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 19:22:42 +0000 Subject: sch_sfq: fix sfq_enqueue() commit 8efa88540635 (sch_sfq: avoid giving spurious NET_XMIT_CN signals) forgot to call qdisc_tree_decrease_qlen() to signal upper levels that a packet (from another flow) was dropped, leading to various problems. With help from Michal Soltys and Michal Pokrywka, who did a bisection. Bugzilla ref: https://bugzilla.kernel.org/show_bug.cgi?id=39372 Debian ref: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=631945 Reported-by: Lucas Bocchi Reported-and-bisected-by: Michal Pokrywka Signed-off-by: Eric Dumazet CC: Michal Soltys Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 4536ee64383e..4f5510e2bd6f 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -410,7 +410,12 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* Return Congestion Notification only if we dropped a packet * from this flow. */ - return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS; + if (qlen != slot->qlen) + return NET_XMIT_CN; + + /* As we dropped a packet, better let upper stack know this */ + qdisc_tree_decrease_qlen(sch, 1); + return NET_XMIT_SUCCESS; } static struct sk_buff * -- cgit v1.2.3-59-g8ed1b From 84404623da45aac04595a8f5760a58df0e955d87 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Jul 2011 11:52:18 +0300 Subject: cfg80211: off by one in nl80211_trigger_scan() The test is off by one so we'd read past the end of the wiphy->bands[] array on the next line. Signed-off-by: Dan Carpenter Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 28d2aa109bee..e83e7fee3bc0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3464,7 +3464,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) tmp) { enum ieee80211_band band = nla_type(attr); - if (band < 0 || band > IEEE80211_NUM_BANDS) { + if (band < 0 || band >= IEEE80211_NUM_BANDS) { err = -EINVAL; goto out_free; } -- cgit v1.2.3-59-g8ed1b From a08af810cdc29d2ca930e8a869d3d01744c392d8 Mon Sep 17 00:00:00 2001 From: Chas Williams Date: Mon, 1 Aug 2011 17:56:14 -0700 Subject: atm: br2864: sent packets truncated in VC routed mode Reported-by: Pascal Hambourg Signed-off-by: Chas Williams Signed-off-by: David S. Miller --- net/atm/br2684.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 2252c2085dac..52cfd0c3ea71 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -242,8 +242,6 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, if (brdev->payload == p_bridged) { skb_push(skb, 2); memset(skb->data, 0, 2); - } else { /* p_routed */ - skb_pull(skb, ETH_HLEN); } } skb_debug(skb); -- cgit v1.2.3-59-g8ed1b From 82c21bfab41a77bc01affe21bea9727d776774a7 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 1 Aug 2011 11:10:33 +0000 Subject: doc: Update the email address for Paul Moore in various source files My @hp.com will no longer be valid starting August 5, 2011 so an update is necessary. My new email address is employer independent so we don't have to worry about doing this again any time soon. Signed-off-by: Paul Moore Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 2 +- include/net/netlabel.h | 2 +- net/netlabel/Makefile | 2 -- net/netlabel/netlabel_addrlist.c | 2 +- net/netlabel/netlabel_addrlist.h | 2 +- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_cipso_v4.h | 2 +- net/netlabel/netlabel_domainhash.c | 2 +- net/netlabel/netlabel_domainhash.h | 2 +- net/netlabel/netlabel_kapi.c | 2 +- net/netlabel/netlabel_mgmt.c | 2 +- net/netlabel/netlabel_mgmt.h | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlabel/netlabel_unlabeled.h | 2 +- net/netlabel/netlabel_user.c | 2 +- net/netlabel/netlabel_user.h | 2 +- security/selinux/hooks.c | 2 +- security/selinux/include/netif.h | 2 +- security/selinux/include/netlabel.h | 2 +- security/selinux/include/netnode.h | 2 +- security/selinux/include/netport.h | 2 +- security/selinux/netif.c | 2 +- security/selinux/netlabel.c | 2 +- security/selinux/netnode.c | 2 +- security/selinux/netport.c | 2 +- security/selinux/selinuxfs.c | 2 +- security/selinux/ss/ebitmap.c | 2 +- security/selinux/ss/mls.c | 2 +- security/selinux/ss/mls.h | 2 +- security/selinux/ss/policydb.c | 2 +- security/selinux/ss/services.c | 2 +- security/smack/smack_lsm.c | 2 +- 32 files changed, 31 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 3b938743514b..9808877c2ab9 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -8,7 +8,7 @@ * have chosen to adopt the protocol and over the years it has become a * de-facto standard for labeled networking. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/include/net/netlabel.h b/include/net/netlabel.h index f21a16ee3705..f67440970d7e 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -4,7 +4,7 @@ * The NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index ea750e9df65f..d2732fc952e2 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile @@ -1,8 +1,6 @@ # # Makefile for the NetLabel subsystem. # -# Feb 9, 2006, Paul Moore -# # base objects obj-y := netlabel_user.o netlabel_kapi.o diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index c0519139679e..96b749dacc34 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index 2b9644e19de0..fdbc1d2c7352 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index dd53a36d89af..6bf878335d94 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index af7f3355103e..d24d774bfd62 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 2aa975e5452d..7d8083cde34f 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 0261dda3f2d2..bfcc0f7024c5 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -6,7 +6,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index b528dd928d3c..58107d060846 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -5,7 +5,7 @@ * system manages static and dynamic label mappings for network protocols such * as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index dff8a0809245..bfa555869775 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 8db37f4c10f7..5a9f31ce5799 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index f1ecf848e3ac..e6e823656f9d 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 0bc8dc3f9e3c..700af49022a0 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -5,7 +5,7 @@ * NetLabel system. The NetLabel system manages static and dynamic label * mappings for network protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index a3fd75ac3fa5..9fae63f10298 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index f4fc4c9ad567..81969785e279 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -5,7 +5,7 @@ * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a38316b2e3f6..266a2292451d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -14,7 +14,7 @@ * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. * * Copyright (C) 2006, 2007, 2009 Hewlett-Packard Development Company, L.P. - * Paul Moore + * Paul Moore * Copyright (C) 2007 Hitachi Software Engineering Co., Ltd. * Yuichi Nakamura * diff --git a/security/selinux/include/netif.h b/security/selinux/include/netif.h index ce23edd128b3..43d507242b42 100644 --- a/security/selinux/include/netif.h +++ b/security/selinux/include/netif.h @@ -8,7 +8,7 @@ * * Copyright (C) 2003 Red Hat, Inc., James Morris * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. - * Paul Moore, + * Paul Moore * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index cf2f628e6e28..8c59b8f150e8 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -1,7 +1,7 @@ /* * SELinux interface to the NetLabel subsystem * - * Author : Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/include/netnode.h b/security/selinux/include/netnode.h index 1b94450d11d2..df7a5ed6c694 100644 --- a/security/selinux/include/netnode.h +++ b/security/selinux/include/netnode.h @@ -6,7 +6,7 @@ * needed to reduce the lookup overhead since most of these queries happen on * a per-packet basis. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/include/netport.h b/security/selinux/include/netport.h index 8991752eaf93..4d965b83d735 100644 --- a/security/selinux/include/netport.h +++ b/security/selinux/include/netport.h @@ -5,7 +5,7 @@ * mapping is maintained as part of the normal policy but a fast cache is * needed to reduce the lookup overhead. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/netif.c b/security/selinux/netif.c index 58cc481c93d5..326f22cbe405 100644 --- a/security/selinux/netif.c +++ b/security/selinux/netif.c @@ -8,7 +8,7 @@ * * Copyright (C) 2003 Red Hat, Inc., James Morris * Copyright (C) 2007 Hewlett-Packard Development Company, L.P. - * Paul Moore + * Paul Moore * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index c3bf3ed07b06..da4b8b233280 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -4,7 +4,7 @@ * This file provides the necessary glue to tie NetLabel into the SELinux * subsystem. * - * Author: Paul Moore + * Author: Paul Moore * */ diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index 8b691a863186..3bf46abaa688 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -6,7 +6,7 @@ * needed to reduce the lookup overhead since most of these queries happen on * a per-packet basis. * - * Author: Paul Moore + * Author: Paul Moore * * This code is heavily based on the "netif" concept originally developed by * James Morris diff --git a/security/selinux/netport.c b/security/selinux/netport.c index ae76e298de7d..0b62bd112461 100644 --- a/security/selinux/netport.c +++ b/security/selinux/netport.c @@ -5,7 +5,7 @@ * mapping is maintained as part of the normal policy but a fast cache is * needed to reduce the lookup overhead. * - * Author: Paul Moore + * Author: Paul Moore * * This code is heavily based on the "netif" concept originally developed by * James Morris diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index de7900ef53da..55d92cbb177a 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -2,7 +2,7 @@ * * Added conditional policy language extensions * - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support for the policy capability bitmap * diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index d42951fcbe87..30f119b1d1ec 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -4,7 +4,7 @@ * Author : Stephen Smalley, */ /* - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support to import/export the NetLabel category bitmap * diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index e96174216bc9..fbf9c5816c71 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -11,7 +11,7 @@ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ /* - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support to import/export the MLS label from NetLabel * diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 037bf9d82d41..e4369e3e6366 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -11,7 +11,7 @@ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ /* - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support to import/export the MLS label from NetLabel * diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index d246aca3f4fb..2381d0ded228 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -13,7 +13,7 @@ * * Added conditional policy language extensions * - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support for the policy capability bitmap * diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 973e00e34fa9..f6917bc0aa05 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -13,7 +13,7 @@ * * Added conditional policy language extensions * - * Updated: Hewlett-Packard + * Updated: Hewlett-Packard * * Added support for NetLabel * Added support for the policy capability bitmap diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index f375eb2e1957..b9c5e149903b 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -9,7 +9,7 @@ * * Copyright (C) 2007 Casey Schaufler * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. - * Paul Moore + * Paul Moore * Copyright (C) 2010 Nokia Corporation * * This program is free software; you can redistribute it and/or modify -- cgit v1.2.3-59-g8ed1b From 22019b17821ab3543090827491e465c5816cbcd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 18:37:31 +0000 Subject: net: add kerneldoc to skb_copy_bits() Since skb_copy_bits() is called from assembly, add a fat comment to make clear we should think twice before changing its prototype. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2beda824636e..27002dffe7ed 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1369,8 +1369,21 @@ pull_pages: } EXPORT_SYMBOL(__pskb_pull_tail); -/* Copy some data bits from skb to kernel buffer. */ - +/** + * skb_copy_bits - copy bits from skb to kernel buffer + * @skb: source skb + * @offset: offset in source + * @to: destination buffer + * @len: number of bytes to copy + * + * Copy the specified number of bytes from the source skb to the + * destination buffer. + * + * CAUTION ! : + * If its prototype is ever changed, + * check arch/{*}/net/{*}.S files, + * since it is called from BPF assembly code. + */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { int start = skb_headlen(skb); -- cgit v1.2.3-59-g8ed1b From f2c31e32b378a6653f8de606149d963baf11d7d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 19:00:53 +0000 Subject: net: fix NULL dereferences in check_peer_redir() Gergely Kalman reported crashes in check_peer_redir(). It appears commit f39925dbde778 (ipv4: Cache learned redirect information in inetpeer.) added a race, leading to possible NULL ptr dereference. Since we can now change dst neighbour, we should make sure a reader can safely use a neighbour. Add RCU protection to dst neighbour, and make sure check_peer_redir() can be called safely by different cpus in parallel. As neighbours are already freed after one RCU grace period, this patch should not add typical RCU penalty (cache cold effects) Many thanks to Gergely for providing a pretty report pointing to the bug. Reported-by: Gergely Kalman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 17 +++++++++++++---- net/ipv4/ip_output.c | 10 ++++++++-- net/ipv4/route.c | 14 ++++++++------ net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ip6_output.c | 13 +++++++++++-- net/ipv6/route.c | 35 +++++++++++++++++++++++++---------- 7 files changed, 67 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/include/net/dst.h b/include/net/dst.h index 29e255796ce1..13d507d69ddb 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -37,7 +37,7 @@ struct dst_entry { unsigned long _metrics; unsigned long expires; struct dst_entry *path; - struct neighbour *_neighbour; + struct neighbour __rcu *_neighbour; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else @@ -88,12 +88,17 @@ struct dst_entry { static inline struct neighbour *dst_get_neighbour(struct dst_entry *dst) { - return dst->_neighbour; + return rcu_dereference(dst->_neighbour); +} + +static inline struct neighbour *dst_get_neighbour_raw(struct dst_entry *dst) +{ + return rcu_dereference_raw(dst->_neighbour); } static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) { - dst->_neighbour = neigh; + rcu_assign_pointer(dst->_neighbour, neigh); } extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); @@ -382,8 +387,12 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { if (dst) { - struct neighbour *n = dst_get_neighbour(dst); + struct neighbour *n; + + rcu_read_lock(); + n = dst_get_neighbour(dst); neigh_confirm(n); + rcu_read_unlock(); } } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ccaaa851ab42..77d3eded665a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -204,9 +204,15 @@ static inline int ip_finish_output2(struct sk_buff *skb) skb = skb2; } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1730689f560e..6afc4eb50591 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1628,16 +1628,18 @@ static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) { struct rtable *rt = (struct rtable *) dst; __be32 orig_gw = rt->rt_gateway; - struct neighbour *n; + struct neighbour *n, *old_n; dst_confirm(&rt->dst); - neigh_release(dst_get_neighbour(&rt->dst)); - dst_set_neighbour(&rt->dst, NULL); - rt->rt_gateway = peer->redirect_learned.a4; - rt_bind_neighbour(rt); - n = dst_get_neighbour(&rt->dst); + + n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); + if (IS_ERR(n)) + return PTR_ERR(n); + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); if (!n || !(n->nud_state & NUD_VALID)) { if (n) neigh_event_send(n, NULL); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a55500cc0b29..f012ebd87b43 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -656,7 +656,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, * layer address of our nexhop router */ - if (dst_get_neighbour(&rt->dst) == NULL) + if (dst_get_neighbour_raw(&rt->dst) == NULL) ifa->flags &= ~IFA_F_OPTIMISTIC; ifa->idev = idev; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 54a4678955bf..320d91d20ad7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1455,7 +1455,7 @@ static int fib6_age(struct rt6_info *rt, void *arg) RT6_TRACE("aging clone %p\n", rt); return -1; } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(dst_get_neighbour(&rt->dst)->flags & NTF_ROUTER))) { + (!(dst_get_neighbour_raw(&rt->dst)->flags & NTF_ROUTER))) { RT6_TRACE("purging route %p via non-router but gateway\n", rt); return -1; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 32e5339db0c8..4c882cf4e8a1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -135,10 +135,15 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } + rcu_read_lock(); neigh = dst_get_neighbour(dst); - if (neigh) - return neigh_output(neigh, skb); + if (neigh) { + int res = neigh_output(neigh, skb); + rcu_read_unlock(); + return res; + } + rcu_read_unlock(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); @@ -975,12 +980,14 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ + rcu_read_lock(); n = dst_get_neighbour(*dst); if (n && !(n->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; struct flowi6 fl_gw6; int redirect; + rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1000,6 +1007,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } + } else { + rcu_read_unlock(); } #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e8987da06667..9e69eb0ec6dd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -364,7 +364,7 @@ out: #ifdef CONFIG_IPV6_ROUTER_PREF static void rt6_probe(struct rt6_info *rt) { - struct neighbour *neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; + struct neighbour *neigh; /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -373,8 +373,10 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ + rcu_read_lock(); + neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - return; + goto out; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -387,8 +389,11 @@ static void rt6_probe(struct rt6_info *rt) target = (struct in6_addr *)&neigh->primary_key; addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); - } else + } else { read_unlock_bh(&neigh->lock); + } +out: + rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -412,8 +417,11 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_neigh(struct rt6_info *rt) { - struct neighbour *neigh = dst_get_neighbour(&rt->dst); + struct neighbour *neigh; int m; + + rcu_read_lock(); + neigh = dst_get_neighbour(&rt->dst); if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) m = 1; @@ -430,6 +438,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; + rcu_read_unlock(); return m; } @@ -769,7 +778,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, rt->rt6i_dst.plen = 128; rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; - dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour(&ort->dst))); + dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst))); } return rt; } @@ -803,7 +812,7 @@ restart: dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) nrt = rt6_alloc_clone(rt, &fl6->daddr); @@ -1587,7 +1596,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, dst_confirm(&rt->dst); /* Duplicate redirect: silently ignore. */ - if (neigh == dst_get_neighbour(&rt->dst)) + if (neigh == dst_get_neighbour_raw(&rt->dst)) goto out; nrt = ip6_rt_copy(rt, dest); @@ -1682,7 +1691,7 @@ again: 1. It is connected route. Action: COW 2. It is gatewayed route or NONEXTHOP route. Action: clone it. */ - if (!dst_get_neighbour(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) + if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, daddr, saddr); else nrt = rt6_alloc_clone(rt, daddr); @@ -2326,6 +2335,7 @@ static int rt6_fill_node(struct net *net, struct nlmsghdr *nlh; long expires; u32 table; + struct neighbour *n; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -2414,8 +2424,11 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (dst_get_neighbour(&rt->dst)) - NLA_PUT(skb, RTA_GATEWAY, 16, &dst_get_neighbour(&rt->dst)->primary_key); + rcu_read_lock(); + n = dst_get_neighbour(&rt->dst); + if (n) + NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key); + rcu_read_unlock(); if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); @@ -2608,12 +2621,14 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif + rcu_read_lock(); n = dst_get_neighbour(&rt->dst); if (n) { seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } + rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, -- cgit v1.2.3-59-g8ed1b From 728ffb86f10873aaf4abd26dde691ee40ae731fe Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 4 Aug 2011 14:07:38 +0000 Subject: net: sendmmsg should only return an error if no messages were sent sendmmsg uses a similar error return strategy as recvmmsg but it turns out to be a confusing way to communicate errors. The current code stores the error code away and returns it on the next sendmmsg call. This means a call with completely valid arguments could get an error from a previous call. Change things so we only return an error if no datagrams could be sent. If less than the requested number of messages were sent, the application must retry starting at the first failed one and if the problem is persistent the error will be returned. This matches the behaviour of other syscalls like read/write - it is not an error if less than the requested number of elements are sent. Signed-off-by: Anton Blanchard Cc: stable [3.0+] Signed-off-by: David S. Miller --- net/socket.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index b1cbbcd92558..e4ed2359eb43 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2005,12 +2005,9 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; - err = sock_error(sock->sk); - if (err) - goto out_put; - entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; + err = 0; while (datagrams < vlen) { /* @@ -2037,29 +2034,11 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, ++datagrams; } -out_put: fput_light(sock->file, fput_needed); - if (err == 0) - return datagrams; - - if (datagrams != 0) { - /* - * We may send less entries than requested (vlen) if the - * sock is non blocking... - */ - if (err != -EAGAIN) { - /* - * ... or if sendmsg returns an error after we - * send some datagrams, where we record the - * error to return on the next call or if the - * app asks about it using getsockopt(SO_ERROR). - */ - sock->sk->sk_err = -err; - } - + /* We only return an error if no datagrams were able to be sent */ + if (datagrams != 0) return datagrams; - } return err; } -- cgit v1.2.3-59-g8ed1b From 98382f419f32d2c12d021943b87dea555677144b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 4 Aug 2011 14:07:39 +0000 Subject: net: Cap number of elements for sendmmsg To limit the amount of time we can spend in sendmmsg, cap the number of elements to UIO_MAXIOV (currently 1024). For error handling an application using sendmmsg needs to retry at the first unsent message, so capping is simpler and requires less application logic than returning EINVAL. Signed-off-by: Anton Blanchard Cc: stable [3.0+] Signed-off-by: David S. Miller --- net/socket.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index e4ed2359eb43..b5c6de4f268a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1999,6 +1999,9 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct compat_mmsghdr __user *compat_entry; struct msghdr msg_sys; + if (vlen > UIO_MAXIOV) + vlen = UIO_MAXIOV; + datagrams = 0; sock = sockfd_lookup_light(fd, &err, &fput_needed); -- cgit v1.2.3-59-g8ed1b From c71d8ebe7a4496fb7231151cb70a6baa0cb56f9a Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 4 Aug 2011 14:07:40 +0000 Subject: net: Fix security_socket_sendmsg() bypass problem. The sendmmsg() introduced by commit 228e548e "net: Add sendmmsg socket system call" is capable of sending to multiple different destination addresses. SMACK is using destination's address for checking sendmsg() permission. However, security_socket_sendmsg() is called for only once even if multiple different destination addresses are passed to sendmmsg(). Therefore, we need to call security_socket_sendmsg() for each destination address rather than only the first destination address. Since calling security_socket_sendmsg() every time when only single destination address was passed to sendmmsg() is a waste of time, omit calling security_socket_sendmsg() unless destination address of previous datagram and that of current datagram differs. Signed-off-by: Tetsuo Handa Acked-by: Anton Blanchard Cc: stable [3.0+] Signed-off-by: David S. Miller --- net/socket.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index b5c6de4f268a..24a77400b65e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1871,8 +1871,14 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) +struct used_address { + struct sockaddr_storage name; + unsigned int name_len; +}; + static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, - struct msghdr *msg_sys, unsigned flags, int nosec) + struct msghdr *msg_sys, unsigned flags, + struct used_address *used_address) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -1953,8 +1959,28 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (sock->file->f_flags & O_NONBLOCK) msg_sys->msg_flags |= MSG_DONTWAIT; - err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys, - total_len); + /* + * If this is sendmmsg() and current destination address is same as + * previously succeeded address, omit asking LSM's decision. + * used_address->name_len is initialized to UINT_MAX so that the first + * destination address never matches. + */ + if (used_address && used_address->name_len == msg_sys->msg_namelen && + !memcmp(&used_address->name, msg->msg_name, + used_address->name_len)) { + err = sock_sendmsg_nosec(sock, msg_sys, total_len); + goto out_freectl; + } + err = sock_sendmsg(sock, msg_sys, total_len); + /* + * If this is sendmmsg() and sending to current destination address was + * successful, remember it. + */ + if (used_address && err >= 0) { + used_address->name_len = msg_sys->msg_namelen; + memcpy(&used_address->name, msg->msg_name, + used_address->name_len); + } out_freectl: if (ctl_buf != ctl) @@ -1979,7 +2005,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) if (!sock) goto out; - err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0); + err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL); fput_light(sock->file, fput_needed); out: @@ -1998,6 +2024,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, struct mmsghdr __user *entry; struct compat_mmsghdr __user *compat_entry; struct msghdr msg_sys; + struct used_address used_address; if (vlen > UIO_MAXIOV) vlen = UIO_MAXIOV; @@ -2008,24 +2035,22 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; + used_address.name_len = UINT_MAX; entry = mmsg; compat_entry = (struct compat_mmsghdr __user *)mmsg; err = 0; while (datagrams < vlen) { - /* - * No need to ask LSM for more than the first datagram. - */ if (MSG_CMSG_COMPAT & flags) { err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, &used_address); if (err < 0) break; err = __put_user(err, &compat_entry->msg_len); ++compat_entry; } else { err = __sys_sendmsg(sock, (struct msghdr __user *)entry, - &msg_sys, flags, datagrams); + &msg_sys, flags, &used_address); if (err < 0) break; err = put_user(err, &entry->msg_len); -- cgit v1.2.3-59-g8ed1b From c15fea2d8ca834dae491339c47e4fb3c81428190 Mon Sep 17 00:00:00 2001 From: Max Matveev Date: Fri, 5 Aug 2011 03:56:30 -0700 Subject: ipv6: check for IPv4 mapped addresses when connecting IPv6 sockets When support for binding to 'mapped INADDR_ANY (::ffff.0.0.0.0)' was added in 0f8d3c7ac3693d7b6c731bf2159273a59bf70e12 the rest of the code wasn't told so now it's possible to bind IPv6 datagram socket to ::ffff.0.0.0.0, connect it to another IPv4 address and it will all work except for getsockhame() which does not return the local address as expected. To give getsockname() something to work with check for 'mapped INADDR_ANY' when connecting and update the in-core source addresses appropriately. Signed-off-by: Max Matveev Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 16560336eb72..9ef1831746ef 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -33,6 +33,11 @@ #include #include +static inline int ipv6_mapped_addr_any(const struct in6_addr *a) +{ + return (ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0)); +} + int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; @@ -102,10 +107,12 @@ ipv4_connected: ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); - if (ipv6_addr_any(&np->saddr)) + if (ipv6_addr_any(&np->saddr) || + ipv6_mapped_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) { + if (ipv6_addr_any(&np->rcv_saddr) || + ipv6_mapped_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); if (sk->sk_prot->rehash) -- cgit v1.2.3-59-g8ed1b From 6e5714eaf77d79ae1c8b47e3e040ff5411b717ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 3 Aug 2011 20:50:44 -0700 Subject: net: Compute protocol sequence numbers and fragment IDs using MD5. Computers have become a lot faster since we compromised on the partial MD4 hash which we use currently for performance reasons. MD5 is a much safer choice, and is inline with both RFC1948 and other ISS generators (OpenBSD, Solaris, etc.) Furthermore, only having 24-bits of the sequence number be truly unpredictable is a very serious limitation. So the periodic regeneration and 8-bit counter have been removed. We compute and use a full 32-bit sequence number. For ipv6, DCCP was found to use a 32-bit truncated initial sequence number (it needs 43-bits) and that is fixed here as well. Reported-by: Dan Kaminsky Tested-by: Willy Tarreau Signed-off-by: David S. Miller --- drivers/char/random.c | 349 +------------------------------ include/linux/random.h | 12 -- include/net/secure_seq.h | 20 ++ net/core/Makefile | 2 +- net/core/secure_seq.c | 184 ++++++++++++++++ net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 9 +- net/ipv4/inet_hashtables.c | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/netfilter/nf_nat_proto_common.c | 1 + net/ipv4/route.c | 1 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/inet6_hashtables.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 14 files changed, 223 insertions(+), 361 deletions(-) create mode 100644 include/net/secure_seq.h create mode 100644 net/core/secure_seq.c (limited to 'net') diff --git a/drivers/char/random.c b/drivers/char/random.c index 729281961f22..c35a785005b0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1300,345 +1300,14 @@ ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ -/******************************************************************** - * - * Random functions for networking - * - ********************************************************************/ - -/* - * TCP initial sequence number picking. This uses the random number - * generator to pick an initial secret value. This value is hashed - * along with the TCP endpoint information to provide a unique - * starting point for each pair of TCP endpoints. This defeats - * attacks which rely on guessing the initial TCP sequence number. - * This algorithm was suggested by Steve Bellovin. - * - * Using a very strong hash was taking an appreciable amount of the total - * TCP connection establishment time, so this is a weaker hash, - * compensated for by changing the secret periodically. - */ - -/* F, G and H are basic MD4 functions: selection, majority, parity */ -#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) - -/* - * The generic round function. The application is so specific that - * we don't bother protecting all the arguments with parens, as is generally - * good macro practice, in favor of extra legibility. - * Rotation is separate from addition to prevent recomputation - */ -#define ROUND(f, a, b, c, d, x, s) \ - (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s))) -#define K1 0 -#define K2 013240474631UL -#define K3 015666365641UL - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - -static __u32 twothirdsMD4Transform(__u32 const buf[4], __u32 const in[12]) -{ - __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; - - /* Round 1 */ - ROUND(F, a, b, c, d, in[ 0] + K1, 3); - ROUND(F, d, a, b, c, in[ 1] + K1, 7); - ROUND(F, c, d, a, b, in[ 2] + K1, 11); - ROUND(F, b, c, d, a, in[ 3] + K1, 19); - ROUND(F, a, b, c, d, in[ 4] + K1, 3); - ROUND(F, d, a, b, c, in[ 5] + K1, 7); - ROUND(F, c, d, a, b, in[ 6] + K1, 11); - ROUND(F, b, c, d, a, in[ 7] + K1, 19); - ROUND(F, a, b, c, d, in[ 8] + K1, 3); - ROUND(F, d, a, b, c, in[ 9] + K1, 7); - ROUND(F, c, d, a, b, in[10] + K1, 11); - ROUND(F, b, c, d, a, in[11] + K1, 19); - - /* Round 2 */ - ROUND(G, a, b, c, d, in[ 1] + K2, 3); - ROUND(G, d, a, b, c, in[ 3] + K2, 5); - ROUND(G, c, d, a, b, in[ 5] + K2, 9); - ROUND(G, b, c, d, a, in[ 7] + K2, 13); - ROUND(G, a, b, c, d, in[ 9] + K2, 3); - ROUND(G, d, a, b, c, in[11] + K2, 5); - ROUND(G, c, d, a, b, in[ 0] + K2, 9); - ROUND(G, b, c, d, a, in[ 2] + K2, 13); - ROUND(G, a, b, c, d, in[ 4] + K2, 3); - ROUND(G, d, a, b, c, in[ 6] + K2, 5); - ROUND(G, c, d, a, b, in[ 8] + K2, 9); - ROUND(G, b, c, d, a, in[10] + K2, 13); - - /* Round 3 */ - ROUND(H, a, b, c, d, in[ 3] + K3, 3); - ROUND(H, d, a, b, c, in[ 7] + K3, 9); - ROUND(H, c, d, a, b, in[11] + K3, 11); - ROUND(H, b, c, d, a, in[ 2] + K3, 15); - ROUND(H, a, b, c, d, in[ 6] + K3, 3); - ROUND(H, d, a, b, c, in[10] + K3, 9); - ROUND(H, c, d, a, b, in[ 1] + K3, 11); - ROUND(H, b, c, d, a, in[ 5] + K3, 15); - ROUND(H, a, b, c, d, in[ 9] + K3, 3); - ROUND(H, d, a, b, c, in[ 0] + K3, 9); - ROUND(H, c, d, a, b, in[ 4] + K3, 11); - ROUND(H, b, c, d, a, in[ 8] + K3, 15); - - return buf[1] + b; /* "most hashed" word */ - /* Alternative: return sum of all words? */ -} -#endif - -#undef ROUND -#undef F -#undef G -#undef H -#undef K1 -#undef K2 -#undef K3 - -/* This should not be decreased so low that ISNs wrap too fast. */ -#define REKEY_INTERVAL (300 * HZ) -/* - * Bit layout of the tcp sequence numbers (before adding current time): - * bit 24-31: increased after every key exchange - * bit 0-23: hash(source,dest) - * - * The implementation is similar to the algorithm described - * in the Appendix of RFC 1185, except that - * - it uses a 1 MHz clock instead of a 250 kHz clock - * - it performs a rekey every 5 minutes, which is equivalent - * to a (source,dest) tulple dependent forward jump of the - * clock by 0..2^(HASH_BITS+1) - * - * Thus the average ISN wraparound time is 68 minutes instead of - * 4.55 hours. - * - * SMP cleanup and lock avoidance with poor man's RCU. - * Manfred Spraul - * - */ -#define COUNT_BITS 8 -#define COUNT_MASK ((1 << COUNT_BITS) - 1) -#define HASH_BITS 24 -#define HASH_MASK ((1 << HASH_BITS) - 1) +static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static struct keydata { - __u32 count; /* already shifted to the final position */ - __u32 secret[12]; -} ____cacheline_aligned ip_keydata[2]; - -static unsigned int ip_cnt; - -static void rekey_seq_generator(struct work_struct *work); - -static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator); - -/* - * Lock avoidance: - * The ISN generation runs lockless - it's just a hash over random data. - * State changes happen every 5 minutes when the random key is replaced. - * Synchronization is performed by having two copies of the hash function - * state and rekey_seq_generator always updates the inactive copy. - * The copy is then activated by updating ip_cnt. - * The implementation breaks down if someone blocks the thread - * that processes SYN requests for more than 5 minutes. Should never - * happen, and even if that happens only a not perfectly compliant - * ISN is generated, nothing fatal. - */ -static void rekey_seq_generator(struct work_struct *work) +static int __init random_int_secret_init(void) { - struct keydata *keyptr = &ip_keydata[1 ^ (ip_cnt & 1)]; - - get_random_bytes(keyptr->secret, sizeof(keyptr->secret)); - keyptr->count = (ip_cnt & COUNT_MASK) << HASH_BITS; - smp_wmb(); - ip_cnt++; - schedule_delayed_work(&rekey_work, - round_jiffies_relative(REKEY_INTERVAL)); -} - -static inline struct keydata *get_keyptr(void) -{ - struct keydata *keyptr = &ip_keydata[ip_cnt & 1]; - - smp_rmb(); - - return keyptr; -} - -static __init int seqgen_init(void) -{ - rekey_seq_generator(NULL); + get_random_bytes(random_int_secret, sizeof(random_int_secret)); return 0; } -late_initcall(seqgen_init); - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport) -{ - __u32 seq; - __u32 hash[12]; - struct keydata *keyptr = get_keyptr(); - - /* The procedure is the same as for IPv4, but addresses are longer. - * Thus we must use twothirdsMD4Transform. - */ - - memcpy(hash, saddr, 16); - hash[4] = ((__force u16)sport << 16) + (__force u16)dport; - memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7); - - seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK; - seq += keyptr->count; - - seq += ktime_to_ns(ktime_get_real()); - - return seq; -} -EXPORT_SYMBOL(secure_tcpv6_sequence_number); -#endif - -/* The code below is shamelessly stolen from secure_tcp_sequence_number(). - * All blames to Andrey V. Savochkin . - */ -__u32 secure_ip_id(__be32 daddr) -{ - struct keydata *keyptr; - __u32 hash[4]; - - keyptr = get_keyptr(); - - /* - * Pick a unique starting offset for each IP destination. - * The dest ip address is placed in the starting vector, - * which is then hashed with random data. - */ - hash[0] = (__force __u32)daddr; - hash[1] = keyptr->secret[9]; - hash[2] = keyptr->secret[10]; - hash[3] = keyptr->secret[11]; - - return half_md4_transform(hash, keyptr->secret); -} - -__u32 secure_ipv6_id(const __be32 daddr[4]) -{ - const struct keydata *keyptr; - __u32 hash[4]; - - keyptr = get_keyptr(); - - hash[0] = (__force __u32)daddr[0]; - hash[1] = (__force __u32)daddr[1]; - hash[2] = (__force __u32)daddr[2]; - hash[3] = (__force __u32)daddr[3]; - - return half_md4_transform(hash, keyptr->secret); -} - -#ifdef CONFIG_INET - -__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) -{ - __u32 seq; - __u32 hash[4]; - struct keydata *keyptr = get_keyptr(); - - /* - * Pick a unique starting offset for each TCP connection endpoints - * (saddr, daddr, sport, dport). - * Note that the words are placed into the starting vector, which is - * then mixed with a partial MD4 over random data. - */ - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = ((__force u16)sport << 16) + (__force u16)dport; - hash[3] = keyptr->secret[11]; - - seq = half_md4_transform(hash, keyptr->secret) & HASH_MASK; - seq += keyptr->count; - /* - * As close as possible to RFC 793, which - * suggests using a 250 kHz clock. - * Further reading shows this assumes 2 Mb/s networks. - * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. - * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but - * we also need to limit the resolution so that the u32 seq - * overlaps less than one time per MSL (2 minutes). - * Choosing a clock of 64 ns period is OK. (period of 274 s) - */ - seq += ktime_to_ns(ktime_get_real()) >> 6; - - return seq; -} - -/* Generate secure starting point for ephemeral IPV4 transport port search */ -u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) -{ - struct keydata *keyptr = get_keyptr(); - u32 hash[4]; - - /* - * Pick a unique starting offset for each ephemeral port search - * (saddr, daddr, dport) and 48bits of random data. - */ - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = (__force u32)dport ^ keyptr->secret[10]; - hash[3] = keyptr->secret[11]; - - return half_md4_transform(hash, keyptr->secret); -} -EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, - __be16 dport) -{ - struct keydata *keyptr = get_keyptr(); - u32 hash[12]; - - memcpy(hash, saddr, 16); - hash[4] = (__force u32)dport; - memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7); - - return twothirdsMD4Transform((const __u32 *)daddr, hash); -} -#endif - -#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) -/* Similar to secure_tcp_sequence_number but generate a 48 bit value - * bit's 32-47 increase every key exchange - * 0-31 hash(source, dest) - */ -u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport) -{ - u64 seq; - __u32 hash[4]; - struct keydata *keyptr = get_keyptr(); - - hash[0] = (__force u32)saddr; - hash[1] = (__force u32)daddr; - hash[2] = ((__force u16)sport << 16) + (__force u16)dport; - hash[3] = keyptr->secret[11]; - - seq = half_md4_transform(hash, keyptr->secret); - seq |= ((u64)keyptr->count) << (32 - HASH_BITS); - - seq += ktime_to_ns(ktime_get_real()); - seq &= (1ull << 48) - 1; - - return seq; -} -EXPORT_SYMBOL(secure_dccp_sequence_number); -#endif - -#endif /* CONFIG_INET */ - +late_initcall(random_int_secret_init); /* * Get a random word for internal kernel use only. Similar to urandom but @@ -1646,17 +1315,15 @@ EXPORT_SYMBOL(secure_dccp_sequence_number); * value is not cryptographically secure but for several uses the cost of * depleting entropy is too high */ -DEFINE_PER_CPU(__u32 [4], get_random_int_hash); +DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash); unsigned int get_random_int(void) { - struct keydata *keyptr; __u32 *hash = get_cpu_var(get_random_int_hash); - int ret; + unsigned int ret; - keyptr = get_keyptr(); hash[0] += current->pid + jiffies + get_cycles(); - - ret = half_md4_transform(hash, keyptr->secret); + md5_transform(hash, random_int_secret); + ret = hash[0]; put_cpu_var(get_random_int_hash); return ret; diff --git a/include/linux/random.h b/include/linux/random.h index ce29a040c8dc..d13059f3ea32 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -57,18 +57,6 @@ extern void add_interrupt_randomness(int irq); extern void get_random_bytes(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); -extern __u32 secure_ip_id(__be32 daddr); -extern __u32 secure_ipv6_id(const __be32 daddr[4]); -extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); -extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, - __be16 dport); -extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport); -extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); - #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; #endif diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h new file mode 100644 index 000000000000..d97f6892c019 --- /dev/null +++ b/include/net/secure_seq.h @@ -0,0 +1,20 @@ +#ifndef _NET_SECURE_SEQ +#define _NET_SECURE_SEQ + +#include + +extern __u32 secure_ip_id(__be32 daddr); +extern __u32 secure_ipv6_id(const __be32 daddr[4]); +extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); +extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport); +extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +extern u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport); + +#endif /* _NET_SECURE_SEQ */ diff --git a/net/core/Makefile b/net/core/Makefile index 8a04dd22cf77..0d357b1c4e57 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c new file mode 100644 index 000000000000..45329d7c9dd9 --- /dev/null +++ b/net/core/secure_seq.c @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; + +static int __init net_secret_init(void) +{ + get_random_bytes(net_secret, sizeof(net_secret)); + return 0; +} +late_initcall(net_secret_init); + +static u32 seq_scale(u32 seq) +{ + /* + * As close as possible to RFC 793, which + * suggests using a 250 kHz clock. + * Further reading shows this assumes 2 Mb/s networks. + * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but + * we also need to limit the resolution so that the u32 seq + * overlaps less than one time per MSL (2 minutes). + * Choosing a clock of 64 ns period is OK. (period of 274 s) + */ + return seq + (ktime_to_ns(ktime_get_real()) >> 6); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return seq_scale(hash[0]); +} +EXPORT_SYMBOL(secure_tcpv6_sequence_number); + +u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + (__force u32) daddr[i]; + secret[4] = net_secret[4] + (__force u32)dport; + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return hash[0]; +} +#endif + +#ifdef CONFIG_INET +__u32 secure_ip_id(__be32 daddr) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force __u32) daddr; + hash[1] = net_secret[13]; + hash[2] = net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_ipv6_id(const __be32 daddr[4]) +{ + __u32 hash[4]; + + memcpy(hash, daddr, 16); + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return seq_scale(hash[0]); +} + +u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = (__force u32)dport ^ net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} +EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); +#endif + +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccp_sequence_number); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccpv6_sequence_number); +#endif +#endif diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8c36adfd1919..332639b56f4d 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "ackvec.h" #include "ccid.h" diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8dc4348774a5..b74f76117dcf 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "dccp.h" #include "ipv6.h" @@ -69,13 +70,7 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); } -static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport ) -{ - return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); -} - -static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) +static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) { return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3c0369a3a663..984ec656b03b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -21,6 +21,7 @@ #include #include +#include #include /* diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e38213817d0a..86f13c67ea85 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -19,6 +19,7 @@ #include #include #include +#include /* * Theory of operations. diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 3e61faf23a9a..f52d41ea0690 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6afc4eb50591..e3dec1c9f09d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,6 +109,7 @@ #include #endif #include +#include #define RT_FL_TOS(oldflp4) \ ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 955b8e65b69e..1c12b8ec849d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b53197233709..73f1a00a96af 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -20,6 +20,7 @@ #include #include #include +#include #include int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 78aa53492b3e..d1fb63f4aeb7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -61,6 +61,7 @@ #include #include #include +#include #include -- cgit v1.2.3-59-g8ed1b From fad54440438a7c231a6ae347738423cbabc936d9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 5 Aug 2011 00:36:28 +0000 Subject: netfilter: avoid double free in nf_reinject NF_STOLEN means skb was already freed Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/netfilter/nf_queue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5b466cd1272f..84d0fd47636a 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -312,6 +312,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) } break; case NF_STOLEN: + break; default: kfree_skb(skb); } -- cgit v1.2.3-59-g8ed1b From d547f727df86059104af2234804fdd538e112015 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 7 Aug 2011 22:20:20 -0700 Subject: ipv4: fix the reusing of routing cache entries compare_keys and ip_route_input_common rely on rt_oif for distinguishing of input and output routes with same keys values. But sometimes the input route has also same hash chain (keyed by iif != 0) with the output routes (keyed by orig_oif=0). Problem visible if running with small number of rhash_entries. Fix them to use rt_route_iif instead. By this way input route can not be returned to users that request output route. The patch fixes the ip_rt_bug errors that were reported in ip_local_out context, mostly for 255.255.255.255 destinations. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e3dec1c9f09d..cb7efe0567f0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -731,6 +731,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | (rt1->rt_mark ^ rt2->rt_mark) | (rt1->rt_key_tos ^ rt2->rt_key_tos) | + (rt1->rt_route_iif ^ rt2->rt_route_iif) | (rt1->rt_oif ^ rt2->rt_oif) | (rt1->rt_iif ^ rt2->rt_iif)) == 0; } @@ -2321,8 +2322,8 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | (rth->rt_iif ^ iif) | - rth->rt_oif | (rth->rt_key_tos ^ tos)) == 0 && + rt_is_input_route(rth) && rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { -- cgit v1.2.3-59-g8ed1b From dd23198e58cd35259dd09e8892bbdb90f1d57748 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Sun, 7 Aug 2011 22:31:07 -0700 Subject: ipv4: Fix ip_getsockopt for IP_PKTOPTIONS IP_PKTOPTIONS is broken for 32-bit applications running in COMPAT mode on 64-bit kernels. This happens because msghdr's msg_flags field is always set to zero. When running in COMPAT mode this should be set to MSG_CMSG_COMPAT instead. Signed-off-by: Tiberiu Szocs-Mihai Signed-off-by: Daniel Baluta Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ab0c9efd1efa..8905e92f896a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1067,7 +1067,7 @@ EXPORT_SYMBOL(compat_ip_setsockopt); */ static int do_ip_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + char __user *optval, int __user *optlen, unsigned flags) { struct inet_sock *inet = inet_sk(sk); int val; @@ -1240,7 +1240,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, msg.msg_control = optval; msg.msg_controllen = len; - msg.msg_flags = 0; + msg.msg_flags = flags; if (inet->cmsg_flags & IP_CMSG_PKTINFO) { struct in_pktinfo info; @@ -1294,7 +1294,7 @@ int ip_getsockopt(struct sock *sk, int level, { int err; - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1327,7 +1327,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, return compat_mc_getsockopt(sk, level, optname, optval, optlen, ip_getsockopt); - err = do_ip_getsockopt(sk, level, optname, optval, optlen); + err = do_ip_getsockopt(sk, level, optname, optval, optlen, + MSG_CMSG_COMPAT); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ -- cgit v1.2.3-59-g8ed1b From 797fd3913abf2f7036003ab8d3d019cbea41affd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 7 Aug 2011 09:11:00 +0000 Subject: netfilter: TCP and raw fix for ip_route_me_harder TCP in some cases uses different global (raw) socket to send RST and ACK. The transparent flag is not set there. Currently, it is a problem for rerouting after the previous change. Fix it by simplifying the checks in ip_route_me_harder and use FLOWI_FLAG_ANYSRC even for sockets. It looks safe because the initial routing allowed this source address to be used and now we just have to make sure the packet is rerouted. As a side effect this also allows rerouting for normal raw sockets that use spoofed source addresses which was not possible even before we eliminated the ip_route_input call. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/netfilter.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 2e97e3ec1eb7..929b27bdeb79 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -18,17 +18,15 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - __u8 flags = 0; + __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; unsigned int hh_len; - if (!skb->sk && addr_type != RTN_LOCAL) { - if (addr_type == RTN_UNSPEC) - addr_type = inet_addr_type(net, saddr); - if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) - flags |= FLOWI_FLAG_ANYSRC; - else - saddr = 0; - } + if (addr_type == RTN_UNSPEC) + addr_type = inet_addr_type(net, saddr); + if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) + flags |= FLOWI_FLAG_ANYSRC; + else + saddr = 0; /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. @@ -38,7 +36,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl4.flowi4_mark = skb->mark; - fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : flags; + fl4.flowi4_flags = flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; -- cgit v1.2.3-59-g8ed1b From 47670b767b1593433b516df7798df03f858278be Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 7 Aug 2011 09:16:09 +0000 Subject: ipv4: route non-local sources for raw socket The raw sockets can provide source address for routing but their privileges are not considered. We can provide non-local source address, make sure the FLOWI_FLAG_ANYSRC flag is set if socket has privileges for this, i.e. based on hdrincl (IP_HDRINCL) and transparent flags. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 +- net/ipv4/raw.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index caaff5f5f39f..b897d6e6d0a5 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -238,7 +238,7 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) { __u8 flags = 0; - if (inet_sk(sk)->transparent) + if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) flags |= FLOWI_FLAG_ANYSRC; if (sk->sk_protocol == IPPROTO_TCP) flags |= FLOWI_FLAG_PRECOW_METRICS; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1457acb39cec..61714bd52925 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - FLOWI_FLAG_CAN_SLEEP, daddr, saddr, 0, 0); + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + daddr, saddr, 0, 0); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); -- cgit v1.2.3-59-g8ed1b From d52fbfc9e5c7bb0b0dbc256edf17dee170ce839d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 7 Aug 2011 10:17:22 +0000 Subject: ipv4: use dst with ref during bcast/mcast loopback Make sure skb dst has reference when moving to another context. Currently, I don't see protocols that can hit it when sending broadcasts/multicasts to loopback using noref dsts, so it is just a precaution. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 77d3eded665a..8c6563361ab5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -122,6 +122,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); + skb_dst_force(newskb); netif_rx_ni(newskb); return 0; } -- cgit v1.2.3-59-g8ed1b