From 062f1d6de01df545ca0df366c6133f0fa164bff6 Mon Sep 17 00:00:00 2001
From: Chun-Yeow Yeoh <yeohchunyeow@gmail.com>
Date: Tue, 22 Apr 2014 18:19:25 +0800
Subject: mac80211: avoid handling of SMPS for mesh

The patch "mac80211: implement SMPS for AP" has caused kernel
oops at mesh STA if the peer mesh STA operates in sleep mode
and then becomes active mode. It can be easily reproduced by
setting the following commands at peer mesh STA:

iw mesh0 station set aa:bb:cc:dd:ee:ff mesh_power_mode deep
iw mesh0 station set aa:bb:cc:dd:ee:ff mesh_power_mode active

Kernel oops will happen at mesh STA aa:bb:cc:dd:ee:ff.

Fix this by avoiding SMPS for mesh mode.

Signed-off-by: Chun-Yeow Yeoh <yeohchunyeow@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 137a192e64bc..847d92f6bef6 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1148,7 +1148,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 	atomic_dec(&ps->num_sta_ps);
 
 	/* This station just woke up and isn't aware of our SMPS state */
-	if (!ieee80211_smps_is_restrictive(sta->known_smps_mode,
+	if (!ieee80211_vif_is_mesh(&sdata->vif) &&
+	    !ieee80211_smps_is_restrictive(sta->known_smps_mode,
 					   sdata->smps_mode) &&
 	    sta->known_smps_mode != sdata->bss->req_smps &&
 	    sta_info_tx_streams(sta) != 1) {
-- 
cgit v1.2.3-59-g8ed1b


From 311ab8e1c28659e16a6fe14a68929c3ced4a5757 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Wed, 16 Apr 2014 08:32:41 -0400
Subject: mac80211: fixup radiotap tx flags for RTS/CTS

When using RTS/CTS, the CTS-to-Self bit in radiotap TX flags is
getting set instead of the RTS bit.  Set the correct one.

Reported-by: Larry Maxwell <larrymaxwell@agilemesh.com>
Signed-off-by: Bob Copeland <bob@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/status.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 00ba90b02ab2..60cb7a665976 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -314,10 +314,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
 	    !is_multicast_ether_addr(hdr->addr1))
 		txflags |= IEEE80211_RADIOTAP_F_TX_FAIL;
 
-	if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) ||
-	    (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT))
+	if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
 		txflags |= IEEE80211_RADIOTAP_F_TX_CTS;
-	else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
+	if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
 		txflags |= IEEE80211_RADIOTAP_F_TX_RTS;
 
 	put_unaligned_le16(txflags, pos);
-- 
cgit v1.2.3-59-g8ed1b


From 5187cd055b6e81fc6526109456f8b20623148d5f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 Apr 2014 14:25:48 -0700
Subject: netlink: Rename netlink_capable netlink_allowed

netlink_capable is a static internal function in af_netlink.c and we
have better uses for the name netlink_capable.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 894cda0206bb..7f931fe4d187 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1360,7 +1360,7 @@ retry:
 	return err;
 }
 
-static inline int netlink_capable(const struct socket *sock, unsigned int flag)
+static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
 {
 	return (nl_table[sock->sk->sk_protocol].flags & flag) ||
 		ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
@@ -1428,7 +1428,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 
 	/* Only superuser is allowed to listen multicasts */
 	if (nladdr->nl_groups) {
-		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
+		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
 			return -EPERM;
 		err = netlink_realloc_groups(sk);
 		if (err)
@@ -1490,7 +1490,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 		return -EINVAL;
 
 	if ((nladdr->nl_groups || nladdr->nl_pid) &&
-	    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
+	    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
 		return -EPERM;
 
 	if (!nlk->portid)
@@ -2096,7 +2096,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 		break;
 	case NETLINK_ADD_MEMBERSHIP:
 	case NETLINK_DROP_MEMBERSHIP: {
-		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
+		if (!netlink_allowed(sock, NL_CFG_F_NONROOT_RECV))
 			return -EPERM;
 		err = netlink_realloc_groups(sk);
 		if (err)
@@ -2247,7 +2247,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		dst_group = ffs(addr->nl_groups);
 		err =  -EPERM;
 		if ((dst_group || dst_portid) &&
-		    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
+		    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
 			goto out;
 	} else {
 		dst_portid = nlk->dst_portid;
-- 
cgit v1.2.3-59-g8ed1b


From a53b72c83a4216f2eb883ed45a0cbce014b8e62d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 Apr 2014 14:26:25 -0700
Subject: net: Move the permission check in sock_diag_put_filterinfo to
 packet_diag_dump

The permission check in sock_diag_put_filterinfo is wrong, and it is so removed
from it's sources it is not clear why it is wrong.  Move the computation
into packet_diag_dump and pass a bool of the result into sock_diag_filterinfo.

This does not yet correct the capability check but instead simply moves it to make
it clear what is going on.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h | 2 +-
 net/core/sock_diag.c      | 4 ++--
 net/packet/diag.c         | 7 ++++++-
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 302ab805b0bb..46cca4c06848 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -23,7 +23,7 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
-int sock_diag_put_filterinfo(struct sock *sk,
+int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
 			     struct sk_buff *skb, int attrtype);
 
 #endif
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 9deb6abd6cf6..a4216a4c9572 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -49,7 +49,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
 }
 EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
 
-int sock_diag_put_filterinfo(struct sock *sk,
+int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
 			     struct sk_buff *skb, int attrtype)
 {
 	struct sock_fprog_kern *fprog;
@@ -58,7 +58,7 @@ int sock_diag_put_filterinfo(struct sock *sk,
 	unsigned int flen;
 	int err = 0;
 
-	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+	if (!may_report_filterinfo) {
 		nla_reserve(skb, attrtype, 0);
 		return 0;
 	}
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 435ff99ba8c7..b34d0de24091 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -128,6 +128,7 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			struct packet_diag_req *req,
+			bool may_report_filterinfo,
 			struct user_namespace *user_ns,
 			u32 portid, u32 seq, u32 flags, int sk_ino)
 {
@@ -172,7 +173,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 		goto out_nlmsg_trim;
 
 	if ((req->pdiag_show & PACKET_SHOW_FILTER) &&
-	    sock_diag_put_filterinfo(sk, skb, PACKET_DIAG_FILTER))
+	    sock_diag_put_filterinfo(may_report_filterinfo, sk, skb,
+				     PACKET_DIAG_FILTER))
 		goto out_nlmsg_trim;
 
 	return nlmsg_end(skb, nlh);
@@ -188,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	struct packet_diag_req *req;
 	struct net *net;
 	struct sock *sk;
+	bool may_report_filterinfo;
 
 	net = sock_net(skb->sk);
 	req = nlmsg_data(cb->nlh);
+	may_report_filterinfo = ns_capable(net->user_ns, CAP_NET_ADMIN);
 
 	mutex_lock(&net->packet.sklist_lock);
 	sk_for_each(sk, &net->packet.sklist) {
@@ -200,6 +204,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			goto next;
 
 		if (sk_diag_fill(sk, skb, req,
+				 may_report_filterinfo,
 				 sk_user_ns(NETLINK_CB(cb->skb).sk),
 				 NETLINK_CB(cb->skb).portid,
 				 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-- 
cgit v1.2.3-59-g8ed1b


From a3b299da869d6e78cf42ae0b1b41797bcb8c5e4b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 Apr 2014 14:26:56 -0700
Subject: net: Add variants of capable for use on on sockets

sk_net_capable - The common case, operations that are safe in a network namespace.
sk_capable - Operations that are not known to be safe in a network namespace
sk_ns_capable - The general case for special cases.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  5 +++++
 net/core/sock.c    | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 8338a14e4805..21569cf456ed 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2255,6 +2255,11 @@ int sock_get_timestampns(struct sock *, struct timespec __user *);
 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
 		       int type);
 
+bool sk_ns_capable(const struct sock *sk,
+		   struct user_namespace *user_ns, int cap);
+bool sk_capable(const struct sock *sk, int cap);
+bool sk_net_capable(const struct sock *sk, int cap);
+
 /*
  *	Enable debug/info messages
  */
diff --git a/net/core/sock.c b/net/core/sock.c
index b4fff008136f..664ee4295b6f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -145,6 +145,55 @@
 static DEFINE_MUTEX(proto_list_mutex);
 static LIST_HEAD(proto_list);
 
+/**
+ * sk_ns_capable - General socket capability test
+ * @sk: Socket to use a capability on or through
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had when the socket was
+ * created and the current process has the capability @cap in the user
+ * namespace @user_ns.
+ */
+bool sk_ns_capable(const struct sock *sk,
+		   struct user_namespace *user_ns, int cap)
+{
+	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
+		ns_capable(user_ns, cap);
+}
+EXPORT_SYMBOL(sk_ns_capable);
+
+/**
+ * sk_capable - Socket global capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The global capbility to use
+ *
+ * Test to see if the opener of the socket had when the socket was
+ * created and the current process has the capability @cap in all user
+ * namespaces.
+ */
+bool sk_capable(const struct sock *sk, int cap)
+{
+	return sk_ns_capable(sk, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(sk_capable);
+
+/**
+ * sk_net_capable - Network namespace socket capability test
+ * @sk: Socket to use a capability on or through
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket had when the socke was created
+ * and the current process has the capability @cap over the network namespace
+ * the socket is a member of.
+ */
+bool sk_net_capable(const struct sock *sk, int cap)
+{
+	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(sk_net_capable);
+
+
 #ifdef CONFIG_MEMCG_KMEM
 int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
-- 
cgit v1.2.3-59-g8ed1b


From aa4cf9452f469f16cea8c96283b641b4576d4a7b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 Apr 2014 14:28:03 -0700
Subject: net: Add variants of capable for use on netlink messages

netlink_net_capable - The common case use, for operations that are safe on a network namespace
netlink_capable - For operations that are only known to be safe for the global root
netlink_ns_capable - The general case of capable used to handle special cases

__netlink_ns_capable - Same as netlink_ns_capable except taking a netlink_skb_parms instead of
		       the skbuff of a netlink message.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  7 ++++++
 net/netlink/af_netlink.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

(limited to 'net')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index aad8eeaf416d..f64b01787ddc 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -169,4 +169,11 @@ struct netlink_tap {
 extern int netlink_add_tap(struct netlink_tap *nt);
 extern int netlink_remove_tap(struct netlink_tap *nt);
 
+bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
+			  struct user_namespace *ns, int cap);
+bool netlink_ns_capable(const struct sk_buff *skb,
+			struct user_namespace *ns, int cap);
+bool netlink_capable(const struct sk_buff *skb, int cap);
+bool netlink_net_capable(const struct sk_buff *skb, int cap);
+
 #endif	/* __LINUX_NETLINK_H */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7f931fe4d187..81dca96d2be6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1360,6 +1360,71 @@ retry:
 	return err;
 }
 
+/**
+ * __netlink_ns_capable - General netlink message capability test
+ * @nsp: NETLINK_CB of the socket buffer holding a netlink command from userspace.
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in the user namespace @user_ns.
+ */
+bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
+			struct user_namespace *user_ns, int cap)
+{
+	return sk_ns_capable(nsp->sk, user_ns, cap);
+}
+EXPORT_SYMBOL(__netlink_ns_capable);
+
+/**
+ * netlink_ns_capable - General netlink message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @user_ns: The user namespace of the capability to use
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in the user namespace @user_ns.
+ */
+bool netlink_ns_capable(const struct sk_buff *skb,
+			struct user_namespace *user_ns, int cap)
+{
+	return __netlink_ns_capable(&NETLINK_CB(skb), user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_ns_capable);
+
+/**
+ * netlink_capable - Netlink global message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap in all user namespaces.
+ */
+bool netlink_capable(const struct sk_buff *skb, int cap)
+{
+	return netlink_ns_capable(skb, &init_user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_capable);
+
+/**
+ * netlink_net_capable - Netlink network namespace message capability test
+ * @skb: socket buffer holding a netlink command from userspace
+ * @cap: The capability to use
+ *
+ * Test to see if the opener of the socket we received the message
+ * from had when the netlink socket was created and the sender of the
+ * message has has the capability @cap over the network namespace of
+ * the socket we received the message from.
+ */
+bool netlink_net_capable(const struct sk_buff *skb, int cap)
+{
+	return netlink_ns_capable(skb, sock_net(skb->sk)->user_ns, cap);
+}
+EXPORT_SYMBOL(netlink_net_capable);
+
 static inline int netlink_allowed(const struct socket *sock, unsigned int flag)
 {
 	return (nl_table[sock->sk->sk_protocol].flags & flag) ||
-- 
cgit v1.2.3-59-g8ed1b


From 90f62cf30a78721641e08737bda787552428061e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 23 Apr 2014 14:29:27 -0700
Subject: net: Use netlink_ns_capable to verify the permisions of netlink
 messages

It is possible by passing a netlink socket to a more privileged
executable and then to fool that executable into writing to the socket
data that happens to be valid netlink message to do something that
privileged executable did not intend to do.

To keep this from happening replace bare capable and ns_capable calls
with netlink_capable, netlink_net_calls and netlink_ns_capable calls.
Which act the same as the previous calls except they verify that the
opener of the socket had the desired permissions as well.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/crypto_user.c            |  2 +-
 drivers/connector/cn_proc.c     |  2 +-
 drivers/scsi/scsi_netlink.c     |  2 +-
 kernel/audit.c                  |  4 ++--
 net/can/gw.c                    |  4 ++--
 net/core/rtnetlink.c            | 20 +++++++++++---------
 net/dcb/dcbnl.c                 |  2 +-
 net/decnet/dn_dev.c             |  4 ++--
 net/decnet/dn_fib.c             |  4 ++--
 net/decnet/netfilter/dn_rtmsg.c |  2 +-
 net/netfilter/nfnetlink.c       |  2 +-
 net/netlink/genetlink.c         |  2 +-
 net/packet/diag.c               |  2 +-
 net/phonet/pn_netlink.c         |  8 ++++----
 net/sched/act_api.c             |  2 +-
 net/sched/cls_api.c             |  2 +-
 net/sched/sch_api.c             |  6 +++---
 net/tipc/netlink.c              |  2 +-
 net/xfrm/xfrm_user.c            |  2 +-
 19 files changed, 38 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 1512e41cd93d..43665d0d0905 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -466,7 +466,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	type -= CRYPTO_MSG_BASE;
 	link = &crypto_dispatch[type];
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) &&
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 148d707a1d43..ccdd4c7e748b 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -369,7 +369,7 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg,
 		return;
 
 	/* Can only change if privileged. */
-	if (!capable(CAP_NET_ADMIN)) {
+	if (!__netlink_ns_capable(nsp, &init_user_ns, CAP_NET_ADMIN)) {
 		err = EPERM;
 		goto out;
 	}
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index fe30ea94ffe6..109802f776ed 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -77,7 +77,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
 			goto next_msg;
 		}
 
-		if (!capable(CAP_SYS_ADMIN)) {
+		if (!netlink_capable(skb, CAP_SYS_ADMIN)) {
 			err = -EPERM;
 			goto next_msg;
 		}
diff --git a/kernel/audit.c b/kernel/audit.c
index 7c2893602d06..47845c57eb19 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -643,13 +643,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 		if ((task_active_pid_ns(current) != &init_pid_ns))
 			return -EPERM;
 
-		if (!capable(CAP_AUDIT_CONTROL))
+		if (!netlink_capable(skb, CAP_AUDIT_CONTROL))
 			err = -EPERM;
 		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG:
 	case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2:
-		if (!capable(CAP_AUDIT_WRITE))
+		if (!netlink_capable(skb, CAP_AUDIT_WRITE))
 			err = -EPERM;
 		break;
 	default:  /* bad msg */
diff --git a/net/can/gw.c b/net/can/gw.c
index ac31891967da..050a2110d43f 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -804,7 +804,7 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh)
 	u8 limhops = 0;
 	int err = 0;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (nlmsg_len(nlh) < sizeof(*r))
@@ -893,7 +893,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
 	u8 limhops = 0;
 	int err = 0;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (nlmsg_len(nlh) < sizeof(*r))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d4ff41739b0f..64ad17d077ed 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1395,7 +1395,8 @@ static int do_set_master(struct net_device *dev, int ifindex)
 	return 0;
 }
 
-static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
+static int do_setlink(const struct sk_buff *skb,
+		      struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -1407,7 +1408,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 			err = PTR_ERR(net);
 			goto errout;
 		}
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+		if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
 			err = -EPERM;
 			goto errout;
 		}
@@ -1661,7 +1662,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (err < 0)
 		goto errout;
 
-	err = do_setlink(dev, ifm, tb, ifname, 0);
+	err = do_setlink(skb, dev, ifm, tb, ifname, 0);
 errout:
 	return err;
 }
@@ -1778,7 +1779,8 @@ err:
 }
 EXPORT_SYMBOL(rtnl_create_link);
 
-static int rtnl_group_changelink(struct net *net, int group,
+static int rtnl_group_changelink(const struct sk_buff *skb,
+		struct net *net, int group,
 		struct ifinfomsg *ifm,
 		struct nlattr **tb)
 {
@@ -1787,7 +1789,7 @@ static int rtnl_group_changelink(struct net *net, int group,
 
 	for_each_netdev(net, dev) {
 		if (dev->group == group) {
-			err = do_setlink(dev, ifm, tb, NULL, 0);
+			err = do_setlink(skb, dev, ifm, tb, NULL, 0);
 			if (err < 0)
 				return err;
 		}
@@ -1929,12 +1931,12 @@ replay:
 				modified = 1;
 			}
 
-			return do_setlink(dev, ifm, tb, ifname, modified);
+			return do_setlink(skb, dev, ifm, tb, ifname, modified);
 		}
 
 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
 			if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
-				return rtnl_group_changelink(net,
+				return rtnl_group_changelink(skb, net,
 						nla_get_u32(tb[IFLA_GROUP]),
 						ifm, tb);
 			return -ENODEV;
@@ -2321,7 +2323,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
 	int err = -EINVAL;
 	__u8 *addr;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
@@ -2773,7 +2775,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	sz_idx = type>>2;
 	kind = type&3;
 
-	if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+	if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 553644402670..f8b98d89c285 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1669,7 +1669,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct nlmsghdr *reply_nlh = NULL;
 	const struct reply_func *fn;
 
-	if ((nlh->nlmsg_type == RTM_SETDCB) && !capable(CAP_NET_ADMIN))
+	if ((nlh->nlmsg_type == RTM_SETDCB) && !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX,
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index a603823a3e27..3b726f31c64c 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -574,7 +574,7 @@ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct dn_ifaddr __rcu **ifap;
 	int err = -EINVAL;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (!net_eq(net, &init_net))
@@ -618,7 +618,7 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct dn_ifaddr *ifa;
 	int err;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (!net_eq(net, &init_net))
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 57dc159245ec..d332aefb0846 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -505,7 +505,7 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct nlattr *attrs[RTA_MAX+1];
 	int err;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (!net_eq(net, &init_net))
@@ -530,7 +530,7 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct nlattr *attrs[RTA_MAX+1];
 	int err;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (!net_eq(net, &init_net))
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index e83015cecfa7..e4d9560a910b 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -107,7 +107,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 		return;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
 	/* Eventually we might send routing messages too */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e8138da4c14f..84392f3237c1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -375,7 +375,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 	    skb->len < nlh->nlmsg_len)
 		return;
 
-	if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+	if (!netlink_net_capable(skb, CAP_NET_ADMIN)) {
 		netlink_ack(skb, nlh, -EPERM);
 		return;
 	}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index b1dcdb932a86..a3ba3ca0ff92 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -561,7 +561,7 @@ static int genl_family_rcv_msg(struct genl_family *family,
 		return -EOPNOTSUPP;
 
 	if ((ops->flags & GENL_ADMIN_PERM) &&
-	    !capable(CAP_NET_ADMIN))
+	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
diff --git a/net/packet/diag.c b/net/packet/diag.c
index b34d0de24091..92f2c7107eec 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -194,7 +194,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	net = sock_net(skb->sk);
 	req = nlmsg_data(cb->nlh);
-	may_report_filterinfo = ns_capable(net->user_ns, CAP_NET_ADMIN);
+	may_report_filterinfo = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
 
 	mutex_lock(&net->packet.sklist_lock);
 	sk_for_each(sk, &net->packet.sklist) {
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index dc15f4300808..b64151ade6b3 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -70,10 +70,10 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 	int err;
 	u8 pnaddr;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!netlink_capable(skb, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	ASSERT_RTNL();
@@ -233,10 +233,10 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
 	int err;
 	u8 dst;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!netlink_capable(skb, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	ASSERT_RTNL();
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8a5ba5add4bc..648778aef1a2 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -948,7 +948,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
-	if ((n->nlmsg_type != RTM_GETACTION) && !capable(CAP_NET_ADMIN))
+	if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 29a30a14c315..bdbdb1a7920a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -134,7 +134,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
 	int err;
 	int tp_created = 0;
 
-	if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
+	if ((n->nlmsg_type != RTM_GETTFILTER) && !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 replay:
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a0b84e0e22de..400769014bbd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1084,7 +1084,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
 	struct Qdisc *p = NULL;
 	int err;
 
-	if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
+	if ((n->nlmsg_type != RTM_GETQDISC) && !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1151,7 +1151,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
 	struct Qdisc *q, *p;
 	int err;
 
-	if (!capable(CAP_NET_ADMIN))
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 replay:
@@ -1490,7 +1490,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
 	u32 qid;
 	int err;
 
-	if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
+	if ((n->nlmsg_type != RTM_GETTCLASS) && !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 3aaf73de9e2d..ad844d365340 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -47,7 +47,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
 	int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
 	u16 cmd;
 
-	if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
+	if ((req_userhdr->cmd & 0xC000) && (!netlink_capable(skb, CAP_NET_ADMIN)))
 		cmd = TIPC_CMD_NOT_NET_ADMIN;
 	else
 		cmd = req_userhdr->cmd;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8f131c10a6f3..51398ae6cda8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2377,7 +2377,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	link = &xfrm_dispatch[type];
 
 	/* All operations require privileges, even GET */
-	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+	if (!netlink_net_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
-- 
cgit v1.2.3-59-g8ed1b


From a64d90fd962c2956da7505f98a302408450365e2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 24 Apr 2014 13:51:29 -0400
Subject: netfilter: Fix warning in nfnetlink_receive().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/netfilter/nfnetlink.c: In function ‘nfnetlink_rcv’:
net/netfilter/nfnetlink.c:371:14: warning: unused variable ‘net’ [-Wunused-variable]

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 84392f3237c1..e009087620e3 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -368,7 +368,6 @@ done:
 static void nfnetlink_rcv(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(skb);
-	struct net *net = sock_net(skb->sk);
 	int msglen;
 
 	if (nlh->nlmsg_len < NLMSG_HDRLEN ||
-- 
cgit v1.2.3-59-g8ed1b


From 973462bbde79bb827824c73b59027a0aed5c9ca6 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Thu, 24 Apr 2014 10:22:35 +1000
Subject: rtnetlink: Warn when interface's information won't fit in our packet

Without IFLA_EXT_MASK specified, the information reported for a single
interface in response to RTM_GETLINK is expected to fit within a netlink
packet of NLMSG_GOODSIZE.

If it doesn't, however, things will go badly wrong,  When listing all
interfaces, netlink_dump() will incorrectly treat -EMSGSIZE on the first
message in a packet as the end of the listing and omit information for
that interface and all subsequent ones.  This can cause getifaddrs(3) to
enter an infinite loop.

This patch won't fix the problem, but it will WARN_ON() making it easier to
track down what's going wrong.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 64ad17d077ed..8db72ac88feb 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1198,6 +1198,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	struct hlist_head *head;
 	struct nlattr *tb[IFLA_MAX+1];
 	u32 ext_filter_mask = 0;
+	int err;
 
 	s_h = cb->args[0];
 	s_idx = cb->args[1];
@@ -1218,11 +1219,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 		hlist_for_each_entry_rcu(dev, head, index_hlist) {
 			if (idx < s_idx)
 				goto cont;
-			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
-					     NETLINK_CB(cb->skb).portid,
-					     cb->nlh->nlmsg_seq, 0,
-					     NLM_F_MULTI,
-					     ext_filter_mask) <= 0)
+			err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+					       NETLINK_CB(cb->skb).portid,
+					       cb->nlh->nlmsg_seq, 0,
+					       NLM_F_MULTI,
+					       ext_filter_mask);
+			/* If we ran out of room on the first message,
+			 * we're in trouble
+			 */
+			WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+
+			if (err <= 0)
 				goto out;
 
 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-- 
cgit v1.2.3-59-g8ed1b


From c53864fd60227de025cb79e05493b13f69843971 Mon Sep 17 00:00:00 2001
From: David Gibson <david@gibson.dropbear.id.au>
Date: Thu, 24 Apr 2014 10:22:36 +1000
Subject: rtnetlink: Only supply IFLA_VF_PORTS information when RTEXT_FILTER_VF
 is set

Since 115c9b81928360d769a76c632bae62d15206a94a (rtnetlink: Fix problem with
buffer allocation), RTM_NEWLINK messages only contain the IFLA_VFINFO_LIST
attribute if they were solicited by a GETLINK message containing an
IFLA_EXT_MASK attribute with the RTEXT_FILTER_VF flag.

That was done because some user programs broke when they received more data
than expected - because IFLA_VFINFO_LIST contains information for each VF
it can become large if there are many VFs.

However, the IFLA_VF_PORTS attribute, supplied for devices which implement
ndo_get_vf_port (currently the 'enic' driver only), has the same problem.
It supplies per-VF information and can therefore become large, but it is
not currently conditional on the IFLA_EXT_MASK value.

Worse, it interacts badly with the existing EXT_MASK handling.  When
IFLA_EXT_MASK is not supplied, the buffer for netlink replies is fixed at
NLMSG_GOODSIZE.  If the information for IFLA_VF_PORTS exceeds this, then
rtnl_fill_ifinfo() returns -EMSGSIZE on the first message in a packet.
netlink_dump() will misinterpret this as having finished the listing and
omit data for this interface and all subsequent ones.  That can cause
getifaddrs(3) to enter an infinite loop.

This patch addresses the problem by only supplying IFLA_VF_PORTS when
IFLA_EXT_MASK is supplied with the RTEXT_FILTER_VF flag set.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8db72ac88feb..9837bebf93ce 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -774,7 +774,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 		return 0;
 }
 
-static size_t rtnl_port_size(const struct net_device *dev)
+static size_t rtnl_port_size(const struct net_device *dev,
+			     u32 ext_filter_mask)
 {
 	size_t port_size = nla_total_size(4)		/* PORT_VF */
 		+ nla_total_size(PORT_PROFILE_MAX)	/* PORT_PROFILE */
@@ -790,7 +791,8 @@ static size_t rtnl_port_size(const struct net_device *dev)
 	size_t port_self_size = nla_total_size(sizeof(struct nlattr))
 		+ port_size;
 
-	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
+	    !(ext_filter_mask & RTEXT_FILTER_VF))
 		return 0;
 	if (dev_num_vf(dev->dev.parent))
 		return port_self_size + vf_ports_size +
@@ -826,7 +828,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(ext_filter_mask
 			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
-	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+	       + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
 	       + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
 	       + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
@@ -888,11 +890,13 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
+			  u32 ext_filter_mask)
 {
 	int err;
 
-	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
+	    !(ext_filter_mask & RTEXT_FILTER_VF))
 		return 0;
 
 	err = rtnl_port_self_fill(skb, dev);
@@ -1079,7 +1083,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		nla_nest_end(skb, vfinfo);
 	}
 
-	if (rtnl_port_fill(skb, dev))
+	if (rtnl_port_fill(skb, dev, ext_filter_mask))
 		goto nla_put_failure;
 
 	if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) {
-- 
cgit v1.2.3-59-g8ed1b


From 1c2658545816088477e91860c3a645053719cb54 Mon Sep 17 00:00:00 2001
From: Kumar Sundararajan <kumar@fb.com>
Date: Thu, 24 Apr 2014 09:48:53 -0400
Subject: ipv6: fib: fix fib dump restart

When the ipv6 fib changes during a table dump, the walk is
restarted and the number of nodes dumped are skipped. But the existing
code doesn't advance to the next node after a node is skipped. This can
cause the dump to loop or produce lots of duplicates when the fib
is modified during the dump.

This change advances the walk to the next node if the current node is
skipped after a restart.

Signed-off-by: Kumar Sundararajan <kumar@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 34e0ded5c14b..87891f5f57b5 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1459,7 +1459,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
 
 				if (w->skip) {
 					w->skip--;
-					continue;
+					goto skip;
 				}
 
 				err = w->func(w);
@@ -1469,6 +1469,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)
 				w->count++;
 				continue;
 			}
+skip:
 			w->state = FWS_U;
 		case FWS_U:
 			if (fn == w->root)
-- 
cgit v1.2.3-59-g8ed1b


From 9eb1fbfa0a737fd4d3a6d12d71c5ea9af622b887 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 11 Apr 2014 12:02:31 -0700
Subject: Bluetooth: Fix triggering BR/EDR L2CAP Connect too early

Commit 1c2e004183178 introduced an event handler for the encryption key
refresh complete event with the intent of fixing some LE/SMP cases.
However, this event is shared with BR/EDR and there we actually want to
act only on the auth_complete event (which comes after the key refresh).

If we do not do this we may trigger an L2CAP Connect Request too early
and cause the remote side to return a security block error.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Cc: stable@vger.kernel.org
---
 net/bluetooth/hci_event.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 49774912cb01..15010a230b6d 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3330,6 +3330,12 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
 	if (!conn)
 		goto unlock;
 
+	/* For BR/EDR the necessary steps are taken through the
+	 * auth_complete event.
+	 */
+	if (conn->type != LE_LINK)
+		goto unlock;
+
 	if (!ev->status)
 		conn->sec_level = conn->pending_sec_level;
 
-- 
cgit v1.2.3-59-g8ed1b


From 09da1f3463eb81d59685df723b1c5950b7570340 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Fri, 11 Apr 2014 12:02:32 -0700
Subject: Bluetooth: Fix redundant encryption request for reauthentication

When we're performing reauthentication (in order to elevate the
security level from an unauthenticated key to an authenticated one) we
do not need to issue any encryption command once authentication
completes. Since the trigger for the encryption HCI command is the
ENCRYPT_PEND flag this flag should not be set in this scenario.
Instead, the REAUTH_PEND flag takes care of all necessary steps for
reauthentication.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Cc: stable@vger.kernel.org
---
 net/bluetooth/hci_conn.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index d958e2dca52f..521fd4f3985e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -819,14 +819,17 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) {
 		struct hci_cp_auth_requested cp;
 
-		/* encrypt must be pending if auth is also pending */
-		set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
-
 		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
 			     sizeof(cp), &cp);
+
+		/* If we're already encrypted set the REAUTH_PEND flag,
+		 * otherwise set the ENCRYPT_PEND.
+		 */
 		if (conn->key_type != 0xff)
 			set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
+		else
+			set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
 	}
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 30313a3d5794472c3548d7288e306a5492030370 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 25 Apr 2014 17:01:18 +0900
Subject: bridge: Handle IFLA_ADDRESS correctly when creating bridge device

When bridge device is created with IFLA_ADDRESS, we are not calling
br_stp_change_bridge_id(), which leads to incorrect local fdb
management and bridge id calculation, and prevents us from receiving
frames on the bridge device.

Reported-by: Tom Gundersen <teg@jklm.no>
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e74b6d530cb6..e8844d975b32 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -445,6 +445,20 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
+static int br_dev_newlink(struct net *src_net, struct net_device *dev,
+			  struct nlattr *tb[], struct nlattr *data[])
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	if (tb[IFLA_ADDRESS]) {
+		spin_lock_bh(&br->lock);
+		br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
+		spin_unlock_bh(&br->lock);
+	}
+
+	return register_netdevice(dev);
+}
+
 static size_t br_get_link_af_size(const struct net_device *dev)
 {
 	struct net_port_vlans *pv;
@@ -473,6 +487,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
 	.priv_size	= sizeof(struct net_bridge),
 	.setup		= br_dev_setup,
 	.validate	= br_validate,
+	.newlink	= br_dev_newlink,
 	.dellink	= br_dev_delete,
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 85350871317a5adb35519d9dc6fc9e80809d42ad Mon Sep 17 00:00:00 2001
From: Xufeng Zhang <xufeng.zhang@windriver.com>
Date: Fri, 25 Apr 2014 16:55:41 +0800
Subject: sctp: reset flowi4_oif parameter on route lookup

commit 813b3b5db83 (ipv4: Use caller's on-stack flowi as-is
in output route lookups.) introduces another regression which
is very similar to the problem of commit e6b45241c (ipv4: reset
flowi parameters on route connect) wants to fix:
Before we call ip_route_output_key() in sctp_v4_get_dst() to
get a dst that matches a bind address as the source address,
we have already called this function previously and the flowi
parameters have been initialized including flowi4_oif, so when
we call this function again, the process in __ip_route_output_key()
will be different because of the setting of flowi4_oif, and we'll
get a networking device which corresponds to the inputted flowi4_oif
as the output device, this is wrong because we'll never hit this
place if the previously returned source address of dst match one
of the bound addresses.

To reproduce this problem, a vlan setting is enough:
  # ifconfig eth0 up
  # route del default
  # vconfig add eth0 2
  # vconfig add eth0 3
  # ifconfig eth0.2 10.0.1.14 netmask 255.255.255.0
  # route add default gw 10.0.1.254 dev eth0.2
  # ifconfig eth0.3 10.0.0.14 netmask 255.255.255.0
  # ip rule add from 10.0.0.14 table 4
  # ip route add table 4 default via 10.0.0.254 src 10.0.0.14 dev eth0.3
  # sctp_darn -H 10.0.0.14 -P 36422 -h 10.1.4.134 -p 36422 -s -I
You'll detect that all the flow are routed to eth0.2(10.0.1.254).

Signed-off-by: Xufeng Zhang <xufeng.zhang@windriver.com>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index c09757fbf803..44cbb54c8574 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -491,8 +491,13 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 			continue;
 		if ((laddr->state == SCTP_ADDR_SRC) &&
 		    (AF_INET == laddr->a.sa.sa_family)) {
-			fl4->saddr = laddr->a.v4.sin_addr.s_addr;
 			fl4->fl4_sport = laddr->a.v4.sin_port;
+			flowi4_update_output(fl4,
+					     asoc->base.sk->sk_bound_dev_if,
+					     RT_CONN_FLAGS(asoc->base.sk),
+					     daddr->v4.sin_addr.s_addr,
+					     laddr->a.v4.sin_addr.s_addr);
+
 			rt = ip_route_output_key(sock_net(sk), fl4);
 			if (!IS_ERR(rt)) {
 				dst = &rt->dst;
-- 
cgit v1.2.3-59-g8ed1b


From 8c2eab9097dba50bcd73ed4632baccc3f34857f9 Mon Sep 17 00:00:00 2001
From: Karl Heiss <kheiss@gmail.com>
Date: Fri, 25 Apr 2014 14:26:30 -0400
Subject: net: sctp: Don't transition to PF state when transport has exhausted
 'Path.Max.Retrans'.

Don't transition to the PF state on every strike after 'Path.Max.Retrans'.
Per draft-ietf-tsvwg-sctp-failover-03 Section 5.1.6:

   Additional (PMR - PFMR) consecutive timeouts on a PF destination
   confirm the path failure, upon which the destination transitions to the
   Inactive state.  As described in [RFC4960], the sender (i) SHOULD notify
   ULP about this state transition, and (ii) transmit heartbeats to the
   Inactive destination at a lower frequency as described in Section 8.3 of
   [RFC4960].

This also prevents sending SCTP_ADDR_UNREACHABLE to the user as the state
bounces between SCTP_INACTIVE and SCTP_PF for each subsequent strike.

Signed-off-by: Karl Heiss <kheiss@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_sideeffect.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 5d6883ff00c3..fef2acdf4a2e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -496,11 +496,10 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
 
 	/* If the transport error count is greater than the pf_retrans
 	 * threshold, and less than pathmaxrtx, and if the current state
-	 * is not SCTP_UNCONFIRMED, then mark this transport as Partially
-	 * Failed, see SCTP Quick Failover Draft, section 5.1
+	 * is SCTP_ACTIVE, then mark this transport as Partially Failed,
+	 * see SCTP Quick Failover Draft, section 5.1
 	 */
-	if ((transport->state != SCTP_PF) &&
-	   (transport->state != SCTP_UNCONFIRMED) &&
+	if ((transport->state == SCTP_ACTIVE) &&
 	   (asoc->pf_retrans < transport->pathmaxrxt) &&
 	   (transport->error_count > asoc->pf_retrans)) {
 
-- 
cgit v1.2.3-59-g8ed1b


From e374c618b1465f0292047a9f4c244bd71ab5f1f0 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Mon, 28 Apr 2014 10:51:56 +0300
Subject: net: ipv6: more places need LOOPBACK_IFINDEX for flowi6_iif

To properly match iif in ip rules we have to provide
LOOPBACK_IFINDEX in flowi6_iif, not 0. Some ip6mr_fib_lookup
and fib6_rule_lookup callers need such fix.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c                   | 2 +-
 net/ipv6/netfilter/ip6t_rpfilter.c | 1 +
 net/ipv6/route.c                   | 2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 8659067da28e..8250474ab7dc 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1633,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
 {
 	struct mr6_table *mrt;
 	struct flowi6 fl6 = {
-		.flowi6_iif	= skb->skb_iif,
+		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
 		.flowi6_oif	= skb->dev->ifindex,
 		.flowi6_mark	= skb->mark,
 	};
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index e0983f3648a6..790e0c6b19e1 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 	bool ret = false;
 	struct flowi6 fl6 = {
+		.flowi6_iif = LOOPBACK_IFINDEX,
 		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
 		.flowi6_proto = iph->nexthdr,
 		.daddr = iph->saddr,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4011617cca68..004fffb6c221 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1273,6 +1273,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
 	struct flowi6 fl6;
 
 	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_iif = LOOPBACK_IFINDEX;
 	fl6.flowi6_oif = oif;
 	fl6.flowi6_mark = mark;
 	fl6.daddr = iph->daddr;
@@ -1294,6 +1295,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
 	struct flowi6 fl6;
 
 	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_iif = LOOPBACK_IFINDEX;
 	fl6.flowi6_oif = oif;
 	fl6.flowi6_mark = mark;
 	fl6.daddr = msg->dest;
-- 
cgit v1.2.3-59-g8ed1b


From 92b2e75158f6b8316b5a567c73dcf5b3d8f6bbce Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Thu, 10 Apr 2014 18:09:41 +0400
Subject: libceph: fix non-default values check in apply_primary_affinity()

osd_primary_affinity array is indexed into incorrectly when checking
for non-default primary-affinity values.  This nullifies the impact of
the rest of the apply_primary_affinity() and results in misdirected
requests.

                if (osds[i] != CRUSH_ITEM_NONE &&
                    osdmap->osd_primary_affinity[i] !=
                                                ^^^
                                        CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {

For a pool with size 2, this always ends up checking osd0 and osd1
primary_affinity values, instead of the values that correspond to the
osds in question.  E.g., given a [2,3] up set and a [max,max,0,max]
primary affinity vector, requests are still sent to osd2, because both
osd0 and osd1 happen to have max primary_affinity values and therefore
we return from apply_primary_affinity() early on the premise that all
osds in the given set have max (default) values.  Fix it.

Fixes: http://tracker.ceph.com/issues/7954

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 net/ceph/osdmap.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index e632b5a52f5b..8b8a5a24b223 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1548,8 +1548,10 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
 		return;
 
 	for (i = 0; i < len; i++) {
-		if (osds[i] != CRUSH_ITEM_NONE &&
-		    osdmap->osd_primary_affinity[i] !=
+		int osd = osds[i];
+
+		if (osd != CRUSH_ITEM_NONE &&
+		    osdmap->osd_primary_affinity[osd] !=
 					CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
 			break;
 		}
@@ -1563,10 +1565,9 @@ static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
 	 * osd's pgs get rejected as primary.
 	 */
 	for (i = 0; i < len; i++) {
-		int osd;
+		int osd = osds[i];
 		u32 aff;
 
-		osd = osds[i];
 		if (osd == CRUSH_ITEM_NONE)
 			continue;
 
-- 
cgit v1.2.3-59-g8ed1b


From fe337ac2839521b360f828b3ebd992d597b1ad16 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 28 Apr 2014 21:07:31 +0200
Subject: netfilter: ctnetlink: don't add null bindings if no nat requested

commit 0eba801b64cc8284d9024c7ece30415a2b981a72 tried to fix a race
where nat initialisation can happen after ctnetlink-created conntrack
has been created.

However, it causes the nat module(s) to be loaded needlessly on
systems that are not using NAT.

Fortunately, we do not have to create null bindings in that case.

conntracks injected via ctnetlink always have the CONFIRMED bit set,
which prevents addition of the nat extension in nf_nat_ipv4/6_fn().

We only need to make sure that either no nat extension is added
or that we've created both src and dst manips.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ccc46fa5edbc..58579634427d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1336,6 +1336,9 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
 #ifdef CONFIG_NF_NAT_NEEDED
 	int ret;
 
+	if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
+		return 0;
+
 	ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST,
 					cda[CTA_NAT_DST]);
 	if (ret < 0)
-- 
cgit v1.2.3-59-g8ed1b


From fc9f35010641ea85dd19d144b86cdd93156f1a1e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 28 Apr 2014 22:00:29 -0700
Subject: tcp: increment retransmit counters in tlp and fast open

Both TLP and Fast Open call __tcp_retransmit_skb() instead of
tcp_retransmit_skb() to avoid changing tp->retrans_out.

This has the side effect of missing SNMP counters increments as well
as tcp_info tcpi_total_retrans updates.

Fix this by moving the stats increments of into __tcp_retransmit_skb()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 025e25093984..12d6016bdd9a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2441,8 +2441,14 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
 
-	if (likely(!err))
+	if (likely(!err)) {
 		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+		/* Update global TCP statistics. */
+		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
+			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+		tp->total_retrans++;
+	}
 	return err;
 }
 
@@ -2452,12 +2458,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	int err = __tcp_retransmit_skb(sk, skb);
 
 	if (err == 0) {
-		/* Update global TCP statistics. */
-		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
-		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
-			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
-		tp->total_retrans++;
-
 #if FASTRETRANS_DEBUG > 0
 		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 			net_dbg_ratelimited("retrans_out leaked\n");
-- 
cgit v1.2.3-59-g8ed1b


From 0cda345d1b2201dd15591b163e3c92bad5191745 Mon Sep 17 00:00:00 2001
From: Liu Yu <allanyuliu@tencent.com>
Date: Wed, 30 Apr 2014 17:34:09 +0800
Subject: tcp_cubic: fix the range of delayed_ack

commit b9f47a3aaeab (tcp_cubic: limit delayed_ack ratio to prevent
divide error) try to prevent divide error, but there is still a little
chance that delayed_ack can reach zero. In case the param cnt get
negative value, then ratio+cnt would overflow and may happen to be zero.
As a result, min(ratio, ACK_RATIO_LIMIT) will calculate to be zero.

In some old kernels, such as 2.6.32, there is a bug that would
pass negative param, which then ultimately leads to this divide error.

commit 5b35e1e6e9c (tcp: fix tcp_trim_head() to adjust segment count
with skb MSS) fixed the negative param issue. However,
it's safe that we fix the range of delayed_ack as well,
to make sure we do not hit a divide by zero.

CC: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Liu Yu <allanyuliu@tencent.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 8bf224516ba2..b4f1b29b08bd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -409,7 +409,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
 		ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
 		ratio += cnt;
 
-		ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
+		ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
 	}
 
 	/* Some calls are for duplicates without timetamps */
-- 
cgit v1.2.3-59-g8ed1b


From 895162b1101b3ea5db08ca6822ae9672717efec0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 2 May 2014 15:32:16 +0200
Subject: netfilter: ipv4: defrag: set local_df flag on defragmented skb

else we may fail to forward skb even if original fragments do fit
outgoing link mtu:

1. remote sends 2k packets in two 1000 byte frags, DF set
2. we want to forward but only see '2k > mtu and DF set'
3. we then send icmp error saying that outgoing link is 1500

But original sender never sent a packet that would not fit
the outgoing link.

Setting local_df makes outgoing path test size vs.
IPCB(skb)->frag_max_size, so we will still send the correct
error in case the largest original size did not fit
outgoing link mtu.

Reported-by: Maxime Bizon <mbizon@freebox.fr>
Suggested-by: Maxime Bizon <mbizon@freebox.fr>
Fixes: 5f2d04f1f9 (ipv4: fix path MTU discovery with connection tracking)
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_defrag_ipv4.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 12e13bd82b5b..f40f321b41fc 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,7 +22,6 @@
 #endif
 #include <net/netfilter/nf_conntrack_zones.h>
 
-/* Returns new sk_buff, or NULL */
 static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
 	int err;
@@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 	err = ip_defrag(skb, user);
 	local_bh_enable();
 
-	if (!err)
+	if (!err) {
 		ip_send_check(ip_hdr(skb));
+		skb->local_df = 1;
+	}
 
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From ecd15dd7e45f3683fa8142b9f2c015dfaa0c243d Mon Sep 17 00:00:00 2001
From: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
Date: Sun, 4 May 2014 13:35:37 +0200
Subject: netfilter: nfnetlink: Fix use after free when it fails to process
 batch

This bug manifests when calling the nft command line tool without
nf_tables kernel support.

kernel message:
[   44.071555] Netfilter messages via NETLINK v0.30.
[   44.072253] BUG: unable to handle kernel NULL pointer dereference at 0000000000000119
[   44.072264] IP: [<ffffffff8171db1f>] netlink_getsockbyportid+0xf/0x70
[   44.072272] PGD 7f2b74067 PUD 7f2b73067 PMD 0
[   44.072277] Oops: 0000 [#1] SMP
[...]
[   44.072369] Call Trace:
[   44.072373]  [<ffffffff8171fd81>] netlink_unicast+0x91/0x200
[   44.072377]  [<ffffffff817206c9>] netlink_ack+0x99/0x110
[   44.072381]  [<ffffffffa004b951>] nfnetlink_rcv+0x3c1/0x408 [nfnetlink]
[   44.072385]  [<ffffffff8171fde3>] netlink_unicast+0xf3/0x200
[   44.072389]  [<ffffffff817201ef>] netlink_sendmsg+0x2ff/0x740
[   44.072394]  [<ffffffff81044752>] ? __mmdrop+0x62/0x90
[   44.072398]  [<ffffffff816dafdb>] sock_sendmsg+0x8b/0xc0
[   44.072403]  [<ffffffff812f1af5>] ? copy_user_enhanced_fast_string+0x5/0x10
[   44.072406]  [<ffffffff816dbb6c>] ? move_addr_to_kernel+0x2c/0x50
[   44.072410]  [<ffffffff816db423>] ___sys_sendmsg+0x3c3/0x3d0
[   44.072415]  [<ffffffff811301ba>] ? handle_mm_fault+0xa9a/0xc60
[   44.072420]  [<ffffffff811362d6>] ? mmap_region+0x166/0x5a0
[   44.072424]  [<ffffffff817da84c>] ? __do_page_fault+0x1dc/0x510
[   44.072428]  [<ffffffff812b8b2c>] ? apparmor_capable+0x1c/0x60
[   44.072435]  [<ffffffff817d6e9a>] ? _raw_spin_unlock_bh+0x1a/0x20
[   44.072439]  [<ffffffff816dfc86>] ? release_sock+0x106/0x150
[   44.072443]  [<ffffffff816dc212>] __sys_sendmsg+0x42/0x80
[   44.072446]  [<ffffffff816dc262>] SyS_sendmsg+0x12/0x20
[   44.072450]  [<ffffffff817df616>] system_call_fastpath+0x1a/0x1f

Signed-off-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e009087620e3..23ef77c60fff 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -256,15 +256,15 @@ replay:
 #endif
 		{
 			nfnl_unlock(subsys_id);
-			kfree_skb(nskb);
-			return netlink_ack(skb, nlh, -EOPNOTSUPP);
+			netlink_ack(skb, nlh, -EOPNOTSUPP);
+			return kfree_skb(nskb);
 		}
 	}
 
 	if (!ss->commit || !ss->abort) {
 		nfnl_unlock(subsys_id);
-		kfree_skb(nskb);
-		return netlink_ack(skb, nlh, -EOPNOTSUPP);
+		netlink_ack(skb, nlh, -EOPNOTSUPP);
+		return kfree_skb(skb);
 	}
 
 	while (skb->len >= nlmsg_total_size(0)) {
-- 
cgit v1.2.3-59-g8ed1b


From f6a082fed1e6407c2f4437d0d963b1bcbe5f9f58 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 1 May 2014 09:23:06 -0700
Subject: net: sched: lock imbalance in hhf qdisc

hhf_change() takes the sch_tree_lock and releases it but misses the
error cases. Fix the missed case here.

To reproduce try a command like this,

# tc qdisc change dev p3p2 root hhf quantum 40960 non_hh_weight 300000

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hhf.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index edee03d922e2..6e957c3b9854 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -553,11 +553,6 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
 	if (err < 0)
 		return err;
 
-	sch_tree_lock(sch);
-
-	if (tb[TCA_HHF_BACKLOG_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
-
 	if (tb[TCA_HHF_QUANTUM])
 		new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);
 
@@ -567,6 +562,12 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
 	non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
 	if (non_hh_quantum > INT_MAX)
 		return -EINVAL;
+
+	sch_tree_lock(sch);
+
+	if (tb[TCA_HHF_BACKLOG_LIMIT])
+		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);
+
 	q->quantum = new_quantum;
 	q->hhf_non_hh_weight = new_hhf_non_hh_weight;
 
-- 
cgit v1.2.3-59-g8ed1b


From f4ebddf9abb1fb0add1ce8e9c0bc414f1c17d81d Mon Sep 17 00:00:00 2001
From: Henning Rogge <hrogge@gmail.com>
Date: Thu, 1 May 2014 10:03:46 +0200
Subject: mac80211: Fix mac80211 station info rx bitrate for IBSS mode

Filter out incoming multicast packages before applying their bitrate
to the rx bitrate station info field to prevent them from setting the
rx bitrate to the basic multicast rate.

Signed-off-by: Henning Rogge <hrogge@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 216c45b949e5..2b608b2b70ec 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1231,7 +1231,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 		if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) &&
 		    test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
 			sta->last_rx = jiffies;
-			if (ieee80211_is_data(hdr->frame_control)) {
+			if (ieee80211_is_data(hdr->frame_control) &&
+			    !is_multicast_ether_addr(hdr->addr1)) {
 				sta->last_rx_rate_idx = status->rate_idx;
 				sta->last_rx_rate_flag = status->flag;
 				sta->last_rx_rate_vht_flag = status->vht_flag;
-- 
cgit v1.2.3-59-g8ed1b


From c1fbb258846dfc425507a093922d2d001e54c3ea Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 30 Apr 2014 15:58:13 +0300
Subject: cfg80211: free sme on connection failures

cfg80211 is notified about connection failures by
__cfg80211_connect_result() call. However, this
function currently does not free cfg80211 sme.

This results in hanging connection attempts in some cases

e.g. when mac80211 authentication attempt is denied,
we have this function call:
ieee80211_rx_mgmt_auth() -> cfg80211_rx_mlme_mgmt() ->
cfg80211_process_auth() -> cfg80211_sme_rx_auth() ->
__cfg80211_connect_result()

but cfg80211_sme_free() is never get called.

Fixes: ceca7b712 ("cfg80211: separate internal SME implementation")
Cc: stable@vger.kernel.org (3.10+)
Signed-off-by: Eliad Peller <eliadx.peller@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/sme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index acdcb4a81817..3546a77033de 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -234,7 +234,6 @@ void cfg80211_conn_work(struct work_struct *work)
 					NULL, 0, NULL, 0,
 					WLAN_STATUS_UNSPECIFIED_FAILURE,
 					false, NULL);
-			cfg80211_sme_free(wdev);
 		}
 		wdev_unlock(wdev);
 	}
@@ -648,6 +647,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
 			cfg80211_unhold_bss(bss_from_pub(bss));
 			cfg80211_put_bss(wdev->wiphy, bss);
 		}
+		cfg80211_sme_free(wdev);
 		return;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 792e6aa7a15ea0fb16f8687e93caede1ea9118c7 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 30 Apr 2014 16:14:23 +0300
Subject: cfg80211: add cfg80211_sched_scan_stopped_rtnl

Add locked-version for cfg80211_sched_scan_stopped.
This is used for some users that might want to
call it when rtnl is already locked.

Fixes: d43c6b6 ("mac80211: reschedule sched scan after HW restart")
Cc: stable@vger.kernel.org (3.14+)
Signed-off-by: Eliad Peller <eliadx.peller@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 12 ++++++++++++
 net/wireless/scan.c    | 12 ++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f3539a15c411..f856e5a746fa 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3668,6 +3668,18 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy);
  */
 void cfg80211_sched_scan_stopped(struct wiphy *wiphy);
 
+/**
+ * cfg80211_sched_scan_stopped_rtnl - notify that the scheduled scan has stopped
+ *
+ * @wiphy: the wiphy on which the scheduled scan stopped
+ *
+ * The driver can call this function to inform cfg80211 that the
+ * scheduled scan had to be stopped, for whatever reason.  The driver
+ * is then called back via the sched_scan_stop operation when done.
+ * This function should be called with rtnl locked.
+ */
+void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy);
+
 /**
  * cfg80211_inform_bss_width_frame - inform cfg80211 of a received BSS frame
  *
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 7d09a712cb1f..88f108edfb58 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -284,14 +284,22 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy)
 }
 EXPORT_SYMBOL(cfg80211_sched_scan_results);
 
-void cfg80211_sched_scan_stopped(struct wiphy *wiphy)
+void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
 
+	ASSERT_RTNL();
+
 	trace_cfg80211_sched_scan_stopped(wiphy);
 
-	rtnl_lock();
 	__cfg80211_stop_sched_scan(rdev, true);
+}
+EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl);
+
+void cfg80211_sched_scan_stopped(struct wiphy *wiphy)
+{
+	rtnl_lock();
+	cfg80211_sched_scan_stopped_rtnl(wiphy);
 	rtnl_unlock();
 }
 EXPORT_SYMBOL(cfg80211_sched_scan_stopped);
-- 
cgit v1.2.3-59-g8ed1b


From e669ba2d06c6195662601956454ac959892f0762 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Wed, 30 Apr 2014 16:14:24 +0300
Subject: mac80211: fix nested rtnl locking on ieee80211_reconfig

ieee80211_reconfig already holds rtnl, so calling
cfg80211_sched_scan_stopped results in deadlock.

Use the rtnl-version of this function instead.

Fixes: d43c6b6 ("mac80211: reschedule sched scan after HW restart")
Cc: stable@vger.kernel.org (3.14+)
Signed-off-by: Eliad Peller <eliadx.peller@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 275c94f995f7..3c365837e910 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1780,7 +1780,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	mutex_unlock(&local->mtx);
 
 	if (sched_scan_stopped)
-		cfg80211_sched_scan_stopped(local->hw.wiphy);
+		cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy);
 
 	/*
 	 * If this is for hw restart things are still running.
-- 
cgit v1.2.3-59-g8ed1b


From 7c3d5ab1f35f5475b1a1fbe74143683cfc092d33 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@parallels.com>
Date: Sat, 3 May 2014 03:14:04 +0400
Subject: ipv4: fix "conntrack zones" support for defrag user check in
 ip_expire

Defrag user check in ip_expire was not updated after adding support for
"conntrack zones".

This bug manifests as a RFC violation, since the router will send
the icmp time exceeeded message when using conntrack zones.

Signed-off-by: Vasily Averin <vvs@openvz.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/ip_fragment.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index c10a3ce5cbff..ed32313e307c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -232,8 +232,9 @@ static void ip_expire(unsigned long arg)
 		 * "Fragment Reassembly Timeout" message, per RFC792.
 		 */
 		if (qp->user == IP_DEFRAG_AF_PACKET ||
-		    (qp->user == IP_DEFRAG_CONNTRACK_IN &&
-		     skb_rtable(head)->rt_type != RTN_LOCAL))
+		    ((qp->user >= IP_DEFRAG_CONNTRACK_IN) &&
+		     (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
+		     (skb_rtable(head)->rt_type != RTN_LOCAL)))
 			goto out_rcu_unlock;
 
 
-- 
cgit v1.2.3-59-g8ed1b


From aff09ce303f83bd370772349238482ae422a2341 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@parallels.com>
Date: Mon, 5 May 2014 00:17:48 +0400
Subject: bridge: superfluous skb->nfct check in br_nf_dev_queue_xmit

Currently bridge can silently drop ipv4 fragments.
If node have loaded nf_defrag_ipv4 module but have no nf_conntrack_ipv4,
br_nf_pre_routing defragments incoming ipv4 fragments
but nfct check in br_nf_dev_queue_xmit does not allow re-fragment combined
packet back, and therefore it is dropped in br_dev_queue_push_xmit without
incrementing of any failcounters

It seems the only way to hit the ip_fragment code in the bridge xmit
path is to have a fragment list whose reassembled fragments go over
the mtu. This only happens if nf_defrag is enabled. Thanks to
Florian Westphal for providing feedback to clarify this.

Defragmentation ipv4 is required not only in conntracks but at least in
TPROXY target and socket match, therefore #ifdef is changed from
NF_CONNTRACK_IPV4 to NF_DEFRAG_IPV4

Signed-off-by: Vasily Averin <vvs@openvz.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/br_netfilter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 80e1b0f60a30..2acf7fa1fec6 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -859,12 +859,12 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
 	return NF_STOLEN;
 }
 
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4)
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	int ret;
 
-	if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
+	if (skb->protocol == htons(ETH_P_IP) &&
 	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
 	    !skb_is_gso(skb)) {
 		if (br_parse_ip_options(skb))
-- 
cgit v1.2.3-59-g8ed1b


From 2c4a336e0a3e203fab6aa8d8f7bb70a0ad968a6b Mon Sep 17 00:00:00 2001
From: Andy King <acking@vmware.com>
Date: Thu, 1 May 2014 15:20:43 -0700
Subject: vsock: Make transport the proto owner

Right now the core vsock module is the owner of the proto family. This
means there's nothing preventing the transport module from unloading if
there are open sockets, which results in a panic. Fix that by allowing
the transport to be the owner, which will refcount it properly.

Includes version bump to 1.0.1.0-k

Passes checkpatch this time, I swear...

Acked-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Andy King <acking@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_vsock.h   |  6 +++++-
 net/vmw_vsock/af_vsock.c | 47 ++++++++++++++++++++++-------------------------
 2 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 7d64d3609ec9..428277869400 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -155,7 +155,11 @@ struct vsock_transport {
 
 /**** CORE ****/
 
-int vsock_core_init(const struct vsock_transport *t);
+int __vsock_core_init(const struct vsock_transport *t, struct module *owner);
+static inline int vsock_core_init(const struct vsock_transport *t)
+{
+	return __vsock_core_init(t, THIS_MODULE);
+}
 void vsock_core_exit(void);
 
 /**** UTILS ****/
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 5adfd94c5b85..85d232bed87d 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1925,9 +1925,23 @@ static struct miscdevice vsock_device = {
 	.fops		= &vsock_device_ops,
 };
 
-static int __vsock_core_init(void)
+int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
 {
-	int err;
+	int err = mutex_lock_interruptible(&vsock_register_mutex);
+
+	if (err)
+		return err;
+
+	if (transport) {
+		err = -EBUSY;
+		goto err_busy;
+	}
+
+	/* Transport must be the owner of the protocol so that it can't
+	 * unload while there are open sockets.
+	 */
+	vsock_proto.owner = owner;
+	transport = t;
 
 	vsock_init_tables();
 
@@ -1951,36 +1965,19 @@ static int __vsock_core_init(void)
 		goto err_unregister_proto;
 	}
 
+	mutex_unlock(&vsock_register_mutex);
 	return 0;
 
 err_unregister_proto:
 	proto_unregister(&vsock_proto);
 err_misc_deregister:
 	misc_deregister(&vsock_device);
-	return err;
-}
-
-int vsock_core_init(const struct vsock_transport *t)
-{
-	int retval = mutex_lock_interruptible(&vsock_register_mutex);
-	if (retval)
-		return retval;
-
-	if (transport) {
-		retval = -EBUSY;
-		goto out;
-	}
-
-	transport = t;
-	retval = __vsock_core_init();
-	if (retval)
-		transport = NULL;
-
-out:
+	transport = NULL;
+err_busy:
 	mutex_unlock(&vsock_register_mutex);
-	return retval;
+	return err;
 }
-EXPORT_SYMBOL_GPL(vsock_core_init);
+EXPORT_SYMBOL_GPL(__vsock_core_init);
 
 void vsock_core_exit(void)
 {
@@ -2000,5 +1997,5 @@ EXPORT_SYMBOL_GPL(vsock_core_exit);
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Socket Family");
-MODULE_VERSION("1.0.0.0-k");
+MODULE_VERSION("1.0.1.0-k");
 MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3-59-g8ed1b


From e96f2e7c430014eff52c93cabef1ad4f42ed0db1 Mon Sep 17 00:00:00 2001
From: Ying Cai <ycai@google.com>
Date: Sun, 4 May 2014 15:20:04 -0700
Subject: ip_tunnel: Set network header properly for IP_ECN_decapsulate()

In ip_tunnel_rcv(), set skb->network_header to inner IP header
before IP_ECN_decapsulate().

Without the fix, IP_ECN_decapsulate() takes outer IP header as
inner IP header, possibly causing error messages or packet drops.

Note that this skb_reset_network_header() call was in this spot when
the original feature for checking consistency of ECN bits through
tunnels was added in eccc1bb8d4b4 ("tunnel: drop packet if ECN present
with not-ECT"). It was only removed from this spot in 3d7b46cd20e3
("ip_tunnel: push generic protocol handling to ip_tunnel module.").

Fixes: 3d7b46cd20e3 ("ip_tunnel: push generic protocol handling to ip_tunnel module.")
Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Ying Cai <ycai@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index fa5b7519765f..b3f859731c60 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -442,6 +442,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 		tunnel->i_seqno = ntohl(tpi->seq) + 1;
 	}
 
+	skb_reset_network_header(skb);
+
 	err = IP_ECN_decapsulate(iph, skb);
 	if (unlikely(err)) {
 		if (log_ecn_error)
-- 
cgit v1.2.3-59-g8ed1b


From ca6c5d4ad216d5942ae544bbf02503041bd802aa Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 4 May 2014 23:24:31 +0200
Subject: net: ipv4: ip_forward: fix inverted local_df test

local_df means 'ignore DF bit if set', so if its set we're
allowed to perform ip fragmentation.

This wasn't noticed earlier because the output path also drops such skbs
(and emits needed icmp error) and because netfilter ip defrag did not
set local_df until couple of days ago.

Only difference is that DF-packets-larger-than MTU now discarded
earlier (f.e. we avoid pointless netfilter postrouting trip).

While at it, drop the repeated test ip_exceeds_mtu, checking it once
is enough...

Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_forward.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index be8abe73bb9f..c29ae8371e44 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,12 +42,12 @@
 static bool ip_may_fragment(const struct sk_buff *skb)
 {
 	return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
-	       !skb->local_df;
+		skb->local_df;
 }
 
 static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 {
-	if (skb->len <= mtu || skb->local_df)
+	if (skb->len <= mtu)
 		return false;
 
 	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
-- 
cgit v1.2.3-59-g8ed1b


From 418a31561d594a2b636c1e2fa94ecd9e1245abb1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 5 May 2014 00:03:34 +0200
Subject: net: ipv6: send pkttoobig immediately if orig frag size > mtu

If conntrack defragments incoming ipv6 frags it stores largest original
frag size in ip6cb and sets ->local_df.

We must thus first test the largest original frag size vs. mtu, and not
vice versa.

Without this patch PKTTOOBIG is still generated in ip6_fragment() later
in the stack, but

1) IPSTATS_MIB_INTOOBIGERRORS won't increment
2) packet did (needlessly) traverse netfilter postrouting hook.

Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 40e7581374f7..31a38bde69ef 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -344,12 +344,16 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 
 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 {
-	if (skb->len <= mtu || skb->local_df)
+	if (skb->len <= mtu)
 		return false;
 
+	/* ipv6 conntrack defrag sets max_frag_size + local_df */
 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 		return true;
 
+	if (skb->local_df)
+		return false;
+
 	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
 		return false;
 
-- 
cgit v1.2.3-59-g8ed1b


From c7ba65d7b64984ff371cb5630b36af23506c50d5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 5 May 2014 15:00:43 +0200
Subject: net: ip: push gso skb forwarding handling down the stack

Doing the segmentation in the forward path has one major drawback:

When using virtio, we may process gso udp packets coming
from host network stack.  In that case, netfilter POSTROUTING
will see one packet with udp header followed by multiple ip
fragments.

Delay the segmentation and do it after POSTROUTING invocation
to avoid this.

Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_forward.c | 50 --------------------------------------------------
 net/ipv4/ip_output.c  | 51 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 48 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index c29ae8371e44..6f111e48e11c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 	return true;
 }
 
-static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
-{
-	unsigned int mtu;
-
-	if (skb->local_df || !skb_is_gso(skb))
-		return false;
-
-	mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
-
-	/* if seglen > mtu, do software segmentation for IP fragmentation on
-	 * output.  DF bit cannot be set since ip_forward would have sent
-	 * icmp error.
-	 */
-	return skb_gso_network_seglen(skb) > mtu;
-}
-
-/* called if GSO skb needs to be fragmented on forward */
-static int ip_forward_finish_gso(struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-	netdev_features_t features;
-	struct sk_buff *segs;
-	int ret = 0;
-
-	features = netif_skb_dev_features(skb, dst->dev);
-	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
-	if (IS_ERR(segs)) {
-		kfree_skb(skb);
-		return -ENOMEM;
-	}
-
-	consume_skb(skb);
-
-	do {
-		struct sk_buff *nskb = segs->next;
-		int err;
-
-		segs->next = NULL;
-		err = dst_output(segs);
-
-		if (err && ret == 0)
-			ret = err;
-		segs = nskb;
-	} while (segs);
-
-	return ret;
-}
 
 static int ip_forward_finish(struct sk_buff *skb)
 {
@@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb)
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
-	if (ip_gso_exceeds_dst_mtu(skb))
-		return ip_forward_finish_gso(skb);
-
 	return dst_output(skb);
 }
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1cbeba5edff9..a52f50187b54 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	return -EINVAL;
 }
 
+static int ip_finish_output_gso(struct sk_buff *skb)
+{
+	netdev_features_t features;
+	struct sk_buff *segs;
+	int ret = 0;
+
+	/* common case: locally created skb or seglen is <= mtu */
+	if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
+	      skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
+		return ip_finish_output2(skb);
+
+	/* Slowpath -  GSO segment length is exceeding the dst MTU.
+	 *
+	 * This can happen in two cases:
+	 * 1) TCP GRO packet, DF bit not set
+	 * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
+	 * from host network stack.
+	 */
+	features = netif_skb_features(skb);
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+	if (IS_ERR(segs)) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+
+	consume_skb(skb);
+
+	do {
+		struct sk_buff *nskb = segs->next;
+		int err;
+
+		segs->next = NULL;
+		err = ip_fragment(segs, ip_finish_output2);
+
+		if (err && ret == 0)
+			ret = err;
+		segs = nskb;
+	} while (segs);
+
+	return ret;
+}
+
 static int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -220,10 +262,13 @@ static int ip_finish_output(struct sk_buff *skb)
 		return dst_output(skb);
 	}
 #endif
-	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
+	if (skb_is_gso(skb))
+		return ip_finish_output_gso(skb);
+
+	if (skb->len > ip_skb_dst_mtu(skb))
 		return ip_fragment(skb, ip_finish_output2);
-	else
-		return ip_finish_output2(skb);
+
+	return ip_finish_output2(skb);
 }
 
 int ip_mc_output(struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3-59-g8ed1b


From c1e756bfcbcac838a86a23f3e4501b556a961e3c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 5 May 2014 15:00:44 +0200
Subject: Revert "net: core: introduce netif_skb_dev_features"

This reverts commit d206940319c41df4299db75ed56142177bb2e5f6,
there are no more callers.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +------
 net/core/dev.c            | 22 ++++++++++------------
 2 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ed3a3aa6604..20e99efb1ca6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
-netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
-					 const struct net_device *dev);
-static inline netdev_features_t netif_skb_features(struct sk_buff *skb)
-{
-	return netif_skb_dev_features(skb, skb->dev);
-}
+netdev_features_t netif_skb_features(struct sk_buff *skb);
 
 static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index d2c8a06b3a98..c619b8641337 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
 
-static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
@@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 }
 
 static netdev_features_t harmonize_features(struct sk_buff *skb,
-					    const struct net_device *dev,
-					    netdev_features_t features)
+	netdev_features_t features)
 {
 	int tmp;
 
 	if (skb->ip_summed != CHECKSUM_NONE &&
 	    !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
 		features &= ~NETIF_F_ALL_CSUM;
-	} else if (illegal_highdma(dev, skb)) {
+	} else if (illegal_highdma(skb->dev, skb)) {
 		features &= ~NETIF_F_SG;
 	}
 
 	return features;
 }
 
-netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
-					 const struct net_device *dev)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	netdev_features_t features = dev->features;
+	netdev_features_t features = skb->dev->features;
 
-	if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
+	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, dev, features);
+		return harmonize_features(skb, features);
 	}
 
-	features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
 					       NETIF_F_HW_VLAN_STAG_TX);
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
 				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_STAG_TX;
 
-	return harmonize_features(skb, dev, features);
+	return harmonize_features(skb, features);
 }
-EXPORT_SYMBOL(netif_skb_dev_features);
+EXPORT_SYMBOL(netif_skb_features);
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
-- 
cgit v1.2.3-59-g8ed1b


From aeefa1ecfc799b0ea2c4979617f14cecd5cccbfd Mon Sep 17 00:00:00 2001
From: Sergey Popovich <popovich_sergei@mail.ru>
Date: Tue, 6 May 2014 18:23:08 +0300
Subject: ipv4: fib_semantics: increment fib_info_cnt after fib_info allocation

Increment fib_info_cnt in fib_create_info() right after successfuly
alllocating fib_info structure, overwise fib_metrics allocation failure
leads to fib_info_cnt incorrectly decremented in free_fib_info(), called
on error path from fib_create_info().

Signed-off-by: Sergey Popovich <popovich_sergei@mail.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8a043f03c88e..b10cd43a4722 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -821,13 +821,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
 	if (fi == NULL)
 		goto failure;
+	fib_info_cnt++;
 	if (cfg->fc_mx) {
 		fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
 		if (!fi->fib_metrics)
 			goto failure;
 	} else
 		fi->fib_metrics = (u32 *) dst_default_metrics;
-	fib_info_cnt++;
 
 	fi->fib_net = hold_net(net);
 	fi->fib_protocol = cfg->fc_protocol;
-- 
cgit v1.2.3-59-g8ed1b


From a8951d5814e1373807a94f79f7ccec7041325470 Mon Sep 17 00:00:00 2001
From: Sergey Popovich <popovich_sergei@mail.ru>
Date: Thu, 8 May 2014 16:22:35 +0300
Subject: netfilter: Fix potential use after free in ip6_route_me_harder()

Dst is released one line before we access it again with dst->error.

Fixes: 58e35d147128 netfilter: ipv6: propagate routing errors from
ip6_route_me_harder()

Signed-off-by: Sergey Popovich <popovich_sergei@mail.ru>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 95f3f1da0d7f..d38e6a8d8b9f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb)
 		.daddr = iph->daddr,
 		.saddr = iph->saddr,
 	};
+	int err;
 
 	dst = ip6_route_output(net, skb->sk, &fl6);
-	if (dst->error) {
+	err = dst->error;
+	if (err) {
 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
 		dst_release(dst);
-		return dst->error;
+		return err;
 	}
 
 	/* Drop old route. */
-- 
cgit v1.2.3-59-g8ed1b


From c9d8f1a64225dfcc2f721d73a5984a2444920744 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 6 May 2014 11:02:49 -0700
Subject: ipv4: move local_port_range out of CONFIG_SYSCTL

When CONFIG_SYSCTL is not set, ip_local_port_range should still work,
just that no one can change it. Therefore we should move it out of sysctl_inet.c.
Also, rename it to ->ip_local_ports instead.

Cc: David S. Miller <davem@davemloft.net>
Cc: Francois Romieu <romieu@fr.zoreil.com>
Reported-by: Stefan de Konink <stefan@konink.de>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h        |  2 +-
 net/ipv4/af_inet.c              | 28 ++++++++++++++++++++++++++++
 net/ipv4/inet_connection_sock.c |  8 ++++----
 net/ipv4/ping.c                 |  4 ++--
 net/ipv4/sysctl_net_ipv4.c      | 29 +++++++++++------------------
 5 files changed, 46 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 80f500a29498..3d95cd475316 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -66,7 +66,7 @@ struct netns_ipv4 {
 	int sysctl_icmp_ratemask;
 	int sysctl_icmp_errors_use_inbound_ifaddr;
 
-	struct local_ports sysctl_local_ports;
+	struct local_ports ip_local_ports;
 
 	int sysctl_tcp_ecn;
 	int sysctl_ip_no_pmtu_disc;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8c54870db792..cccc8e487c7e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1650,6 +1650,31 @@ static int __init init_ipv4_mibs(void)
 	return register_pernet_subsys(&ipv4_mib_ops);
 }
 
+static __net_init int inet_init_net(struct net *net)
+{
+	/*
+	 * Set defaults for local port range
+	 */
+	seqlock_init(&net->ipv4.ip_local_ports.lock);
+	net->ipv4.ip_local_ports.range[0] =  32768;
+	net->ipv4.ip_local_ports.range[1] =  61000;
+	return 0;
+}
+
+static __net_exit void inet_exit_net(struct net *net)
+{
+}
+
+static __net_initdata struct pernet_operations af_inet_ops = {
+	.init = inet_init_net,
+	.exit = inet_exit_net,
+};
+
+static int __init init_inet_pernet_ops(void)
+{
+	return register_pernet_subsys(&af_inet_ops);
+}
+
 static int ipv4_proc_init(void);
 
 /*
@@ -1794,6 +1819,9 @@ static int __init inet_init(void)
 	if (ip_mr_init())
 		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
 #endif
+
+	if (init_inet_pernet_ops())
+		pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
 	/*
 	 *	Initialise per-cpu ipv4 mibs
 	 */
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0d1e2cb877ec..a56b8e6e866a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,11 +37,11 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
 	unsigned int seq;
 
 	do {
-		seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
+		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
 
-		*low = net->ipv4.sysctl_local_ports.range[0];
-		*high = net->ipv4.sysctl_local_ports.range[1];
-	} while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
+		*low = net->ipv4.ip_local_ports.range[0];
+		*high = net->ipv4.ip_local_ports.range[1];
+	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
 }
 EXPORT_SYMBOL(inet_get_local_port_range);
 
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8210964a9f19..347bdde9a585 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -240,11 +240,11 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
 	unsigned int seq;
 
 	do {
-		seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
+		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
 
 		*low = data[0];
 		*high = data[1];
-	} while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
+	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
 }
 
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 44eba052b43d..a116c41b05dd 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 /* Update system visible IP port range */
 static void set_local_port_range(struct net *net, int range[2])
 {
-	write_seqlock(&net->ipv4.sysctl_local_ports.lock);
-	net->ipv4.sysctl_local_ports.range[0] = range[0];
-	net->ipv4.sysctl_local_ports.range[1] = range[1];
-	write_sequnlock(&net->ipv4.sysctl_local_ports.lock);
+	write_seqlock(&net->ipv4.ip_local_ports.lock);
+	net->ipv4.ip_local_ports.range[0] = range[0];
+	net->ipv4.ip_local_ports.range[1] = range[1];
+	write_sequnlock(&net->ipv4.ip_local_ports.lock);
 }
 
 /* Validate changes from /proc interface. */
@@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
 				 size_t *lenp, loff_t *ppos)
 {
 	struct net *net =
-		container_of(table->data, struct net, ipv4.sysctl_local_ports.range);
+		container_of(table->data, struct net, ipv4.ip_local_ports.range);
 	int ret;
 	int range[2];
 	struct ctl_table tmp = {
@@ -90,11 +90,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
 		container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
 	unsigned int seq;
 	do {
-		seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock);
+		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
 
 		*low = data[0];
 		*high = data[1];
-	} while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq));
+	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
 }
 
 /* Update system visible IP port range */
@@ -103,10 +103,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
 	kgid_t *data = table->data;
 	struct net *net =
 		container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
-	write_seqlock(&net->ipv4.sysctl_local_ports.lock);
+	write_seqlock(&net->ipv4.ip_local_ports.lock);
 	data[0] = low;
 	data[1] = high;
-	write_sequnlock(&net->ipv4.sysctl_local_ports.lock);
+	write_sequnlock(&net->ipv4.ip_local_ports.lock);
 }
 
 /* Validate changes from /proc interface. */
@@ -819,8 +819,8 @@ static struct ctl_table ipv4_net_table[] = {
 	},
 	{
 		.procname	= "ip_local_port_range",
-		.maxlen		= sizeof(init_net.ipv4.sysctl_local_ports.range),
-		.data		= &init_net.ipv4.sysctl_local_ports.range,
+		.maxlen		= sizeof(init_net.ipv4.ip_local_ports.range),
+		.data		= &init_net.ipv4.ip_local_ports.range,
 		.mode		= 0644,
 		.proc_handler	= ipv4_local_port_range,
 	},
@@ -865,13 +865,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 	net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
 	net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
 
-	/*
-	 * Set defaults for local port range
-	 */
-	seqlock_init(&net->ipv4.sysctl_local_ports.lock);
-	net->ipv4.sysctl_local_ports.range[0] =  32768;
-	net->ipv4.sysctl_local_ports.range[1] =  61000;
-
 	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
 	if (net->ipv4.ipv4_hdr == NULL)
 		goto err_reg;
-- 
cgit v1.2.3-59-g8ed1b


From ba6b918ab234186d3aa1663e296586a1b526b77a Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 6 May 2014 11:02:50 -0700
Subject: ping: move ping_group_range out of CONFIG_SYSCTL

Similarly, when CONFIG_SYSCTL is not set, ping_group_range should still
work, just that no one can change it. Therefore we should move it out of
sysctl_net_ipv4.c. And, it should not share the same seqlock with
ip_local_port_range.

BTW, rename it to ->ping_group_range instead.

Cc: David S. Miller <davem@davemloft.net>
Cc: Francois Romieu <romieu@fr.zoreil.com>
Reported-by: Stefan de Konink <stefan@konink.de>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h   |  7 ++++++-
 net/ipv4/af_inet.c         |  8 ++++++++
 net/ipv4/ping.c            |  6 +++---
 net/ipv4/sysctl_net_ipv4.c | 13 +++----------
 4 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 3d95cd475316..b2704fd0ec80 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -20,6 +20,11 @@ struct local_ports {
 	int		range[2];
 };
 
+struct ping_group_range {
+	seqlock_t	lock;
+	kgid_t		range[2];
+};
+
 struct netns_ipv4 {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*forw_hdr;
@@ -72,7 +77,7 @@ struct netns_ipv4 {
 	int sysctl_ip_no_pmtu_disc;
 	int sysctl_ip_fwd_use_pmtu;
 
-	kgid_t sysctl_ping_group_range[2];
+	struct ping_group_range ping_group_range;
 
 	atomic_t dev_addr_genid;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cccc8e487c7e..6d6dd345bc4d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1658,6 +1658,14 @@ static __net_init int inet_init_net(struct net *net)
 	seqlock_init(&net->ipv4.ip_local_ports.lock);
 	net->ipv4.ip_local_ports.range[0] =  32768;
 	net->ipv4.ip_local_ports.range[1] =  61000;
+
+	seqlock_init(&net->ipv4.ping_group_range.lock);
+	/*
+	 * Sane defaults - nobody may create ping sockets.
+	 * Boot scripts should set this to distro-specific group.
+	 */
+	net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1);
+	net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0);
 	return 0;
 }
 
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 347bdde9a585..044a0ddf6a79 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -236,15 +236,15 @@ exit:
 static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
 					  kgid_t *high)
 {
-	kgid_t *data = net->ipv4.sysctl_ping_group_range;
+	kgid_t *data = net->ipv4.ping_group_range.range;
 	unsigned int seq;
 
 	do {
-		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
+		seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
 
 		*low = data[0];
 		*high = data[1];
-	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
+	} while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
 }
 
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a116c41b05dd..5cde8f263d40 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -87,7 +87,7 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
 {
 	kgid_t *data = table->data;
 	struct net *net =
-		container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
+		container_of(table->data, struct net, ipv4.ping_group_range.range);
 	unsigned int seq;
 	do {
 		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
@@ -102,7 +102,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
 {
 	kgid_t *data = table->data;
 	struct net *net =
-		container_of(table->data, struct net, ipv4.sysctl_ping_group_range);
+		container_of(table->data, struct net, ipv4.ping_group_range.range);
 	write_seqlock(&net->ipv4.ip_local_ports.lock);
 	data[0] = low;
 	data[1] = high;
@@ -805,7 +805,7 @@ static struct ctl_table ipv4_net_table[] = {
 	},
 	{
 		.procname	= "ping_group_range",
-		.data		= &init_net.ipv4.sysctl_ping_group_range,
+		.data		= &init_net.ipv4.ping_group_range.range,
 		.maxlen		= sizeof(gid_t)*2,
 		.mode		= 0644,
 		.proc_handler	= ipv4_ping_group_range,
@@ -858,13 +858,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
 			table[i].data += (void *)net - (void *)&init_net;
 	}
 
-	/*
-	 * Sane defaults - nobody may create ping sockets.
-	 * Boot scripts should set this to distro-specific group.
-	 */
-	net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
-	net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
-
 	net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
 	if (net->ipv4.ipv4_hdr == NULL)
 		goto err_reg;
-- 
cgit v1.2.3-59-g8ed1b