From 6cafaf4764a32597c2195aa5411b87728e1fde8a Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Mon, 20 Jun 2016 21:11:45 +0800
Subject: netfilter: nf_tables: fix memory leak if expr init fails

If expr init fails then we need to free it.

So when the user add a nft rule as follows:

  # nft add rule filter input tcp dport 22 flow table ssh \
    { ip saddr limit rate 0/second }

memory leak will happen.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2c881871db38..cf7c74599cbe 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1724,9 +1724,11 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
 
 	err = nf_tables_newexpr(ctx, &info, expr);
 	if (err < 0)
-		goto err2;
+		goto err3;
 
 	return expr;
+err3:
+	kfree(expr);
 err2:
 	module_put(info.ops->type->owner);
 err1:
-- 
cgit v1.2.3-59-g8ed1b


From 62131e5d735226074cba53095545d76b491e5003 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Wed, 8 Jun 2016 20:20:10 +0800
Subject: netfilter: nft_meta: set skb->nf_trace appropriately

When user add a nft rule to set nftrace to zero, for example:

  # nft add rule ip filter input nftrace set 0

We should set nf_trace to zero also.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_meta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 16c50b0dd426..f4bad9dc15c4 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -227,7 +227,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 			skb->pkt_type = value;
 		break;
 	case NFT_META_NFTRACE:
-		skb->nf_trace = 1;
+		skb->nf_trace = !!value;
 		break;
 	default:
 		WARN_ON(1);
-- 
cgit v1.2.3-59-g8ed1b


From 9cc1c73ad66610bffc80b691136ffc1e9a3b1a58 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 24 Apr 2016 01:18:21 +0200
Subject: netfilter: conntrack: avoid integer overflow when resizing

Can overflow so we might allocate very small table when bucket count is
high on a 32bit platform.

Note: resize is only possible from init_netns.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_core.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f204274a9b6b..62c42e970c89 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1601,8 +1601,15 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 	unsigned int nr_slots, i;
 	size_t sz;
 
+	if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head)))
+		return NULL;
+
 	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
 	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
+
+	if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head)))
+		return NULL;
+
 	sz = nr_slots * sizeof(struct hlist_nulls_head);
 	hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
 					get_order(sz));
-- 
cgit v1.2.3-59-g8ed1b


From 10c78f5854d361ded4736c1831948e0a5f67b932 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 2 Jul 2016 09:52:13 +0200
Subject: batman-adv: Avoid nullptr dereference in bla after vlan_insert_tag

vlan_insert_tag can return NULL on errors. The bridge loop avoidance code
therefore has to check the return value of vlan_insert_tag for NULL before
it can safely operate on this pointer.

Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 748a9ead7ce5..712978024c5d 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -418,9 +418,12 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
 		break;
 	}
 
-	if (vid & BATADV_VLAN_HAS_TAG)
+	if (vid & BATADV_VLAN_HAS_TAG) {
 		skb = vlan_insert_tag(skb, htons(ETH_P_8021Q),
 				      vid & VLAN_VID_MASK);
+		if (!skb)
+			goto out;
+	}
 
 	skb_reset_mac_header(skb);
 	skb->protocol = eth_type_trans(skb, soft_iface);
-- 
cgit v1.2.3-59-g8ed1b


From 60154a1e0495ffb8343a95cefe1e874634572fa8 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 2 Jul 2016 09:52:14 +0200
Subject: batman-adv: Avoid nullptr dereference in dat after vlan_insert_tag

vlan_insert_tag can return NULL on errors. The distributed arp table code
therefore has to check the return value of vlan_insert_tag for NULL before
it can safely operate on this pointer.

Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/distributed-arp-table.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 278800a99c69..aee3b3991471 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1009,9 +1009,12 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 		if (!skb_new)
 			goto out;
 
-		if (vid & BATADV_VLAN_HAS_TAG)
+		if (vid & BATADV_VLAN_HAS_TAG) {
 			skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
 						  vid & VLAN_VID_MASK);
+			if (!skb_new)
+				goto out;
+		}
 
 		skb_reset_mac_header(skb_new);
 		skb_new->protocol = eth_type_trans(skb_new,
@@ -1089,9 +1092,12 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
 	 */
 	skb_reset_mac_header(skb_new);
 
-	if (vid & BATADV_VLAN_HAS_TAG)
+	if (vid & BATADV_VLAN_HAS_TAG) {
 		skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
 					  vid & VLAN_VID_MASK);
+		if (!skb_new)
+			goto out;
+	}
 
 	/* To preserve backwards compatibility, the node has choose the outgoing
 	 * format based on the incoming request packet type. The assumption is
-- 
cgit v1.2.3-59-g8ed1b


From 33fbb1f3db87ce53da925b3e034b4dd446d483f8 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Thu, 30 Jun 2016 20:10:46 +0200
Subject: batman-adv: Fix orig_node_vlan leak on orig_node_release

batadv_orig_node_new uses batadv_orig_node_vlan_new to allocate a new
batadv_orig_node_vlan and add it to batadv_orig_node::vlan_list. References
to this list have also to be cleaned when the batadv_orig_node is removed.

Fixes: 7ea7b4a14275 ("batman-adv: make the TT CRC logic VLAN specific")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/originator.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 7f51bc2c06eb..fe2fcda4a984 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -765,6 +765,7 @@ static void batadv_orig_node_release(struct kref *ref)
 	struct batadv_neigh_node *neigh_node;
 	struct batadv_orig_node *orig_node;
 	struct batadv_orig_ifinfo *orig_ifinfo;
+	struct batadv_orig_node_vlan *vlan;
 
 	orig_node = container_of(ref, struct batadv_orig_node, refcount);
 
@@ -784,6 +785,13 @@ static void batadv_orig_node_release(struct kref *ref)
 	}
 	spin_unlock_bh(&orig_node->neigh_list_lock);
 
+	spin_lock_bh(&orig_node->vlan_list_lock);
+	hlist_for_each_entry_safe(vlan, node_tmp, &orig_node->vlan_list, list) {
+		hlist_del_rcu(&vlan->list);
+		batadv_orig_node_vlan_put(vlan);
+	}
+	spin_unlock_bh(&orig_node->vlan_list_lock);
+
 	/* Free nc_nodes */
 	batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
 
-- 
cgit v1.2.3-59-g8ed1b


From 3db0decf1185357d6ab2256d0dede1ca9efda03d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 1 Jul 2016 15:49:43 +0200
Subject: batman-adv: Fix non-atomic bla_claim::backbone_gw access

The pointer batadv_bla_claim::backbone_gw can be changed at any time.
Therefore, access to it must be protected to ensure that two function
accessing the same backbone_gw are actually accessing the same. This is
especially important when the crc_lock is used or when the backbone_gw of a
claim is exchanged.

Not doing so leads to invalid memory access and/or reference leaks.

Fixes: 23721387c409 ("batman-adv: add basic bridge loop avoidance code")
Fixes: 5a1dd8a4773d ("batman-adv: lock crc access in bridge loop avoidance")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bridge_loop_avoidance.c | 111 ++++++++++++++++++++++++++-------
 net/batman-adv/types.h                 |   2 +
 2 files changed, 90 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 712978024c5d..825a5cdf4382 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -177,10 +177,21 @@ static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
 static void batadv_claim_release(struct kref *ref)
 {
 	struct batadv_bla_claim *claim;
+	struct batadv_bla_backbone_gw *old_backbone_gw;
 
 	claim = container_of(ref, struct batadv_bla_claim, refcount);
 
-	batadv_backbone_gw_put(claim->backbone_gw);
+	spin_lock_bh(&claim->backbone_lock);
+	old_backbone_gw = claim->backbone_gw;
+	claim->backbone_gw = NULL;
+	spin_unlock_bh(&claim->backbone_lock);
+
+	spin_lock_bh(&old_backbone_gw->crc_lock);
+	old_backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+	spin_unlock_bh(&old_backbone_gw->crc_lock);
+
+	batadv_backbone_gw_put(old_backbone_gw);
+
 	kfree_rcu(claim, rcu);
 }
 
@@ -677,8 +688,10 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
 				 const u8 *mac, const unsigned short vid,
 				 struct batadv_bla_backbone_gw *backbone_gw)
 {
+	struct batadv_bla_backbone_gw *old_backbone_gw;
 	struct batadv_bla_claim *claim;
 	struct batadv_bla_claim search_claim;
+	bool remove_crc = false;
 	int hash_added;
 
 	ether_addr_copy(search_claim.addr, mac);
@@ -692,8 +705,10 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
 			return;
 
 		ether_addr_copy(claim->addr, mac);
+		spin_lock_init(&claim->backbone_lock);
 		claim->vid = vid;
 		claim->lasttime = jiffies;
+		kref_get(&backbone_gw->refcount);
 		claim->backbone_gw = backbone_gw;
 
 		kref_init(&claim->refcount);
@@ -721,15 +736,26 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
 			   "bla_add_claim(): changing ownership for %pM, vid %d\n",
 			   mac, BATADV_PRINT_VID(vid));
 
-		spin_lock_bh(&claim->backbone_gw->crc_lock);
-		claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
-		spin_unlock_bh(&claim->backbone_gw->crc_lock);
-		batadv_backbone_gw_put(claim->backbone_gw);
+		remove_crc = true;
 	}
-	/* set (new) backbone gw */
+
+	/* replace backbone_gw atomically and adjust reference counters */
+	spin_lock_bh(&claim->backbone_lock);
+	old_backbone_gw = claim->backbone_gw;
 	kref_get(&backbone_gw->refcount);
 	claim->backbone_gw = backbone_gw;
+	spin_unlock_bh(&claim->backbone_lock);
+
+	if (remove_crc) {
+		/* remove claim address from old backbone_gw */
+		spin_lock_bh(&old_backbone_gw->crc_lock);
+		old_backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
+		spin_unlock_bh(&old_backbone_gw->crc_lock);
+	}
 
+	batadv_backbone_gw_put(old_backbone_gw);
+
+	/* add claim address to new backbone_gw */
 	spin_lock_bh(&backbone_gw->crc_lock);
 	backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
 	spin_unlock_bh(&backbone_gw->crc_lock);
@@ -739,6 +765,26 @@ claim_free_ref:
 	batadv_claim_put(claim);
 }
 
+/**
+ * batadv_bla_claim_get_backbone_gw - Get valid reference for backbone_gw of
+ *  claim
+ * @claim: claim whose backbone_gw should be returned
+ *
+ * Return: valid reference to claim::backbone_gw
+ */
+static struct batadv_bla_backbone_gw *
+batadv_bla_claim_get_backbone_gw(struct batadv_bla_claim *claim)
+{
+	struct batadv_bla_backbone_gw *backbone_gw;
+
+	spin_lock_bh(&claim->backbone_lock);
+	backbone_gw = claim->backbone_gw;
+	kref_get(&backbone_gw->refcount);
+	spin_unlock_bh(&claim->backbone_lock);
+
+	return backbone_gw;
+}
+
 /**
  * batadv_bla_del_claim - delete a claim from the claim hash
  * @bat_priv: the bat priv with all the soft interface information
@@ -763,10 +809,6 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
 			   batadv_choose_claim, claim);
 	batadv_claim_put(claim); /* reference from the hash is gone */
 
-	spin_lock_bh(&claim->backbone_gw->crc_lock);
-	claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
-	spin_unlock_bh(&claim->backbone_gw->crc_lock);
-
 	/* don't need the reference from hash_find() anymore */
 	batadv_claim_put(claim);
 }
@@ -1219,6 +1261,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
 				    struct batadv_hard_iface *primary_if,
 				    int now)
 {
+	struct batadv_bla_backbone_gw *backbone_gw;
 	struct batadv_bla_claim *claim;
 	struct hlist_head *head;
 	struct batadv_hashtable *hash;
@@ -1233,14 +1276,17 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
 
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(claim, head, hash_entry) {
+			backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
 			if (now)
 				goto purge_now;
-			if (!batadv_compare_eth(claim->backbone_gw->orig,
+
+			if (!batadv_compare_eth(backbone_gw->orig,
 						primary_if->net_dev->dev_addr))
-				continue;
+				goto skip;
+
 			if (!batadv_has_timed_out(claim->lasttime,
 						  BATADV_BLA_CLAIM_TIMEOUT))
-				continue;
+				goto skip;
 
 			batadv_dbg(BATADV_DBG_BLA, bat_priv,
 				   "bla_purge_claims(): %pM, vid %d, time out\n",
@@ -1248,8 +1294,10 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
 
 purge_now:
 			batadv_handle_unclaim(bat_priv, primary_if,
-					      claim->backbone_gw->orig,
+					      backbone_gw->orig,
 					      claim->addr, claim->vid);
+skip:
+			batadv_backbone_gw_put(backbone_gw);
 		}
 		rcu_read_unlock();
 	}
@@ -1760,9 +1808,11 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
 bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		   unsigned short vid, bool is_bcast)
 {
+	struct batadv_bla_backbone_gw *backbone_gw;
 	struct ethhdr *ethhdr;
 	struct batadv_bla_claim search_claim, *claim = NULL;
 	struct batadv_hard_iface *primary_if;
+	bool own_claim;
 	bool ret;
 
 	ethhdr = eth_hdr(skb);
@@ -1797,8 +1847,12 @@ bool batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 	}
 
 	/* if it is our own claim ... */
-	if (batadv_compare_eth(claim->backbone_gw->orig,
-			       primary_if->net_dev->dev_addr)) {
+	backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
+	own_claim = batadv_compare_eth(backbone_gw->orig,
+				       primary_if->net_dev->dev_addr);
+	batadv_backbone_gw_put(backbone_gw);
+
+	if (own_claim) {
 		/* ... allow it in any case */
 		claim->lasttime = jiffies;
 		goto allow;
@@ -1862,7 +1916,9 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 {
 	struct ethhdr *ethhdr;
 	struct batadv_bla_claim search_claim, *claim = NULL;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	struct batadv_hard_iface *primary_if;
+	bool client_roamed;
 	bool ret = false;
 
 	primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -1892,8 +1948,12 @@ bool batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		goto allow;
 
 	/* check if we are responsible. */
-	if (batadv_compare_eth(claim->backbone_gw->orig,
-			       primary_if->net_dev->dev_addr)) {
+	backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
+	client_roamed = batadv_compare_eth(backbone_gw->orig,
+					   primary_if->net_dev->dev_addr);
+	batadv_backbone_gw_put(backbone_gw);
+
+	if (client_roamed) {
 		/* if yes, the client has roamed and we have
 		 * to unclaim it.
 		 */
@@ -1941,6 +2001,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
 	struct net_device *net_dev = (struct net_device *)seq->private;
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
 	struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
+	struct batadv_bla_backbone_gw *backbone_gw;
 	struct batadv_bla_claim *claim;
 	struct batadv_hard_iface *primary_if;
 	struct hlist_head *head;
@@ -1965,17 +2026,21 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
 
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(claim, head, hash_entry) {
-			is_own = batadv_compare_eth(claim->backbone_gw->orig,
+			backbone_gw = batadv_bla_claim_get_backbone_gw(claim);
+
+			is_own = batadv_compare_eth(backbone_gw->orig,
 						    primary_addr);
 
-			spin_lock_bh(&claim->backbone_gw->crc_lock);
-			backbone_crc = claim->backbone_gw->crc;
-			spin_unlock_bh(&claim->backbone_gw->crc_lock);
+			spin_lock_bh(&backbone_gw->crc_lock);
+			backbone_crc = backbone_gw->crc;
+			spin_unlock_bh(&backbone_gw->crc_lock);
 			seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n",
 				   claim->addr, BATADV_PRINT_VID(claim->vid),
-				   claim->backbone_gw->orig,
+				   backbone_gw->orig,
 				   (is_own ? 'x' : ' '),
 				   backbone_crc);
+
+			batadv_backbone_gw_put(backbone_gw);
 		}
 		rcu_read_unlock();
 	}
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ba846b078af8..005122234b90 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1042,6 +1042,7 @@ struct batadv_bla_backbone_gw {
  * @addr: mac address of claimed non-mesh client
  * @vid: vlan id this client was detected on
  * @backbone_gw: pointer to backbone gw claiming this client
+ * @backbone_lock: lock protecting backbone_gw pointer
  * @lasttime: last time we heard of claim (locals only)
  * @hash_entry: hlist node for batadv_priv_bla::claim_hash
  * @refcount: number of contexts the object is used
@@ -1051,6 +1052,7 @@ struct batadv_bla_claim {
 	u8 addr[ETH_ALEN];
 	unsigned short vid;
 	struct batadv_bla_backbone_gw *backbone_gw;
+	spinlock_t backbone_lock; /* protects backbone_gw */
 	unsigned long lasttime;
 	struct hlist_node hash_entry;
 	struct rcu_head rcu;
-- 
cgit v1.2.3-59-g8ed1b


From 15c2ed753cd9e3e746472deab8151337a5b6da56 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Thu, 30 Jun 2016 20:11:34 +0200
Subject: batman-adv: Fix reference leak in batadv_find_router

The replacement of last_bonding_candidate in batadv_orig_node has to be an
atomic operation. Otherwise it is possible that the reference counter of a
batadv_orig_ifinfo is reduced which was no longer the
last_bonding_candidate when the new candidate is added. This can either
lead to an invalid memory access or to reference leaks which make it
impossible to an interface which was added to batman-adv.

Fixes: f3b3d9018975 ("batman-adv: add bonding again")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/routing.c | 52 ++++++++++++++++++++++++++++++++++++------------
 net/batman-adv/types.h   |  4 +++-
 2 files changed, 42 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 6c2901a86230..bfac086b4d01 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -455,6 +455,29 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
 	return 0;
 }
 
+/**
+ * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
+ * @orig_node: originator node whose bonding candidates should be replaced
+ * @new_candidate: new bonding candidate or NULL
+ */
+static void
+batadv_last_bonding_replace(struct batadv_orig_node *orig_node,
+			    struct batadv_orig_ifinfo *new_candidate)
+{
+	struct batadv_orig_ifinfo *old_candidate;
+
+	spin_lock_bh(&orig_node->neigh_list_lock);
+	old_candidate = orig_node->last_bonding_candidate;
+
+	if (new_candidate)
+		kref_get(&new_candidate->refcount);
+	orig_node->last_bonding_candidate = new_candidate;
+	spin_unlock_bh(&orig_node->neigh_list_lock);
+
+	if (old_candidate)
+		batadv_orig_ifinfo_put(old_candidate);
+}
+
 /**
  * batadv_find_router - find a suitable router for this originator
  * @bat_priv: the bat priv with all the soft interface information
@@ -562,10 +585,6 @@ next:
 	}
 	rcu_read_unlock();
 
-	/* last_bonding_candidate is reset below, remove the old reference. */
-	if (orig_node->last_bonding_candidate)
-		batadv_orig_ifinfo_put(orig_node->last_bonding_candidate);
-
 	/* After finding candidates, handle the three cases:
 	 * 1) there is a next candidate, use that
 	 * 2) there is no next candidate, use the first of the list
@@ -574,21 +593,28 @@ next:
 	if (next_candidate) {
 		batadv_neigh_node_put(router);
 
-		/* remove references to first candidate, we don't need it. */
-		if (first_candidate) {
-			batadv_neigh_node_put(first_candidate_router);
-			batadv_orig_ifinfo_put(first_candidate);
-		}
+		kref_get(&next_candidate_router->refcount);
 		router = next_candidate_router;
-		orig_node->last_bonding_candidate = next_candidate;
+		batadv_last_bonding_replace(orig_node, next_candidate);
 	} else if (first_candidate) {
 		batadv_neigh_node_put(router);
 
-		/* refcounting has already been done in the loop above. */
+		kref_get(&first_candidate_router->refcount);
 		router = first_candidate_router;
-		orig_node->last_bonding_candidate = first_candidate;
+		batadv_last_bonding_replace(orig_node, first_candidate);
 	} else {
-		orig_node->last_bonding_candidate = NULL;
+		batadv_last_bonding_replace(orig_node, NULL);
+	}
+
+	/* cleanup of candidates */
+	if (first_candidate) {
+		batadv_neigh_node_put(first_candidate_router);
+		batadv_orig_ifinfo_put(first_candidate);
+	}
+
+	if (next_candidate) {
+		batadv_neigh_node_put(next_candidate_router);
+		batadv_orig_ifinfo_put(next_candidate);
 	}
 
 	return router;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 005122234b90..74d865a4df46 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -330,7 +330,9 @@ struct batadv_orig_node {
 	DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
 	u32 last_bcast_seqno;
 	struct hlist_head neigh_list;
-	/* neigh_list_lock protects: neigh_list and router */
+	/* neigh_list_lock protects: neigh_list, ifinfo_list,
+	 * last_bonding_candidate and router
+	 */
 	spinlock_t neigh_list_lock;
 	struct hlist_node hash_entry;
 	struct batadv_priv *bat_priv;
-- 
cgit v1.2.3-59-g8ed1b


From cbef1e102003edb236c6b2319ab269ccef963731 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Thu, 30 Jun 2016 21:41:13 +0200
Subject: batman-adv: Free last_bonding_candidate on release of orig_node

The orig_ifinfo reference counter for last_bonding_candidate in
batadv_orig_node has to be reduced when an originator node is released.
Otherwise the orig_ifinfo is leaked and the reference counter the netdevice
is not reduced correctly.

Fixes: f3b3d9018975 ("batman-adv: add bonding again")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/originator.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index fe2fcda4a984..ab8c4f9738fe 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -766,6 +766,7 @@ static void batadv_orig_node_release(struct kref *ref)
 	struct batadv_orig_node *orig_node;
 	struct batadv_orig_ifinfo *orig_ifinfo;
 	struct batadv_orig_node_vlan *vlan;
+	struct batadv_orig_ifinfo *last_candidate;
 
 	orig_node = container_of(ref, struct batadv_orig_node, refcount);
 
@@ -783,8 +784,14 @@ static void batadv_orig_node_release(struct kref *ref)
 		hlist_del_rcu(&orig_ifinfo->list);
 		batadv_orig_ifinfo_put(orig_ifinfo);
 	}
+
+	last_candidate = orig_node->last_bonding_candidate;
+	orig_node->last_bonding_candidate = NULL;
 	spin_unlock_bh(&orig_node->neigh_list_lock);
 
+	if (last_candidate)
+		batadv_orig_ifinfo_put(last_candidate);
+
 	spin_lock_bh(&orig_node->vlan_list_lock);
 	hlist_for_each_entry_safe(vlan, node_tmp, &orig_node->vlan_list, list) {
 		hlist_del_rcu(&vlan->list);
-- 
cgit v1.2.3-59-g8ed1b


From 6e8ef842223b90a33efd570128bb566a9ae6f5ad Mon Sep 17 00:00:00 2001
From: Purushottam Kushwaha <pkushwah@qti.qualcomm.com>
Date: Tue, 5 Jul 2016 13:44:51 +0530
Subject: nl80211: Move ACL parsing later to avoid a possible memory leak

No support for pbss results in a memory leak for the acl_data
(if parse_acl_data succeeds). Fix this by moving the ACL parsing later.

Cc: stable@vger.kernel.org
Fixes: 34d505193bd10 ("cfg80211: basic support for PBSS network type")
Signed-off-by: Purushottam Kushwaha <pkushwah@qti.qualcomm.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 net/wireless/nl80211.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d7599014055d..7d72283901a3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3487,16 +3487,16 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 		params.smps_mode = NL80211_SMPS_OFF;
 	}
 
+	params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
+	if (params.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ])
+		return -EOPNOTSUPP;
+
 	if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
 		params.acl = parse_acl_data(&rdev->wiphy, info);
 		if (IS_ERR(params.acl))
 			return PTR_ERR(params.acl);
 	}
 
-	params.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
-	if (params.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ])
-		return -EOPNOTSUPP;
-
 	wdev_lock(wdev);
 	err = rdev_start_ap(rdev, dev, &params);
 	if (!err) {
-- 
cgit v1.2.3-59-g8ed1b


From 16a910a6722b7a8680409e634c7c0dac073c01e4 Mon Sep 17 00:00:00 2001
From: Gregory Greenman <gregory.greenman@intel.com>
Date: Tue, 5 Jul 2016 15:23:10 +0300
Subject: cfg80211: handle failed skb allocation

Handle the case when dev_alloc_skb returns NULL.

Cc: stable@vger.kernel.org
Fixes: 2b67f944f88c2 ("cfg80211: reuse existing page fragments in A-MSDU rx")
Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 net/wireless/util.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 2443ee30ba5b..b7d1592bd5b8 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -721,6 +721,8 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
 	 * alignment since sizeof(struct ethhdr) is 14.
 	 */
 	frame = dev_alloc_skb(hlen + sizeof(struct ethhdr) + 2 + cur_len);
+	if (!frame)
+		return NULL;
 
 	skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2);
 	skb_copy_bits(skb, offset, skb_put(frame, cur_len), cur_len);
-- 
cgit v1.2.3-59-g8ed1b


From d1fe176ca51fa3cb35f70c1d876d9a090e9befce Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 12 Jun 2016 10:43:19 +0200
Subject: batman-adv: Fix speedy join in gateway client mode

Speedy join only works when the received packet is either broadcast or an
4addr unicast packet. Thus packets converted from broadcast to unicast via
the gateway handling code have to be converted to 4addr packets to allow
the receiving gateway server to add the sender address as temporary entry
to the translation table.

Not doing it will make the batman-adv gateway server drop the DHCP response
in many situations because it doesn't yet have the TT entry for the
destination of the DHCP response.

Fixes: 371351731e9c ("batman-adv: change interface_rx to get orig node")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Acked-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/send.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index f2f125684ed9..010397650fa5 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -424,8 +424,8 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
 	struct batadv_orig_node *orig_node;
 
 	orig_node = batadv_gw_get_selected_orig(bat_priv);
-	return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0,
-				       orig_node, vid);
+	return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
+				       BATADV_P_DATA, orig_node, vid);
 }
 
 void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface)
-- 
cgit v1.2.3-59-g8ed1b


From 3777ed688fba82d0bd43f9fc1ebbc6abe788576d Mon Sep 17 00:00:00 2001
From: Quentin Armitage <quentin@armitage.org.uk>
Date: Thu, 16 Jun 2016 08:00:14 +0100
Subject: ipvs: fix bind to link-local mcast IPv6 address in backup

When using HEAD from
https://git.kernel.org/cgit/utils/kernel/ipvsadm/ipvsadm.git/,
the command:
ipvsadm --start-daemon backup --mcast-interface eth0.60 \
    --mcast-group ff02::1:81
fails with the error message:
Argument list too long

whereas both:
ipvsadm --start-daemon master --mcast-interface eth0.60 \
    --mcast-group ff02::1:81
and:
ipvsadm --start-daemon backup --mcast-interface eth0.60 \
    --mcast-group 224.0.0.81
are successful.

The error message "Argument list too long" isn't helpful. The error occurs
because an IPv6 address is given in backup mode.

The error is in make_receive_sock() in net/netfilter/ipvs/ip_vs_sync.c,
since it fails to set the interface on the address or the socket before
calling inet6_bind() (via sock->ops->bind), where the test
'if (!sk->sk_bound_dev_if)' failed.

Setting sock->sk->sk_bound_dev_if on the socket before calling
inet6_bind() resolves the issue.

Fixes: d33288172e72 ("ipvs: add more mcast parameters for the sync daemon")
Signed-off-by: Quentin Armitage <quentin@armitage.org.uk>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_sync.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 803001a45aa1..1b07578bedf3 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1545,7 +1545,8 @@ error:
 /*
  *      Set up receiving multicast socket over UDP
  */
-static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id)
+static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id,
+					int ifindex)
 {
 	/* multicast addr */
 	union ipvs_sockaddr mcast_addr;
@@ -1566,6 +1567,7 @@ static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id)
 		set_sock_size(sock->sk, 0, result);
 
 	get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
+	sock->sk->sk_bound_dev_if = ifindex;
 	result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
 	if (result < 0) {
 		pr_err("Error binding to the multicast addr\n");
@@ -1868,7 +1870,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
 		if (state == IP_VS_STATE_MASTER)
 			sock = make_send_sock(ipvs, id);
 		else
-			sock = make_receive_sock(ipvs, id);
+			sock = make_receive_sock(ipvs, id, dev->ifindex);
 		if (IS_ERR(sock)) {
 			result = PTR_ERR(sock);
 			goto outtinfo;
-- 
cgit v1.2.3-59-g8ed1b


From c8607e020014cf11a61601a0005270bad81cabdf Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Jul 2016 14:53:06 +0200
Subject: netfilter: nft_ct: fix expiration getter

We need to compute timeout.expires - jiffies, not the other way around.
Add a helper, another patch can then later change more places in
conntrack code where we currently open-code this.

Will allow us to only change one place later when we remove per-ct timer.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 8 ++++++++
 net/netfilter/nft_ct.c               | 6 +-----
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index dd78bea227c8..b6083c34ef0d 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -284,6 +284,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
 	return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK;
 }
 
+/* jiffies until ct expires, 0 if already expired */
+static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
+{
+	long timeout = (long)ct->timeout.expires - (long)jiffies;
+
+	return timeout > 0 ? timeout : 0;
+}
+
 struct kernel_param;
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 137e308d5b24..81fbb450783e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -54,7 +54,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 	const struct nf_conn_help *help;
 	const struct nf_conntrack_tuple *tuple;
 	const struct nf_conntrack_helper *helper;
-	long diff;
 	unsigned int state;
 
 	ct = nf_ct_get(pkt->skb, &ctinfo);
@@ -94,10 +93,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 		return;
 #endif
 	case NFT_CT_EXPIRATION:
-		diff = (long)jiffies - (long)ct->timeout.expires;
-		if (diff < 0)
-			diff = 0;
-		*dest = jiffies_to_msecs(diff);
+		*dest = jiffies_to_msecs(nf_ct_expires(ct));
 		return;
 	case NFT_CT_HELPER:
 		if (ct->master == NULL)
-- 
cgit v1.2.3-59-g8ed1b


From 95556a883834122c616bbeb942654d745ceb9712 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 8 Jul 2016 11:03:57 +0200
Subject: dccp: avoid deadlock in dccp_v4_ctl_send_reset

In the prep work I did before enabling BH while handling socket backlog,
I missed two points in DCCP :

1) dccp_v4_ctl_send_reset() uses bh_lock_sock(), assuming BH were
blocked. It is not anymore always true.

2) dccp_v4_route_skb() was using __IP_INC_STATS() instead of
  IP_INC_STATS()

A similar fix was done for TCP, in commit 47dcc20a39d0
("ipv4: tcp: ip_send_unicast_reply() is not BH safe")

Fixes: 7309f8821fd6 ("dccp: do not assume DCCP code is non preemptible")
Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 5c7e413a3ae4..25dd25b47d41 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -462,7 +462,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 	security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
 	rt = ip_route_output_flow(net, &fl4, sk);
 	if (IS_ERR(rt)) {
-		__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
+		IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
 		return NULL;
 	}
 
@@ -527,17 +527,19 @@ static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 								 rxiph->daddr);
 	skb_dst_set(skb, dst_clone(dst));
 
+	local_bh_disable();
 	bh_lock_sock(ctl_sk);
 	err = ip_build_and_send_pkt(skb, ctl_sk,
 				    rxiph->daddr, rxiph->saddr, NULL);
 	bh_unlock_sock(ctl_sk);
 
 	if (net_xmit_eval(err) == 0) {
-		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
+		__DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+		__DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
 	}
+	local_bh_enable();
 out:
-	 dst_release(dst);
+	dst_release(dst);
 }
 
 static void dccp_v4_reqsk_destructor(struct request_sock *req)
-- 
cgit v1.2.3-59-g8ed1b


From a612769774a30e4fc143c4cb6395c12573415660 Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Fri, 8 Jul 2016 17:52:33 +0200
Subject: udp: prevent bugcheck if filter truncates packet too much

If socket filter truncates an udp packet below the length of UDP header
in udpv6_queue_rcv_skb() or udp_queue_rcv_skb(), it will trigger a
BUG_ON in skb_pull_rcsum(). This BUG_ON (and therefore a system crash if
kernel is configured that way) can be easily enforced by an unprivileged
user which was reported as CVE-2016-6162. For a reproducer, see
http://seclists.org/oss-sec/2016/q3/8

Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing")
Reported-by: Marco Grassi <marco.gra@gmail.com>
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 2 ++
 net/ipv6/udp.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ca5e8ea29538..4aed8fc23d32 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1583,6 +1583,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	if (sk_filter(sk, skb))
 		goto drop;
+	if (unlikely(skb->len < sizeof(struct udphdr)))
+		goto drop;
 
 	udp_csum_pull_header(skb);
 	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 005dc82c2138..acc09705618b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -620,6 +620,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	if (sk_filter(sk, skb))
 		goto drop;
+	if (unlikely(skb->len < sizeof(struct udphdr)))
+		goto drop;
 
 	udp_csum_pull_header(skb);
 	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
-- 
cgit v1.2.3-59-g8ed1b


From 75ff39ccc1bd5d3c455b6822ab09e533c551f758 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 10 Jul 2016 10:04:02 +0200
Subject: tcp: make challenge acks less predictable

Yue Cao claims that current host rate limiting of challenge ACKS
(RFC 5961) could leak enough information to allow a patient attacker
to hijack TCP sessions. He will soon provide details in an academic
paper.

This patch increases the default limit from 100 to 1000, and adds
some randomization so that the attacker can no longer hijack
sessions without spending a considerable amount of probes.

Based on initial analysis and patch from Linus.

Note that we also have per socket rate limiting, so it is tempting
to remove the host limit in the future.

v2: randomize the count of challenge acks per second, not the period.

Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2")
Reported-by: Yue Cao <ycao009@ucr.edu>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6c8f4cd0800..91868bb17818 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -87,7 +87,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 
 /* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 100;
+int sysctl_tcp_challenge_ack_limit = 1000;
 
 int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
@@ -3458,7 +3458,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 	static u32 challenge_timestamp;
 	static unsigned int challenge_count;
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 now;
+	u32 count, now;
 
 	/* First check our per-socket dupack rate limit. */
 	if (tcp_oow_rate_limited(sock_net(sk), skb,
@@ -3466,13 +3466,18 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 				 &tp->last_oow_ack_time))
 		return;
 
-	/* Then check the check host-wide RFC 5961 rate limit. */
+	/* Then check host-wide RFC 5961 rate limit. */
 	now = jiffies / HZ;
 	if (now != challenge_timestamp) {
+		u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+
 		challenge_timestamp = now;
-		challenge_count = 0;
+		WRITE_ONCE(challenge_count, half +
+			   prandom_u32_max(sysctl_tcp_challenge_ack_limit));
 	}
-	if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
+	count = READ_ONCE(challenge_count);
+	if (count > 0) {
+		WRITE_ONCE(challenge_count, count - 1);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
 		tcp_send_ack(sk);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 80610229ef7b26615dbb6cb6e873709a60bacc9f Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 10 Jul 2016 21:11:55 +0300
Subject: ipv4: reject RTNH_F_DEAD and RTNH_F_LINKDOWN from user space

Vegard Nossum is reporting for a crash in fib_dump_info
when nh_dev = NULL and fib_nhs == 1:

Pid: 50, comm: netlink.exe Not tainted 4.7.0-rc5+
RIP: 0033:[<00000000602b3d18>]
RSP: 0000000062623890  EFLAGS: 00010202
RAX: 0000000000000000 RBX: 000000006261b800 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000024 RDI: 000000006245ba00
RBP: 00000000626238f0 R08: 000000000000029c R09: 0000000000000000
R10: 0000000062468038 R11: 000000006245ba00 R12: 000000006245ba00
R13: 00000000625f96c0 R14: 00000000601e16f0 R15: 0000000000000000
Kernel panic - not syncing: Kernel mode fault at addr 0x2e0, ip 0x602b3d18
CPU: 0 PID: 50 Comm: netlink.exe Not tainted 4.7.0-rc5+ #581
Stack:
 626238f0 960226a02 00000400 000000fe
 62623910 600afca7 62623970 62623a48
 62468038 00000018 00000000 00000000
Call Trace:
 [<602b3e93>] rtmsg_fib+0xd3/0x190
 [<602b6680>] fib_table_insert+0x260/0x500
 [<602b0e5d>] inet_rtm_newroute+0x4d/0x60
 [<60250def>] rtnetlink_rcv_msg+0x8f/0x270
 [<60267079>] netlink_rcv_skb+0xc9/0xe0
 [<60250d4b>] rtnetlink_rcv+0x3b/0x50
 [<60265400>] netlink_unicast+0x1a0/0x2c0
 [<60265e47>] netlink_sendmsg+0x3f7/0x470
 [<6021dc9a>] sock_sendmsg+0x3a/0x90
 [<6021e0d0>] ___sys_sendmsg+0x300/0x360
 [<6021fa64>] __sys_sendmsg+0x54/0xa0
 [<6021fac0>] SyS_sendmsg+0x10/0x20
 [<6001ea68>] handle_syscall+0x88/0x90
 [<600295fd>] userspace+0x3fd/0x500
 [<6001ac55>] fork_handler+0x85/0x90

$ addr2line -e vmlinux -i 0x602b3d18
include/linux/inetdevice.h:222
net/ipv4/fib_semantics.c:1264

Problem happens when RTNH_F_LINKDOWN is provided from user space
when creating routes that do not use the flag, catched with
netlink fuzzer.

Currently, the kernel allows user space to set both flags
to nh_flags and fib_flags but this is not intentional, the
assumption was that they are not set. Fix this by rejecting
both flags with EINVAL.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Fixes: 0eeb075fad73 ("net: ipv4 sysctl option to ignore routes when nexthop link is down")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
Cc: Dinesh Dutt <ddutt@cumulusnetworks.com>
Cc: Scott Feldman <sfeldma@gmail.com>
Reviewed-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_semantics.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d09173bf9500..539fa264e67d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -479,6 +479,9 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 		if (!rtnh_ok(rtnh, remaining))
 			return -EINVAL;
 
+		if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+			return -EINVAL;
+
 		nexthop_nh->nh_flags =
 			(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
 		nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
@@ -1003,6 +1006,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
 		goto err_inval;
 
+	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
+		goto err_inval;
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (cfg->fc_mp) {
 		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
-- 
cgit v1.2.3-59-g8ed1b


From 779f1edec664a7b32b71f7b4702e085a08d60592 Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Mon, 11 Jul 2016 16:51:26 -0400
Subject: sock: ignore SCM_RIGHTS and SCM_CREDENTIALS in __sock_cmsg_send

Sergei Trofimovich reported that pulse audio sends SCM_CREDENTIALS
as a control message to TCP. Since __sock_cmsg_send does not
support SCM_RIGHTS and SCM_CREDENTIALS, it returns an error and
hence breaks pulse audio over TCP.

SCM_RIGHTS and SCM_CREDENTIALS are sent on the SOL_SOCKET layer
but they semantically belong to SOL_UNIX. Since all
cmsg-processing functions including sock_cmsg_send ignore control
messages of other layers, it is best to ignore SCM_RIGHTS
and SCM_CREDENTIALS for consistency (and also for fixing pulse
audio over TCP).

Fixes: c14ac9451c34 ("sock: enable timestamping using control messages")
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Reported-by: Sergei Trofimovich <slyfox@gentoo.org>
Tested-by: Sergei Trofimovich <slyfox@gentoo.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 08bf97eceeb3..b7f12639c26a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1938,6 +1938,10 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
 		sockc->tsflags |= tsflags;
 		break;
+	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
+	case SCM_RIGHTS:
+	case SCM_CREDENTIALS:
+		break;
 	default:
 		return -EINVAL;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 2d18ac4ba7454a4260473e68be7e485ae71e7948 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Mon, 11 Jul 2016 16:08:35 -0400
Subject: tipc: extend broadcast link initialization criteria

At first contact between two nodes, an endpoint might sometimes have
time to send out a LINK_PROTOCOL/STATE packet before it has received
the broadcast initialization packet from the peer, i.e., before it has
received a valid broadcast packet number to add to the 'bc_ack' field
of the protocol message.

This means that the peer endpoint will receive a protocol packet with an
invalid broadcast acknowledge value of 0. Under unlucky circumstances
this may lead to the original, already received acknowledge value being
overwritten, so that the whole broadcast link goes stale after a while.

We fix this by delaying the setting of the link field 'bc_peer_is_up'
until we know that the peer really has received our own broadcast
initialization message. The latter is always sent out as the first
unicast message on a link, and always with seqeunce number 1. Because
of this, we only need to look for a non-zero unicast acknowledge value
in the arriving STATE messages, and once that is confirmed we know we
are safe and can set the mentioned field. Before this moment, we must
ignore all broadcast acknowledges from the peer.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 67b6ab9f4c8d..6483dc4333fb 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1559,7 +1559,12 @@ void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
 	if (!msg_peer_node_is_up(hdr))
 		return;
 
-	l->bc_peer_is_up = true;
+	/* Open when peer ackowledges our bcast init msg (pkt #1) */
+	if (msg_ack(hdr))
+		l->bc_peer_is_up = true;
+
+	if (!l->bc_peer_is_up)
+		return;
 
 	/* Ignore if peers_snd_nxt goes beyond receive window */
 	if (more(peers_snd_nxt, l->rcv_nxt + l->window))
-- 
cgit v1.2.3-59-g8ed1b


From a71eb720355c28eaeb2de0c4d960247c69bb2c6f Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Mon, 11 Jul 2016 16:08:36 -0400
Subject: tipc: ensure correct broadcast send buffer release when peer is lost

After a new receiver peer has been added to the broadcast transmission
link, we allow immediate transmission of new broadcast packets, trusting
that the new peer will not accept the packets until it has received the
previously sent unicast broadcast initialiation message. In the same
way, the sender must not accept any acknowledges until it has itself
received the broadcast initialization from the peer, as well as
confirmation of the reception of its own initialization message.

Furthermore, when a receiver peer goes down, the sender has to produce
the missing acknowledges from the lost peer locally, in order ensure
correct release of the buffers that were expected to be acknowledged by
the said peer.

In a highly stressed system we have observed that contact with a peer
may come up and be lost before the above mentioned broadcast initial-
ization and confirmation have been received. This leads to the locally
produced acknowledges being rejected, and the non-acknowledged buffers
to linger in the broadcast link transmission queue until it fills up
and the link goes into permanent congestion.

In this commit, we remedy this by temporarily setting the corresponding
broadcast receive link state to ESTABLISHED and the 'bc_peer_is_up'
state to true before we issue the local acknowledges. This ensures that
those acknowledges will always be accepted. The mentioned state values
are restored immediately afterwards when the link is reset.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 6483dc4333fb..7d89f8713d49 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -349,6 +349,8 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
 	u16 ack = snd_l->snd_nxt - 1;
 
 	snd_l->ackers--;
+	rcv_l->bc_peer_is_up = true;
+	rcv_l->state = LINK_ESTABLISHED;
 	tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
 	tipc_link_reset(rcv_l);
 	rcv_l->state = LINK_RESET;
-- 
cgit v1.2.3-59-g8ed1b


From 1fc07f3e1541cc49cc159beb3fdefc5013570eda Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Mon, 11 Jul 2016 16:08:37 -0400
Subject: tipc: reset all unicast links when broadcast send link fails

In test situations with many nodes and a heavily stressed system we have
observed that the transmission broadcast link may fail due to an
excessive number of retransmissions of the same packet. In such
situations we need to reset all unicast links to all peers, in order to
reset and re-synchronize the broadcast link.

In this commit, we add a new function tipc_bearer_reset_all() to be used
in such situations. The function scans across all bearers and resets all
their pertaining links.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 15 +++++++++++++++
 net/tipc/bearer.h |  1 +
 net/tipc/node.c   | 15 +++++++++++----
 3 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index bf8f05c3eb82..a597708ae381 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -330,6 +330,21 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b)
 	return 0;
 }
 
+/* tipc_bearer_reset_all - reset all links on all bearers
+ */
+void tipc_bearer_reset_all(struct net *net)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_bearer *b;
+	int i;
+
+	for (i = 0; i < MAX_BEARERS; i++) {
+		b = rcu_dereference_rtnl(tn->bearer_list[i]);
+		if (b)
+			tipc_reset_bearer(net, b);
+	}
+}
+
 /**
  * bearer_disable
  *
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index f686e41b5abb..60e49c3be19c 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -198,6 +198,7 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest);
 void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest);
 struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name);
 struct tipc_media *tipc_media_find(const char *name);
+void tipc_bearer_reset_all(struct net *net);
 int tipc_bearer_setup(void);
 void tipc_bearer_cleanup(void);
 void tipc_bearer_stop(struct net *net);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index e01e2c71b5a1..23d4761842a0 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1297,10 +1297,6 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
 
 	rc = tipc_bcast_rcv(net, be->link, skb);
 
-	/* Broadcast link reset may happen at reassembly failure */
-	if (rc & TIPC_LINK_DOWN_EVT)
-		tipc_node_reset_links(n);
-
 	/* Broadcast ACKs are sent on a unicast link */
 	if (rc & TIPC_LINK_SND_BC_ACK) {
 		tipc_node_read_lock(n);
@@ -1320,6 +1316,17 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
 		spin_unlock_bh(&be->inputq2.lock);
 		tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2);
 	}
+
+	if (rc & TIPC_LINK_DOWN_EVT) {
+		/* Reception reassembly failure => reset all links to peer */
+		if (!tipc_link_is_up(be->link))
+			tipc_node_reset_links(n);
+
+		/* Retransmission failure => reset all links to all peers */
+		if (!tipc_link_is_up(tipc_bc_sndlink(net)))
+			tipc_bearer_reset_all(net);
+	}
+
 	tipc_node_put(n);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 590b52e10d410e1439ae86be9fe19d75fdab628b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 11 Jul 2016 17:28:54 +0200
Subject: netfilter: conntrack: skip clash resolution if nat is in place

The clash resolution is not easy to apply if the NAT table is
registered. Even if no NAT rules are installed, the nul-binding ensures
that a unique tuple is used, thus, the packet that loses race gets a
different source port number, as described by:

http://marc.info/?l=netfilter-devel&m=146818011604484&w=2

Clash resolution with NAT is also problematic if addresses/port range
ports are used since the conntrack that wins race may describe a
different mangling that we may have earlier applied to the packet via
nf_nat_setup_info().

Fixes: 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Tested-by: Marc Dionne <marc.c.dionne@gmail.com>
---
 net/netfilter/nf_conntrack_core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 62c42e970c89..9f530adad10d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -646,6 +646,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	if (l4proto->allow_clash &&
+	    !nfct_nat(ct) &&
 	    !nf_ct_is_dying(ct) &&
 	    atomic_inc_not_zero(&ct->ct_general.use)) {
 		nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
-- 
cgit v1.2.3-59-g8ed1b


From f4979fcea7fd36d8e2f556abef86f80e0d5af1ba Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 12 Jul 2016 18:18:56 -0400
Subject: rose: limit sk_filter trim to payload

Sockets can have a filter program attached that drops or trims
incoming packets based on the filter program return value.

Rose requires data packets to have at least ROSE_MIN_LEN bytes. It
verifies this on arrival in rose_route_frame and unconditionally pulls
the bytes in rose_recvmsg. The filter can trim packets to below this
value in-between, causing pull to fail, leaving the partial header at
the time of skb_copy_datagram_msg.

Place a lower bound on the size to which sk_filter may trim packets
by introducing sk_filter_trim_cap and call this for rose packets.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  6 +++++-
 net/core/filter.c      | 10 +++++-----
 net/rose/rose_in.c     |  3 ++-
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6fc31ef1da2d..8f74f3d61894 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -467,7 +467,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 }
 #endif /* CONFIG_DEBUG_SET_MODULE_RONX */
 
-int sk_filter(struct sock *sk, struct sk_buff *skb);
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
+{
+	return sk_filter_trim_cap(sk, skb, 1);
+}
 
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
 void bpf_prog_free(struct bpf_prog *fp);
diff --git a/net/core/filter.c b/net/core/filter.c
index c4b330c85c02..e759d90e8cef 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -53,9 +53,10 @@
 #include <net/sock_reuseport.h>
 
 /**
- *	sk_filter - run a packet through a socket filter
+ *	sk_filter_trim_cap - run a packet through a socket filter
  *	@sk: sock associated with &sk_buff
  *	@skb: buffer to filter
+ *	@cap: limit on how short the eBPF program may trim the packet
  *
  * Run the eBPF program and then cut skb->data to correct size returned by
  * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
@@ -64,7 +65,7 @@
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
-int sk_filter(struct sock *sk, struct sk_buff *skb)
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
 {
 	int err;
 	struct sk_filter *filter;
@@ -85,14 +86,13 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
 	filter = rcu_dereference(sk->sk_filter);
 	if (filter) {
 		unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
-
-		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
+		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
 	}
 	rcu_read_unlock();
 
 	return err;
 }
-EXPORT_SYMBOL(sk_filter);
+EXPORT_SYMBOL(sk_filter_trim_cap);
 
 static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 79c4abcfa6b4..0a6394754e81 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -164,7 +164,8 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety
 		rose_frames_acked(sk, nr);
 		if (ns == rose->vr) {
 			rose_start_idletimer(sk);
-			if (sock_queue_rcv_skb(sk, skb) == 0) {
+			if (sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) == 0 &&
+			    __sock_queue_rcv_skb(sk, skb) == 0) {
 				rose->vr = (rose->vr + 1) % ROSE_MODULUS;
 				queued = 1;
 			} else {
-- 
cgit v1.2.3-59-g8ed1b


From 4f0c40d94461cfd23893a17335b2ab78ecb333c8 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 12 Jul 2016 18:18:57 -0400
Subject: dccp: limit sk_filter trim to payload

Dccp verifies packet integrity, including length, at initial rcv in
dccp_invalid_packet, later pulls headers in dccp_enqueue_skb.

A call to sk_filter in-between can cause __skb_pull to wrap skb->len.
skb_copy_datagram_msg interprets this as a negative value, so
(correctly) fails with EFAULT. The negative length is reported in
ioctl SIOCINQ or possibly in a DCCP_WARN in dccp_close.

Introduce an sk_receive_skb variant that caps how small a filter
program can trim packets, and call this in dccp with the header
length. Excessively trimmed packets are now processed normally and
queued for reception as 0B payloads.

Fixes: 7c657876b63c ("[DCCP]: Initial implementation")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 8 +++++++-
 net/core/sock.c    | 7 ++++---
 net/dccp/ipv4.c    | 2 +-
 net/dccp/ipv6.c    | 2 +-
 4 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 649d2a8c17fc..ff5be7e8ddea 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1576,7 +1576,13 @@ static inline void sock_put(struct sock *sk)
  */
 void sock_gen_put(struct sock *sk);
 
-int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested);
+int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
+		     unsigned int trim_cap);
+static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+				 const int nested)
+{
+	return __sk_receive_skb(sk, skb, nested, 1);
+}
 
 static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index b7f12639c26a..25dab8b60223 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -452,11 +452,12 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_queue_rcv_skb);
 
-int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
+int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
+		     const int nested, unsigned int trim_cap)
 {
 	int rc = NET_RX_SUCCESS;
 
-	if (sk_filter(sk, skb))
+	if (sk_filter_trim_cap(sk, skb, trim_cap))
 		goto discard_and_relse;
 
 	skb->dev = NULL;
@@ -492,7 +493,7 @@ discard_and_relse:
 	kfree_skb(skb);
 	goto out;
 }
-EXPORT_SYMBOL(sk_receive_skb);
+EXPORT_SYMBOL(__sk_receive_skb);
 
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 25dd25b47d41..345a3aeb8c7e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -868,7 +868,7 @@ lookup:
 		goto discard_and_relse;
 	nf_reset(skb);
 
-	return sk_receive_skb(sk, skb, 1);
+	return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
 
 no_dccp_socket:
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d176f4e66369..3ff137d9471d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -732,7 +732,7 @@ lookup:
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-	return sk_receive_skb(sk, skb, 1) ? -1 : 0;
+	return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
 
 no_dccp_socket:
 	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
-- 
cgit v1.2.3-59-g8ed1b


From 083ae308280d13d187512b9babe3454342a7987e Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Thu, 14 Jul 2016 11:38:40 -0400
Subject: tcp: enable per-socket rate limiting of all 'challenge acks'

The per-socket rate limit for 'challenge acks' was introduced in the
context of limiting ack loops:

commit f2b2c582e824 ("tcp: mitigate ACK loops for connections as tcp_sock")

And I think it can be extended to rate limit all 'challenge acks' on a
per-socket basis.

Since we have the global tcp_challenge_ack_limit, this patch allows for
tcp_challenge_ack_limit to be set to a large value and effectively rely on
the per-socket limit, or set tcp_challenge_ack_limit to a lower value and
still prevents a single connections from consuming the entire challenge ack
quota.

It further moves in the direction of eliminating the global limit at some
point, as Eric Dumazet has suggested. This a follow-up to:
Subject: tcp: make challenge acks less predictable

Cc: Eric Dumazet <edumazet@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Yue Cao <ycao009@ucr.edu>
Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 91868bb17818..42bf89aaf6a5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3421,6 +3421,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 	return flag;
 }
 
+static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+				   u32 *last_oow_ack_time)
+{
+	if (*last_oow_ack_time) {
+		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
+
+		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+			NET_INC_STATS(net, mib_idx);
+			return true;	/* rate-limited: don't send yet! */
+		}
+	}
+
+	*last_oow_ack_time = tcp_time_stamp;
+
+	return false;	/* not rate-limited: go ahead, send dupack now! */
+}
+
 /* Return true if we're currently rate-limiting out-of-window ACKs and
  * thus shouldn't send a dupack right now. We rate-limit dupacks in
  * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
@@ -3434,21 +3451,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
 	/* Data packets without SYNs are not likely part of an ACK loop. */
 	if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
 	    !tcp_hdr(skb)->syn)
-		goto not_rate_limited;
-
-	if (*last_oow_ack_time) {
-		s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
-
-		if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
-			NET_INC_STATS(net, mib_idx);
-			return true;	/* rate-limited: don't send yet! */
-		}
-	}
-
-	*last_oow_ack_time = tcp_time_stamp;
+		return false;
 
-not_rate_limited:
-	return false;	/* not rate-limited: go ahead, send dupack now! */
+	return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
 }
 
 /* RFC 5961 7 [ACK Throttling] */
@@ -3461,9 +3466,9 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 	u32 count, now;
 
 	/* First check our per-socket dupack rate limit. */
-	if (tcp_oow_rate_limited(sock_net(sk), skb,
-				 LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
-				 &tp->last_oow_ack_time))
+	if (__tcp_oow_rate_limited(sock_net(sk),
+				   LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
+				   &tp->last_oow_ack_time))
 		return;
 
 	/* Then check host-wide RFC 5961 rate limit. */
-- 
cgit v1.2.3-59-g8ed1b


From 18d3df3eab23796d7f852f9c6bb60962b8372ced Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 14 Jul 2016 18:00:10 +0200
Subject: vlan: use a valid default mtu value for vlan over macsec

macsec can't cope with mtu frames which need vlan tag insertion, and
vlan device set the default mtu equal to the underlying dev's one.
By default vlan over macsec devices use invalid mtu, dropping
all the large packets.
This patch adds a netif helper to check if an upper vlan device
needs mtu reduction. The helper is used during vlan devices
initialization to set a valid default and during mtu updating to
forbid invalid, too bit, mtu values.
The helper currently only check if the lower dev is a macsec device,
if we get more users, we need to update only the helper (possibly
reserving an additional IFF bit).

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++++++
 net/8021q/vlan_dev.c      | 10 ++++++----
 net/8021q/vlan_netlink.c  |  7 +++++--
 3 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f45929ce8157..da4b33bea982 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4145,6 +4145,13 @@ static inline void netif_keep_dst(struct net_device *dev)
 	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM);
 }
 
+/* return true if dev can't cope with mtu frames that need vlan tag insertion */
+static inline bool netif_reduces_vlan_mtu(struct net_device *dev)
+{
+	/* TODO: reserve and use an additional IFF bit, if we get more users */
+	return dev->priv_flags & IFF_MACSEC;
+}
+
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 86ae75b77390..516b0e73263c 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -146,10 +146,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 
 static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 {
-	/* TODO: gotta make sure the underlying layer can handle it,
-	 * maybe an IFF_VLAN_CAPABLE flag for devices?
-	 */
-	if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu)
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
+	unsigned int max_mtu = real_dev->mtu;
+
+	if (netif_reduces_vlan_mtu(real_dev))
+		max_mtu -= VLAN_HLEN;
+	if (max_mtu < new_mtu)
 		return -ERANGE;
 
 	dev->mtu = new_mtu;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c92b52f37d38..1270207f3d7c 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -118,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev;
+	unsigned int max_mtu;
 	__be16 proto;
 	int err;
 
@@ -144,9 +145,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		return err;
 
+	max_mtu = netif_reduces_vlan_mtu(real_dev) ? real_dev->mtu - VLAN_HLEN :
+						     real_dev->mtu;
 	if (!tb[IFLA_MTU])
-		dev->mtu = real_dev->mtu;
-	else if (dev->mtu > real_dev->mtu)
+		dev->mtu = max_mtu;
+	else if (dev->mtu > max_mtu)
 		return -EINVAL;
 
 	err = vlan_changelink(dev, tb, data);
-- 
cgit v1.2.3-59-g8ed1b


From 0564bf0afae443deeb16f36e2c39fefff89d05f2 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Sat, 16 Jul 2016 17:08:56 +0300
Subject: net/sched/sch_htb: clamp xstats tokens to fit into 32-bit int

In kernel HTB keeps tokens in signed 64-bit in nanoseconds. In netlink
protocol these values are converted into pshed ticks (64ns for now) and
truncated to 32-bit. In struct tc_htb_xstats fields "tokens" and "ctokens"
are declared as unsigned 32-bit but they could be negative thus tool 'tc'
prints them as signed. Big values loose higher bits and/or become negative.

This patch clamps tokens in xstat into range from INT_MIN to INT_MAX.
In this way it's easier to understand what's going on here.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 62f9d8100c6e..052f84d6cc23 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1140,8 +1140,10 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 
 	if (!cl->level && cl->un.leaf.q)
 		qlen = cl->un.leaf.q->q.qlen;
-	cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
-	cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
+	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
+				    INT_MIN, INT_MAX);
+	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
+				     INT_MIN, INT_MAX);
 
 	if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
-- 
cgit v1.2.3-59-g8ed1b


From c74bfbdba0e8d056e4ba579a666b5cdb8ec3cd35 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 16 Jul 2016 17:33:15 -0400
Subject: sctp: load transport header after sk_filter

Do not cache pointers into the skb linear segment across sk_filter.
The function call can trigger pskb_expand_head.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/input.c b/net/sctp/input.c
index a701527a9480..47cf4604d19c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -112,7 +112,6 @@ int sctp_rcv(struct sk_buff *skb)
 	struct sctp_ep_common *rcvr;
 	struct sctp_transport *transport = NULL;
 	struct sctp_chunk *chunk;
-	struct sctphdr *sh;
 	union sctp_addr src;
 	union sctp_addr dest;
 	int family;
@@ -127,8 +126,6 @@ int sctp_rcv(struct sk_buff *skb)
 	if (skb_linearize(skb))
 		goto discard_it;
 
-	sh = sctp_hdr(skb);
-
 	/* Pull up the IP and SCTP headers. */
 	__skb_pull(skb, skb_transport_offset(skb));
 	if (skb->len < sizeof(struct sctphdr))
@@ -230,7 +227,7 @@ int sctp_rcv(struct sk_buff *skb)
 	chunk->rcvr = rcvr;
 
 	/* Remember the SCTP header. */
-	chunk->sctp_hdr = sh;
+	chunk->sctp_hdr = sctp_hdr(skb);
 
 	/* Set the source and destination addresses of the incoming chunk.  */
 	sctp_init_addrs(chunk, &src, &dest);
-- 
cgit v1.2.3-59-g8ed1b


From edbe77462302ec0b11a90244de13f9012118c538 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 19 Jul 2016 14:40:51 +0900
Subject: packet: fix second argument of sock_tx_timestamp()

This patch fixes an issue that a syscall (e.g. sendto syscall) cannot
work correctly. Since the sendto syscall doesn't have msg_control buffer,
the sock_tx_timestamp() in packet_snd() cannot work correctly because
the socks.tsflags is set to 0.
So, this patch sets the socks.tsflags to sk->sk_tsflags as default.

Fixes: c14ac9451c34 ("sock: enable timestamping using control messages")
Reported-by: Kazuya Mizuguchi <kazuya.mizuguchi.ks@renesas.com>
Reported-by: Keita Kobayashi <keita.kobayashi.ym@renesas.com>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9f0983fa4d52..53e87ceb26e7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1927,7 +1927,7 @@ retry:
 		goto out_unlock;
 	}
 
-	sockc.tsflags = 0;
+	sockc.tsflags = sk->sk_tsflags;
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
 		if (unlikely(err)) {
@@ -2678,7 +2678,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
 	}
 
-	sockc.tsflags = 0;
+	sockc.tsflags = po->sk.sk_tsflags;
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(&po->sk, msg, &sockc);
 		if (unlikely(err))
@@ -2881,7 +2881,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_unlock;
 
-	sockc.tsflags = 0;
+	sockc.tsflags = sk->sk_tsflags;
 	sockc.mark = sk->sk_mark;
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
-- 
cgit v1.2.3-59-g8ed1b


From f8e7718cc0445587fe8530fc2d240d9aac2c9072 Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Wed, 20 Jul 2016 18:01:18 -0400
Subject: packet: propagate sock_cmsg_send() error

sock_cmsg_send() can return different error codes and not only
-EINVAL, and we should properly propagate them.

Fixes: c14ac9451c34 ("sock: enable timestamping using control messages")
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 53e87ceb26e7..b43c4015b2f7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1930,10 +1930,8 @@ retry:
 	sockc.tsflags = sk->sk_tsflags;
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
-		if (unlikely(err)) {
-			err = -EINVAL;
+		if (unlikely(err))
 			goto out_unlock;
-		}
 	}
 
 	skb->protocol = proto;
-- 
cgit v1.2.3-59-g8ed1b