aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/Makefile 2
-rw-r--r-- net/core/dev.c 265
-rw-r--r-- net/core/ethtool.c 45
-rw-r--r-- net/core/fib_rules.c 3
-rw-r--r-- net/core/filter.c 2
-rw-r--r-- net/core/flow.c 2
-rw-r--r-- net/core/flow_dissector.c 21
-rw-r--r-- net/core/iovec.c 137
-rw-r--r-- net/core/neighbour.c 20
-rw-r--r-- net/core/net-sysfs.c 28
-rw-r--r-- net/core/net_namespace.c 213
-rw-r--r-- net/core/netpoll.c 2
-rw-r--r-- net/core/pktgen.c 18
-rw-r--r-- net/core/rtnetlink.c 169
-rw-r--r-- net/core/skbuff.c 59
-rw-r--r-- net/core/sock.c 3
-rw-r--r-- net/core/sysctl_net_core.c 15
17 files changed, 619 insertions, 385 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 235e6c50708d..fec0856dd6c0 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -2,7 +2,7 @@
# Makefile for the Linux networking core.
#
-obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 171420e75b03..8f9710c62e20 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL))
- return &ptype_all;
+ return pt->dev ? &pt->dev->ptype_all : &ptype_all;
else
- return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+ return pt->dev ? &pt->dev->ptype_specific :
+ &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
/**
@@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb,
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
+static inline void deliver_ptype_list_skb(struct sk_buff *skb,
+ struct packet_type **pt,
+ struct net_device *dev, __be16 type,
+ struct list_head *ptype_list)
+{
+ struct packet_type *ptype, *pt_prev = *pt;
+
+ list_for_each_entry_rcu(ptype, ptype_list, list) {
+ if (ptype->type != type)
+ continue;
+ if (pt_prev)
+ deliver_skb(skb, pt_prev, dev);
+ pt_prev = ptype;
+ }
+ *pt = pt_prev;
+}
+
static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
if (!ptype->af_packet_priv || !skb->sk)
@@ -1757,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
struct packet_type *ptype;
struct sk_buff *skb2 = NULL;
struct packet_type *pt_prev = NULL;
+ struct list_head *ptype_list = &ptype_all;
rcu_read_lock();
- list_for_each_entry_rcu(ptype, &ptype_all, list) {
+again:
+ list_for_each_entry_rcu(ptype, ptype_list, list) {
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
- if ((ptype->dev == dev || !ptype->dev) &&
- (!skb_loop_sk(ptype, skb))) {
- if (pt_prev) {
- deliver_skb(skb2, pt_prev, skb->dev);
- pt_prev = ptype;
- continue;
- }
+ if (skb_loop_sk(ptype, skb))
+ continue;
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- break;
+ if (pt_prev) {
+ deliver_skb(skb2, pt_prev, skb->dev);
+ pt_prev = ptype;
+ continue;
+ }
- net_timestamp_set(skb2);
+ /* need to clone skb, done only once */
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (!skb2)
+ goto out_unlock;
- /* skb->nh should be correctly
- set by sender, so that the second statement is
- just protection against buggy protocols.
- */
- skb_reset_mac_header(skb2);
-
- if (skb_network_header(skb2) < skb2->data ||
- skb_network_header(skb2) > skb_tail_pointer(skb2)) {
- net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
- ntohs(skb2->protocol),
- dev->name);
- skb_reset_network_header(skb2);
- }
+ net_timestamp_set(skb2);
- skb2->transport_header = skb2->network_header;
- skb2->pkt_type = PACKET_OUTGOING;
- pt_prev = ptype;
+ /* skb->nh should be correctly
+ * set by sender, so that the second statement is
+ * just protection against buggy protocols.
+ */
+ skb_reset_mac_header(skb2);
+
+ if (skb_network_header(skb2) < skb2->data ||
+ skb_network_header(skb2) > skb_tail_pointer(skb2)) {
+ net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
+ ntohs(skb2->protocol),
+ dev->name);
+ skb_reset_network_header(skb2);
}
+
+ skb2->transport_header = skb2->network_header;
+ skb2->pkt_type = PACKET_OUTGOING;
+ pt_prev = ptype;
+ }
+
+ if (ptype_list == &ptype_all) {
+ ptype_list = &dev->ptype_all;
+ goto again;
}
+out_unlock:
if (pt_prev)
pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
rcu_read_unlock();
@@ -2352,7 +2379,6 @@ EXPORT_SYMBOL(skb_checksum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
- unsigned int vlan_depth = skb->mac_len;
__be16 type = skb->protocol;
/* Tunnel gso handlers can set protocol to ethernet. */
@@ -2366,35 +2392,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
type = eth->h_proto;
}
- /* if skb->protocol is 802.1Q/AD then the header should already be
- * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
- * ETH_HLEN otherwise
- */
- if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
- if (vlan_depth) {
- if (WARN_ON(vlan_depth < VLAN_HLEN))
- return 0;
- vlan_depth -= VLAN_HLEN;
- } else {
- vlan_depth = ETH_HLEN;
- }
- do {
- struct vlan_hdr *vh;
-
- if (unlikely(!pskb_may_pull(skb,
- vlan_depth + VLAN_HLEN)))
- return 0;
-
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
- } while (type == htons(ETH_P_8021Q) ||
- type == htons(ETH_P_8021AD));
- }
-
- *depth = vlan_depth;
-
- return type;
+ return __vlan_get_protocol(skb, type, depth);
}
/**
@@ -2578,7 +2576,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
if (skb->encapsulation)
features &= dev->hw_enc_features;
- if (!vlan_tx_tag_present(skb)) {
+ if (!skb_vlan_tag_present(skb)) {
if (unlikely(protocol == htons(ETH_P_8021Q) ||
protocol == htons(ETH_P_8021AD))) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2617,7 +2615,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
unsigned int len;
int rc;
- if (!list_empty(&ptype_all))
+ if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
dev_queue_xmit_nit(skb, dev);
len = skb->len;
@@ -2659,7 +2657,7 @@ out:
static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
netdev_features_t features)
{
- if (vlan_tx_tag_present(skb) &&
+ if (skb_vlan_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto))
skb = __vlan_hwaccel_push_inside(skb);
return skb;
@@ -3032,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd,
/* One global table that all flow-based protocols share. */
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
+u32 rps_cpu_mask __read_mostly;
+EXPORT_SYMBOL(rps_cpu_mask);
struct static_key rps_needed __read_mostly;
@@ -3088,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow **rflowp)
{
- struct netdev_rx_queue *rxqueue;
- struct rps_map *map;
+ const struct rps_sock_flow_table *sock_flow_table;
+ struct netdev_rx_queue *rxqueue = dev->_rx;
struct rps_dev_flow_table *flow_table;
- struct rps_sock_flow_table *sock_flow_table;
+ struct rps_map *map;
int cpu = -1;
- u16 tcpu;
+ u32 tcpu;
u32 hash;
if (skb_rx_queue_recorded(skb)) {
u16 index = skb_get_rx_queue(skb);
+
if (unlikely(index >= dev->real_num_rx_queues)) {
WARN_ONCE(dev->real_num_rx_queues > 1,
"%s received packet on queue %u, but number "
@@ -3105,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
dev->name, index, dev->real_num_rx_queues);
goto done;
}
- rxqueue = dev->_rx + index;
- } else
- rxqueue = dev->_rx;
+ rxqueue += index;
+ }
+ /* Avoid computing hash if RFS/RPS is not active for this rxqueue */
+
+ flow_table = rcu_dereference(rxqueue->rps_flow_table);
map = rcu_dereference(rxqueue->rps_map);
- if (map) {
- if (map->len == 1 &&
- !rcu_access_pointer(rxqueue->rps_flow_table)) {
- tcpu = map->cpus[0];
- if (cpu_online(tcpu))
- cpu = tcpu;
- goto done;
- }
- } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
+ if (!flow_table && !map)
goto done;
- }
skb_reset_network_header(skb);
hash = skb_get_hash(skb);
if (!hash)
goto done;
- flow_table = rcu_dereference(rxqueue->rps_flow_table);
sock_flow_table = rcu_dereference(rps_sock_flow_table);
if (flow_table && sock_flow_table) {
- u16 next_cpu;
struct rps_dev_flow *rflow;
+ u32 next_cpu;
+ u32 ident;
+
+ /* First check into global flow table if there is a match */
+ ident = sock_flow_table->ents[hash & sock_flow_table->mask];
+ if ((ident ^ hash) & ~rps_cpu_mask)
+ goto try_rps;
+ next_cpu = ident & rps_cpu_mask;
+
+ /* OK, now we know there is a match,
+ * we can look at the local (per receive queue) flow table
+ */
rflow = &flow_table->flows[hash & flow_table->mask];
tcpu = rflow->cpu;
- next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
-
/*
* If the desired CPU (where last recvmsg was done) is
* different from current CPU (one in the rx-queue flow
@@ -3164,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
}
}
+try_rps:
+
if (map) {
tcpu = map->cpus[reciprocal_scale(hash, map->len)];
if (cpu_online(tcpu)) {
@@ -3615,7 +3619,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
struct packet_type *ptype, *pt_prev;
rx_handler_func_t *rx_handler;
struct net_device *orig_dev;
- struct net_device *null_or_dev;
bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
@@ -3658,11 +3661,15 @@ another_round:
goto skip_taps;
list_for_each_entry_rcu(ptype, &ptype_all, list) {
- if (!ptype->dev || ptype->dev == skb->dev) {
- if (pt_prev)
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = ptype;
- }
+ if (pt_prev)
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = ptype;
+ }
+
+ list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
+ if (pt_prev)
+ ret = deliver_skb(skb, pt_prev, orig_dev);
+ pt_prev = ptype;
}
skip_taps:
@@ -3676,7 +3683,7 @@ ncls:
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
- if (vlan_tx_tag_present(skb)) {
+ if (skb_vlan_tag_present(skb)) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
@@ -3708,8 +3715,8 @@ ncls:
}
}
- if (unlikely(vlan_tx_tag_present(skb))) {
- if (vlan_tx_tag_get_id(skb))
+ if (unlikely(skb_vlan_tag_present(skb))) {
+ if (skb_vlan_tag_get_id(skb))
skb->pkt_type = PACKET_OTHERHOST;
/* Note: we might in the future use prio bits
* and set skb->priority like in vlan_do_receive()
@@ -3718,19 +3725,21 @@ ncls:
skb->vlan_tci = 0;
}
+ type = skb->protocol;
+
/* deliver only exact match when indicated */
- null_or_dev = deliver_exact ? skb->dev : NULL;
+ if (likely(!deliver_exact)) {
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &ptype_base[ntohs(type) &
+ PTYPE_HASH_MASK]);
+ }
- type = skb->protocol;
- list_for_each_entry_rcu(ptype,
- &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
- if (ptype->type == type &&
- (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
- ptype->dev == orig_dev)) {
- if (pt_prev)
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = ptype;
- }
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &orig_dev->ptype_specific);
+
+ if (unlikely(skb->dev != orig_dev)) {
+ deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
+ &skb->dev->ptype_specific);
}
if (pt_prev) {
@@ -4015,6 +4024,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
NAPI_GRO_CB(skb)->udp_mark = 0;
+ NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
/* Setup for GRO checksum validation */
switch (skb->ip_summed) {
@@ -5323,7 +5333,27 @@ void netdev_upper_dev_unlink(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
-void netdev_adjacent_add_links(struct net_device *dev)
+/**
+ * netdev_bonding_info_change - Dispatch event about slave change
+ * @dev: device
+ * @bonding_info: info to dispatch
+ *
+ * Send NETDEV_BONDING_INFO to netdev notifiers with info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_bonding_info_change(struct net_device *dev,
+ struct netdev_bonding_info *bonding_info)
+{
+ struct netdev_notifier_bonding_info info;
+
+ memcpy(&info.bonding_info, bonding_info,
+ sizeof(struct netdev_bonding_info));
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ &info.info);
+}
+EXPORT_SYMBOL(netdev_bonding_info_change);
+
+static void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -5348,7 +5378,7 @@ void netdev_adjacent_add_links(struct net_device *dev)
}
}
-void netdev_adjacent_del_links(struct net_device *dev)
+static void netdev_adjacent_del_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -6172,13 +6202,16 @@ static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
+ size_t sz = count * sizeof(*rx);
BUG_ON(count < 1);
- rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
- if (!rx)
- return -ENOMEM;
-
+ rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+ if (!rx) {
+ rx = vzalloc(sz);
+ if (!rx)
+ return -ENOMEM;
+ }
dev->_rx = rx;
for (i = 0; i < count; i++)
@@ -6576,6 +6609,8 @@ void netdev_run_todo(void)
/* paranoia */
BUG_ON(netdev_refcnt_read(dev));
+ BUG_ON(!list_empty(&dev->ptype_all));
+ BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
WARN_ON(dev->dn_ptr);
@@ -6656,7 +6691,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
if (!queue)
return NULL;
netdev_init_one_queue(dev, queue, NULL);
- queue->qdisc = &noop_qdisc;
+ RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
queue->qdisc_sleeping = &noop_qdisc;
rcu_assign_pointer(dev->ingress_queue, queue);
#endif
@@ -6758,6 +6793,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->adj_list.lower);
INIT_LIST_HEAD(&dev->all_adj_list.upper);
INIT_LIST_HEAD(&dev->all_adj_list.lower);
+ INIT_LIST_HEAD(&dev->ptype_all);
+ INIT_LIST_HEAD(&dev->ptype_specific);
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -6808,7 +6845,7 @@ void free_netdev(struct net_device *dev)
netif_free_tx_queues(dev);
#ifdef CONFIG_SYSFS
- kfree(dev->_rx);
+ kvfree(dev->_rx);
#endif
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
@@ -7093,11 +7130,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
- netif_rx_internal(skb);
+ netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}
while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
- netif_rx_internal(skb);
+ netif_rx_ni(skb);
input_queue_head_incr(oldsd);
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 550892cd6b3f..91f74f3eb204 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1597,20 +1597,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
return err;
}
+static int __ethtool_get_module_info(struct net_device *dev,
+ struct ethtool_modinfo *modinfo)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct phy_device *phydev = dev->phydev;
+
+ if (phydev && phydev->drv && phydev->drv->module_info)
+ return phydev->drv->module_info(phydev, modinfo);
+
+ if (ops->get_module_info)
+ return ops->get_module_info(dev, modinfo);
+
+ return -EOPNOTSUPP;
+}
+
static int ethtool_get_module_info(struct net_device *dev,
void __user *useraddr)
{
int ret;
struct ethtool_modinfo modinfo;
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (!ops->get_module_info)
- return -EOPNOTSUPP;
if (copy_from_user(&modinfo, useraddr, sizeof(modinfo)))
return -EFAULT;
- ret = ops->get_module_info(dev, &modinfo);
+ ret = __ethtool_get_module_info(dev, &modinfo);
if (ret)
return ret;
@@ -1620,21 +1631,33 @@ static int ethtool_get_module_info(struct net_device *dev,
return 0;
}
+static int __ethtool_get_module_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *ee, u8 *data)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct phy_device *phydev = dev->phydev;
+
+ if (phydev && phydev->drv && phydev->drv->module_eeprom)
+ return phydev->drv->module_eeprom(phydev, ee, data);
+
+ if (ops->get_module_eeprom)
+ return ops->get_module_eeprom(dev, ee, data);
+
+ return -EOPNOTSUPP;
+}
+
static int ethtool_get_module_eeprom(struct net_device *dev,
void __user *useraddr)
{
int ret;
struct ethtool_modinfo modinfo;
- const struct ethtool_ops *ops = dev->ethtool_ops;
-
- if (!ops->get_module_info || !ops->get_module_eeprom)
- return -EOPNOTSUPP;
- ret = ops->get_module_info(dev, &modinfo);
+ ret = __ethtool_get_module_info(dev, &modinfo);
if (ret)
return ret;
- return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom,
+ return ethtool_get_any_eeprom(dev, useraddr,
+ __ethtool_get_module_eeprom,
modinfo.eeprom_len);
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 185c341fafbd..44706e81b2e0 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -609,7 +609,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
if (ops->fill(rule, skb, frh) < 0)
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
diff --git a/net/core/filter.c b/net/core/filter.c
index ec9baea10c16..f6bdc2b1ba01 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -531,7 +531,7 @@ do_pass:
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
break;
- /* Unkown instruction. */
+ /* Unknown instruction. */
default:
goto err;
}
diff --git a/net/core/flow.c b/net/core/flow.c
index a0348fde1fdf..1033725be40b 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -379,7 +379,7 @@ done:
static void flow_cache_flush_task(struct work_struct *work)
{
struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
- flow_cache_gc_work);
+ flow_cache_flush_work);
struct net *net = container_of(xfrm, struct net, xfrm);
flow_cache_flush(net);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 45084938c403..2c35c02a931e 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -178,6 +178,20 @@ ipv6:
return false;
}
}
+ case htons(ETH_P_TIPC): {
+ struct {
+ __be32 pre[3];
+ __be32 srcnode;
+ } *hdr, _hdr;
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
+ if (!hdr)
+ return false;
+ flow->src = hdr->srcnode;
+ flow->dst = 0;
+ flow->n_proto = proto;
+ flow->thoff = (u16)nhoff;
+ return true;
+ }
case htons(ETH_P_FCOE):
flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
/* fall through */
@@ -408,7 +422,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
dev_maps = rcu_dereference(dev->xps_maps);
if (dev_maps) {
map = rcu_dereference(
- dev_maps->cpu_map[raw_smp_processor_id()]);
+ dev_maps->cpu_map[skb->sender_cpu - 1]);
if (map) {
if (map->len == 1)
queue_index = map->queues[0];
@@ -454,6 +468,11 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
{
int queue_index = 0;
+#ifdef CONFIG_XPS
+ if (skb->sender_cpu == 0)
+ skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
if (dev->real_num_tx_queues != 1) {
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue)
diff --git a/net/core/iovec.c b/net/core/iovec.c
deleted file mode 100644
index dcbe98b3726a..000000000000
--- a/net/core/iovec.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * iovec manipulation routines.
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Fixes:
- * Andrew Lunn : Errors in iovec copying.
- * Pedro Roque : Added memcpy_fromiovecend and
- * csum_..._fromiovecend.
- * Andi Kleen : fixed error handling for 2.1
- * Alexey Kuznetsov: 2.1 optimisations
- * Andi Kleen : Fix csum*fromiovecend for IPv6.
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/net.h>
-#include <linux/in6.h>
-#include <asm/uaccess.h>
-#include <asm/byteorder.h>
-#include <net/checksum.h>
-#include <net/sock.h>
-
-/*
- * And now for the all-in-one: copy and checksum from a user iovec
- * directly to a datagram
- * Calls to csum_partial but the last must be in 32 bit chunks
- *
- * ip_build_xmit must ensure that when fragmenting only the last
- * call to this function will be unaligned also.
- */
-int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov,
- int offset, unsigned int len, __wsum *csump)
-{
- __wsum csum = *csump;
- int partial_cnt = 0, err = 0;
-
- /* Skip over the finished iovecs */
- while (offset >= iov->iov_len) {
- offset -= iov->iov_len;
- iov++;
- }
-
- while (len > 0) {
- u8 __user *base = iov->iov_base + offset;
- int copy = min_t(unsigned int, len, iov->iov_len - offset);
-
- offset = 0;
-
- /* There is a remnant from previous iov. */
- if (partial_cnt) {
- int par_len = 4 - partial_cnt;
-
- /* iov component is too short ... */
- if (par_len > copy) {
- if (copy_from_user(kdata, base, copy))
- goto out_fault;
- kdata += copy;
- base += copy;
- partial_cnt += copy;
- len -= copy;
- iov++;
- if (len)
- continue;
- *csump = csum_partial(kdata - partial_cnt,
- partial_cnt, csum);
- goto out;
- }
- if (copy_from_user(kdata, base, par_len))
- goto out_fault;
- csum = csum_partial(kdata - partial_cnt, 4, csum);
- kdata += par_len;
- base += par_len;
- copy -= par_len;
- len -= par_len;
- partial_cnt = 0;
- }
-
- if (len > copy) {
- partial_cnt = copy % 4;
- if (partial_cnt) {
- copy -= partial_cnt;
- if (copy_from_user(kdata + copy, base + copy,
- partial_cnt))
- goto out_fault;
- }
- }
-
- if (copy) {
- csum = csum_and_copy_from_user(base, kdata, copy,
- csum, &err);
- if (err)
- goto out;
- }
- len -= copy + partial_cnt;
- kdata += copy + partial_cnt;
- iov++;
- }
- *csump = csum;
-out:
- return err;
-
-out_fault:
- err = -EFAULT;
- goto out;
-}
-EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
-
-unsigned long iov_pages(const struct iovec *iov, int offset,
- unsigned long nr_segs)
-{
- unsigned long seg, base;
- int pages = 0, len, size;
-
- while (nr_segs && (offset >= iov->iov_len)) {
- offset -= iov->iov_len;
- ++iov;
- --nr_segs;
- }
-
- for (seg = 0; seg < nr_segs; seg++) {
- base = (unsigned long)iov[seg].iov_base + offset;
- len = iov[seg].iov_len - offset;
- size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
- pages += size;
- offset = 0;
- }
-
- return pages;
-}
-EXPORT_SYMBOL(iov_pages);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8d614c93f86a..70fe9e10ac86 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1884,7 +1884,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
goto nla_put_failure;
read_unlock_bh(&tbl->lock);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
read_unlock_bh(&tbl->lock);
@@ -1917,7 +1918,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb,
goto errout;
read_unlock_bh(&tbl->lock);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
errout:
read_unlock_bh(&tbl->lock);
nlmsg_cancel(skb, nlh);
@@ -2126,7 +2128,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
- NLM_F_MULTI) <= 0)
+ NLM_F_MULTI) < 0)
break;
nidx = 0;
@@ -2142,7 +2144,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGHTBL,
- NLM_F_MULTI) <= 0)
+ NLM_F_MULTI) < 0)
goto out;
next:
nidx++;
@@ -2202,7 +2204,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -2232,7 +2235,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -2270,7 +2274,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
- NLM_F_MULTI) <= 0) {
+ NLM_F_MULTI) < 0) {
rc = -1;
goto out;
}
@@ -2307,7 +2311,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
- NLM_F_MULTI, tbl) <= 0) {
+ NLM_F_MULTI, tbl) < 0) {
read_unlock_bh(&tbl->lock);
rc = -1;
goto out;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 999341244434..f2aa73bfb0e4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -614,8 +614,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
{
struct rps_map *map;
cpumask_var_t mask;
- size_t len = 0;
- int i;
+ int i, len;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
@@ -626,17 +625,11 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
for (i = 0; i < map->len; i++)
cpumask_set_cpu(map->cpus[i], mask);
- len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
- if (PAGE_SIZE - len < 3) {
- rcu_read_unlock();
- free_cpumask_var(mask);
- return -EINVAL;
- }
+ len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
rcu_read_unlock();
-
free_cpumask_var(mask);
- len += sprintf(buf + len, "\n");
- return len;
+
+ return len < PAGE_SIZE ? len : -EINVAL;
}
static ssize_t store_rps_map(struct netdev_rx_queue *queue,
@@ -1090,8 +1083,7 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
struct xps_dev_maps *dev_maps;
cpumask_var_t mask;
unsigned long index;
- size_t len = 0;
- int i;
+ int i, len;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
@@ -1117,15 +1109,9 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
}
rcu_read_unlock();
- len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
- if (PAGE_SIZE - len < 3) {
- free_cpumask_var(mask);
- return -EINVAL;
- }
-
+ len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
free_cpumask_var(mask);
- len += sprintf(buf + len, "\n");
- return len;
+ return len < PAGE_SIZE ? len : -EINVAL;
}
static ssize_t store_xps_map(struct netdev_queue *queue,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ce780c722e48..cb5290b8c428 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -15,6 +15,10 @@
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
+#include <linux/net_namespace.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -144,6 +148,78 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
+static int alloc_netid(struct net *net, struct net *peer, int reqid)
+{
+ int min = 0, max = 0;
+
+ ASSERT_RTNL();
+
+ if (reqid >= 0) {
+ min = reqid;
+ max = reqid + 1;
+ }
+
+ return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+}
+
+/* This function is used by idr_for_each(). If net is equal to peer, the
+ * function returns the id so that idr_for_each() stops. Because we cannot
+ * return the id 0 (idr_for_each() will not stop), we return the magic value
+ * NET_ID_ZERO (-1) for it.
+ */
+#define NET_ID_ZERO -1
+static int net_eq_idr(int id, void *net, void *peer)
+{
+ if (net_eq(net, peer))
+ return id ? : NET_ID_ZERO;
+ return 0;
+}
+
+static int __peernet2id(struct net *net, struct net *peer, bool alloc)
+{
+ int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
+
+ ASSERT_RTNL();
+
+ /* Magic value for id 0. */
+ if (id == NET_ID_ZERO)
+ return 0;
+ if (id > 0)
+ return id;
+
+ if (alloc)
+ return alloc_netid(net, peer, -1);
+
+ return -ENOENT;
+}
+
+/* This function returns the id of a peer netns. If no id is assigned, one will
+ * be allocated and returned.
+ */
+int peernet2id(struct net *net, struct net *peer)
+{
+ int id = __peernet2id(net, peer, true);
+
+ return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+}
+EXPORT_SYMBOL(peernet2id);
+
+struct net *get_net_ns_by_id(struct net *net, int id)
+{
+ struct net *peer;
+
+ if (id < 0)
+ return NULL;
+
+ rcu_read_lock();
+ peer = idr_find(&net->netns_ids, id);
+ if (peer)
+ get_net(peer);
+ rcu_read_unlock();
+
+ return peer;
+}
+
/*
* setup_net runs the initializers for the network namespace object.
*/
@@ -158,6 +234,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
atomic_set(&net->passive, 1);
net->dev_base_seq = 1;
net->user_ns = user_ns;
+ idr_init(&net->netns_ids);
#ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0);
@@ -288,6 +365,14 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry(net, &net_kill_list, cleanup_list) {
list_del_rcu(&net->list);
list_add_tail(&net->exit_list, &net_exit_list);
+ for_each_net(tmp) {
+ int id = __peernet2id(tmp, net, false);
+
+ if (id >= 0)
+ idr_remove(&tmp->netns_ids, id);
+ }
+ idr_destroy(&net->netns_ids);
+
}
rtnl_unlock();
@@ -361,6 +446,7 @@ struct net *get_net_ns_by_fd(int fd)
return ERR_PTR(-EINVAL);
}
#endif
+EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
struct net *get_net_ns_by_pid(pid_t pid)
{
@@ -402,6 +488,130 @@ static struct pernet_operations __net_initdata net_ns_ops = {
.exit = net_ns_net_exit,
};
+static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+ [NETNSA_NONE] = { .type = NLA_UNSPEC },
+ [NETNSA_NSID] = { .type = NLA_S32 },
+ [NETNSA_PID] = { .type = NLA_U32 },
+ [NETNSA_FD] = { .type = NLA_U32 },
+};
+
+static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NETNSA_MAX + 1];
+ struct net *peer;
+ int nsid, err;
+
+ err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+ rtnl_net_policy);
+ if (err < 0)
+ return err;
+ if (!tb[NETNSA_NSID])
+ return -EINVAL;
+ nsid = nla_get_s32(tb[NETNSA_NSID]);
+
+ if (tb[NETNSA_PID])
+ peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
+ else if (tb[NETNSA_FD])
+ peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
+ else
+ return -EINVAL;
+ if (IS_ERR(peer))
+ return PTR_ERR(peer);
+
+ if (__peernet2id(net, peer, false) >= 0) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ err = alloc_netid(net, peer, nsid);
+ if (err > 0)
+ err = 0;
+out:
+ put_net(peer);
+ return err;
+}
+
+static int rtnl_net_get_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct rtgenmsg))
+ + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
+ ;
+}
+
+static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
+ int cmd, struct net *net, struct net *peer)
+{
+ struct nlmsghdr *nlh;
+ struct rtgenmsg *rth;
+ int id;
+
+ ASSERT_RTNL();
+
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rth = nlmsg_data(nlh);
+ rth->rtgen_family = AF_UNSPEC;
+
+ id = __peernet2id(net, peer, false);
+ if (id < 0)
+ id = NETNSA_NSID_NOT_ASSIGNED;
+ if (nla_put_s32(skb, NETNSA_NSID, id))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NETNSA_MAX + 1];
+ struct sk_buff *msg;
+ int err = -ENOBUFS;
+ struct net *peer;
+
+ err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+ rtnl_net_policy);
+ if (err < 0)
+ return err;
+ if (tb[NETNSA_PID])
+ peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
+ else if (tb[NETNSA_FD])
+ peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
+ else
+ return -EINVAL;
+
+ if (IS_ERR(peer))
+ return PTR_ERR(peer);
+
+ msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ RTM_GETNSID, net, peer);
+ if (err < 0)
+ goto err_out;
+
+ err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
+ goto out;
+
+err_out:
+ nlmsg_free(msg);
+out:
+ put_net(peer);
+ return err;
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -435,6 +645,9 @@ static int __init net_ns_init(void)
register_pernet_subsys(&net_ns_ops);
+ rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
+
return 0;
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e0ad5d16c9c5..c126a878c47c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -77,7 +77,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
features = netif_skb_features(skb);
- if (vlan_tx_tag_present(skb) &&
+ if (skb_vlan_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto)) {
skb = __vlan_hwaccel_push_inside(skb);
if (unlikely(!skb)) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index da934fc3faa8..b4899f5b7388 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -97,7 +97,7 @@
* New xmit() return, do_div and misc clean up by Stephen Hemminger
* <shemminger@osdl.org> 040923
*
- * Randy Dunlap fixed u64 printk compiler waring
+ * Randy Dunlap fixed u64 printk compiler warning
*
* Remove FCS from BW calculation. Lennert Buytenhek <buytenh@wantstofly.org>
* New time handling. Lennert Buytenhek <buytenh@wantstofly.org> 041213
@@ -2842,25 +2842,25 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
+ pktgen_finalize_skb(pkt_dev, skb, datalen);
+
if (!(pkt_dev->flags & F_UDPCSUM)) {
skb->ip_summed = CHECKSUM_NONE;
} else if (odev->features & NETIF_F_V4_CSUM) {
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- udp4_hwcsum(skb, udph->source, udph->dest);
+ udp4_hwcsum(skb, iph->saddr, iph->daddr);
} else {
- __wsum csum = udp_csum(skb);
+ __wsum csum = skb_checksum(skb, skb_transport_offset(skb), datalen + 8, 0);
/* add protocol-dependent pseudo-header */
- udph->check = csum_tcpudp_magic(udph->source, udph->dest,
+ udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
datalen + 8, IPPROTO_UDP, csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
}
- pktgen_finalize_skb(pkt_dev, skb, datalen);
-
#ifdef CONFIG_XFRM
if (!process_ipsec(pkt_dev, skb, protocol))
return NULL;
@@ -2976,6 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
+ pktgen_finalize_skb(pkt_dev, skb, datalen);
+
if (!(pkt_dev->flags & F_UDPCSUM)) {
skb->ip_summed = CHECKSUM_NONE;
} else if (odev->features & NETIF_F_V6_CSUM) {
@@ -2984,7 +2986,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->csum_offset = offsetof(struct udphdr, check);
udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0);
} else {
- __wsum csum = udp_csum(skb);
+ __wsum csum = skb_checksum(skb, skb_transport_offset(skb), udplen, 0);
/* add protocol-dependent pseudo-header */
udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum);
@@ -2993,8 +2995,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
udph->check = CSUM_MANGLED_0;
}
- pktgen_finalize_skb(pkt_dev, skb, datalen);
-
return skb;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9cf6fe9ddc0c..ab293a3066b3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,6 +50,7 @@
#include <net/arp.h>
#include <net/route.h>
#include <net/udp.h>
+#include <net/tcp.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/fib_rules.h>
@@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
for (i = 0; i < RTAX_MAX; i++) {
if (metrics[i]) {
+ if (i == RTAX_CC_ALGO - 1) {
+ char tmp[TCP_CA_NAME_MAX], *name;
+
+ name = tcp_ca_get_name_by_key(metrics[i], tmp);
+ if (!name)
+ continue;
+ if (nla_put_string(skb, i + 1, name))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u32(skb, i + 1, metrics[i]))
+ goto nla_put_failure;
+ }
valid++;
- if (nla_put_u32(skb, i+1, metrics[i]))
- goto nla_put_failure;
}
}
@@ -864,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+ + nla_total_size(4) /* IFLA_LINK_NETNSID */
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1158,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
+ if (dev->rtnl_link_ops &&
+ dev->rtnl_link_ops->get_link_net) {
+ struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+ if (!net_eq(dev_net(dev), link_net)) {
+ int id = peernet2id(dev_net(dev), link_net);
+
+ if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+ goto nla_put_failure;
+ }
+ }
+
if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
goto nla_put_failure;
@@ -1188,7 +1212,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_nest_end(skb, af_spec);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -1223,6 +1248,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
+ [IFLA_LINK_NETNSID] = { .type = NLA_S32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1237,18 +1263,12 @@ static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
};
static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
- [IFLA_VF_MAC] = { .type = NLA_BINARY,
- .len = sizeof(struct ifla_vf_mac) },
- [IFLA_VF_VLAN] = { .type = NLA_BINARY,
- .len = sizeof(struct ifla_vf_vlan) },
- [IFLA_VF_TX_RATE] = { .type = NLA_BINARY,
- .len = sizeof(struct ifla_vf_tx_rate) },
- [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY,
- .len = sizeof(struct ifla_vf_spoofchk) },
- [IFLA_VF_RATE] = { .type = NLA_BINARY,
- .len = sizeof(struct ifla_vf_rate) },
- [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY,
- .len = sizeof(struct ifla_vf_link_state) },
+ [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) },
+ [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) },
+ [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) },
+ [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) },
+ [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) },
+ [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1315,7 +1335,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
*/
WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err <= 0)
+ if (err < 0)
goto out;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -1996,7 +2016,7 @@ replay:
struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0];
struct nlattr **data = NULL;
struct nlattr **slave_data = NULL;
- struct net *dest_net;
+ struct net *dest_net, *link_net = NULL;
if (ops) {
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
@@ -2102,7 +2122,18 @@ replay:
if (IS_ERR(dest_net))
return PTR_ERR(dest_net);
- dev = rtnl_create_link(dest_net, ifname, name_assign_type, ops, tb);
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+
+ link_net = get_net_ns_by_id(dest_net, id);
+ if (!link_net) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ name_assign_type, ops, tb);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
goto out;
@@ -2111,7 +2142,7 @@ replay:
dev->ifindex = ifm->ifi_index;
if (ops->newlink) {
- err = ops->newlink(net, dev, tb, data);
+ err = ops->newlink(link_net ? : net, dev, tb, data);
/* Drivers should call free_netdev() in ->destructor
* and unregister it on failure after registration
* so that device could be finally freed in rtnl_unlock.
@@ -2130,9 +2161,26 @@ replay:
}
}
err = rtnl_configure_link(dev, ifm);
- if (err < 0)
- unregister_netdevice(dev);
+ if (err < 0) {
+ if (ops->newlink) {
+ LIST_HEAD(list_kill);
+
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ } else {
+ unregister_netdevice(dev);
+ }
+ goto out;
+ }
+
+ if (link_net) {
+ err = dev_change_net_namespace(dev, dest_net, ifname);
+ if (err < 0)
+ unregister_netdevice(dev);
+ }
out:
+ if (link_net)
+ put_net(link_net);
put_net(dest_net);
return err;
}
@@ -2315,7 +2363,8 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
goto nla_put_failure;
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
@@ -2698,10 +2747,11 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
idx);
}
- idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
if (dev->netdev_ops->ndo_fdb_dump)
- idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, bdev, dev,
+ idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
idx);
+ else
+ idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
cops = NULL;
}
@@ -2797,7 +2847,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
nla_nest_end(skb, protinfo);
- return nlmsg_end(skb, nlh);
+ nlmsg_end(skb, nlh);
+ return 0;
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -2868,39 +2919,35 @@ static inline size_t bridge_nlmsg_size(void)
+ nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */
}
-static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
+static int rtnl_bridge_notify(struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct net_device *br_dev = netdev_master_upper_dev_get(dev);
struct sk_buff *skb;
int err = -EOPNOTSUPP;
+ if (!dev->netdev_ops->ndo_bridge_getlink)
+ return 0;
+
skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC);
if (!skb) {
err = -ENOMEM;
goto errout;
}
- if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) &&
- br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
- err = br_dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
- if (err < 0)
- goto errout;
- }
+ err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
+ if (err < 0)
+ goto errout;
- if ((flags & BRIDGE_FLAGS_SELF) &&
- dev->netdev_ops->ndo_bridge_getlink) {
- err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
- if (err < 0)
- goto errout;
- }
+ if (!skb->len)
+ goto errout;
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
return 0;
errout:
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
- rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ if (err)
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
return err;
}
@@ -2911,7 +2958,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net_device *dev;
struct nlattr *br_spec, *attr = NULL;
int rem, err = -EOPNOTSUPP;
- u16 oflags, flags = 0;
+ u16 flags = 0;
bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
@@ -2941,8 +2988,6 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
- oflags = flags;
-
if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
@@ -2951,7 +2996,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
goto out;
}
- err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
+ err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags);
if (err)
goto out;
@@ -2962,17 +3007,20 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!dev->netdev_ops->ndo_bridge_setlink)
err = -EOPNOTSUPP;
else
- err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh);
-
- if (!err)
+ err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh,
+ flags);
+ if (!err) {
flags &= ~BRIDGE_FLAGS_SELF;
+
+ /* Generate event to notify upper layer of bridge
+ * change
+ */
+ err = rtnl_bridge_notify(dev);
+ }
}
if (have_flags)
memcpy(nla_data(attr), &flags, sizeof(flags));
- /* Generate event to notify upper layer of bridge change */
- if (!err)
- err = rtnl_bridge_notify(dev, oflags);
out:
return err;
}
@@ -2984,7 +3032,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net_device *dev;
struct nlattr *br_spec, *attr = NULL;
int rem, err = -EOPNOTSUPP;
- u16 oflags, flags = 0;
+ u16 flags = 0;
bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
@@ -3014,8 +3062,6 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
}
}
- oflags = flags;
-
if (!flags || (flags & BRIDGE_FLAGS_MASTER)) {
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
@@ -3024,7 +3070,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
goto out;
}
- err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+ err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags);
if (err)
goto out;
@@ -3035,17 +3081,21 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!dev->netdev_ops->ndo_bridge_dellink)
err = -EOPNOTSUPP;
else
- err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh);
+ err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh,
+ flags);
- if (!err)
+ if (!err) {
flags &= ~BRIDGE_FLAGS_SELF;
+
+ /* Generate event to notify upper layer of bridge
+ * change
+ */
+ err = rtnl_bridge_notify(dev);
+ }
}
if (have_flags)
memcpy(nla_data(attr), &flags, sizeof(flags));
- /* Generate event to notify upper layer of bridge change */
- if (!err)
- err = rtnl_bridge_notify(dev, oflags);
out:
return err;
}
@@ -3135,6 +3185,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_UNREGISTER_FINAL:
case NETDEV_RELEASE:
case NETDEV_JOIN:
+ case NETDEV_BONDING_INFO:
break;
default:
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 395c15b82087..88c613eab142 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -74,6 +74,8 @@
#include <asm/uaccess.h>
#include <trace/events/skb.h>
#include <linux/highmem.h>
+#include <linux/capability.h>
+#include <linux/user_namespace.h>
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
@@ -677,13 +679,6 @@ static void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nf_bridge_put(skb->nf_bridge);
#endif
-/* XXX: IS this still necessary? - JHS */
-#ifdef CONFIG_NET_SCHED
- skb->tc_index = 0;
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_verd = 0;
-#endif
-#endif
}
/* Free everything but the sk_buff shell. */
@@ -830,6 +825,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#ifdef CONFIG_NET_RX_BUSY_POLL
CHECK_SKB_FIELD(napi_id);
#endif
+#ifdef CONFIG_XPS
+ CHECK_SKB_FIELD(sender_cpu);
+#endif
#ifdef CONFIG_NET_SCHED
CHECK_SKB_FIELD(tc_index);
#ifdef CONFIG_NET_CLS_ACT
@@ -3697,11 +3695,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
kfree_skb(skb);
}
+/* Decide whether a tx timestamp may be queued for @sk.
+ *
+ * Always allowed when sysctl_tstamp_allow_data is non-zero or when the
+ * request is timestamp-only (@tsonly). Otherwise the socket must still
+ * have a file attached, and that file must hold CAP_NET_RAW in
+ * init_user_ns; both are examined under sk_callback_lock.
+ */
+static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
+{
+	bool allowed = false;
+
+	if (likely(sysctl_tstamp_allow_data || tsonly))
+		return true;
+
+	read_lock_bh(&sk->sk_callback_lock);
+	if (sk->sk_socket && sk->sk_socket->file)
+		allowed = file_ns_capable(sk->sk_socket->file, &init_user_ns,
+					  CAP_NET_RAW);
+	read_unlock_bh(&sk->sk_callback_lock);
+
+	return allowed;
+}
+
void skb_complete_tx_timestamp(struct sk_buff *skb,
struct skb_shared_hwtstamps *hwtstamps)
{
struct sock *sk = skb->sk;
+ if (!skb_may_tx_timestamp(sk, false))
+ return;
+
/* take a reference to prevent skb_orphan() from freeing the socket */
sock_hold(sk);
@@ -3717,19 +3732,33 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
struct sock *sk, int tstype)
{
struct sk_buff *skb;
+ bool tsonly;
if (!sk)
return;
- if (hwtstamps)
- *skb_hwtstamps(orig_skb) = *hwtstamps;
+ /* sk may be NULL, so sk_tsflags is read only after the check above. */
+ tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+ if (!skb_may_tx_timestamp(sk, tsonly))
+ return;
+
+ if (tsonly)
+ skb = alloc_skb(0, GFP_ATOMIC);
else
- orig_skb->tstamp = ktime_get_real();
-
- skb = skb_clone(orig_skb, GFP_ATOMIC);
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!skb)
return;
+ if (tsonly) {
+ /* An empty skb has no shared info of its own: copy the keys. */
+ skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags;
+ skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey;
+ }
+
+ if (hwtstamps)
+ *skb_hwtstamps(skb) = *hwtstamps;
+ else
+ skb->tstamp = ktime_get_real();
+
__skb_complete_tx_timestamp(skb, sk, tstype);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
@@ -4148,6 +4172,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
skb->ignore_df = 0;
skb_dst_drop(skb);
skb->mark = 0;
+ skb->sender_cpu = 0;
skb_init_secmark(skb);
secpath_reset(skb);
nf_reset(skb);
@@ -4204,7 +4229,7 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb)
struct vlan_hdr *vhdr;
u16 vlan_tci;
- if (unlikely(vlan_tx_tag_present(skb))) {
+ if (unlikely(skb_vlan_tag_present(skb))) {
/* vlan_tci is already set-up so leave this for another time */
return skb;
}
@@ -4290,7 +4315,7 @@ int skb_vlan_pop(struct sk_buff *skb)
__be16 vlan_proto;
int err;
- if (likely(vlan_tx_tag_present(skb))) {
+ if (likely(skb_vlan_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
@@ -4320,7 +4345,7 @@ EXPORT_SYMBOL(skb_vlan_pop);
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
{
- if (vlan_tx_tag_present(skb)) {
+ if (skb_vlan_tag_present(skb)) {
unsigned int offset = skb->data - skb_mac_header(skb);
int err;
@@ -4330,7 +4355,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
*/
__skb_push(skb, offset);
err = __vlan_insert_tag(skb, skb->vlan_proto,
- vlan_tx_tag_get(skb));
+ skb_vlan_tag_get(skb));
if (err)
return err;
skb->protocol = skb->vlan_proto;
diff --git a/net/core/sock.c b/net/core/sock.c
index 1c7a33db1314..93c8b20c91e4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
+int sysctl_tstamp_allow_data __read_mostly = 1;
+
struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);
@@ -840,6 +842,7 @@ set_rcvbuf:
ret = -EINVAL;
break;
}
+
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
if (sk->sk_protocol == IPPROTO_TCP) {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 31baba2a71ce..433424804284 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -52,7 +52,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
if (write) {
if (size) {
- if (size > 1<<30) {
+ if (size > 1<<29) {
/* Enforce limit to prevent overflow */
mutex_unlock(&sock_flow_mutex);
return -EINVAL;
@@ -65,7 +65,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
mutex_unlock(&sock_flow_mutex);
return -ENOMEM;
}
-
+ rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
sock_table->mask = size - 1;
} else
sock_table = orig_sock_table;
@@ -155,7 +155,7 @@ write_unlock:
rcu_read_unlock();
len = min(sizeof(kbuf) - 1, *lenp);
- len = cpumask_scnprintf(kbuf, len, mask);
+ len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
if (!len) {
*lenp = 0;
goto done;
@@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tstamp_allow_data",
+ .data = &sysctl_tstamp_allow_data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one
+ },
#ifdef CONFIG_RPS
{
.procname = "rps_sock_flow_entries",