aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c46
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c115
-rw-r--r--net/core/link_watch.c4
-rw-r--r--net/core/net-sysfs.c10
-rw-r--r--net/core/net_namespace.c107
-rw-r--r--net/core/rtnetlink.c12
-rw-r--r--net/core/sock.c19
8 files changed, 274 insertions, 41 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 65492b0354c0..b2775f06c710 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -660,6 +660,27 @@ __setup("netdev=", netdev_boot_setup);
*******************************************************************************/
/**
+ * dev_get_iflink - get 'iflink' value of a interface
+ * @dev: targeted interface
+ *
+ * Indicates the ifindex the interface is linked to.
+ * Physical interfaces have the same 'ifindex' and 'iflink' values.
+ */
+
+int dev_get_iflink(const struct net_device *dev)
+{
+ if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
+ return dev->netdev_ops->ndo_get_iflink(dev);
+
+ /* If dev->rtnl_link_ops is set, it's a virtual interface. */
+ if (dev->rtnl_link_ops)
+ return 0;
+
+ return dev->ifindex;
+}
+EXPORT_SYMBOL(dev_get_iflink);
+
+/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
* @name: name to find
@@ -2849,14 +2870,16 @@ static void skb_update_prio(struct sk_buff *skb)
#define skb_update_prio(skb)
#endif
-static DEFINE_PER_CPU(int, xmit_recursion);
+DEFINE_PER_CPU(int, xmit_recursion);
+EXPORT_SYMBOL(xmit_recursion);
+
#define RECURSION_LIMIT 10
/**
* dev_loopback_xmit - loop back @skb
* @skb: buffer to transmit
*/
-int dev_loopback_xmit(struct sk_buff *skb)
+int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
{
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
@@ -2994,11 +3017,11 @@ out:
return rc;
}
-int dev_queue_xmit(struct sk_buff *skb)
+int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
{
return __dev_queue_xmit(skb, NULL);
}
-EXPORT_SYMBOL(dev_queue_xmit);
+EXPORT_SYMBOL(dev_queue_xmit_sk);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
{
@@ -3830,13 +3853,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
* NET_RX_SUCCESS: no congestion
* NET_RX_DROP: packet was dropped
*/
-int netif_receive_skb(struct sk_buff *skb)
+int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
{
trace_netif_receive_skb_entry(skb);
return netif_receive_skb_internal(skb);
}
-EXPORT_SYMBOL(netif_receive_skb);
+EXPORT_SYMBOL(netif_receive_skb_sk);
/* Network device is going away, flush any packets still pending
* Called with irqs disabled.
@@ -6314,8 +6337,6 @@ int register_netdevice(struct net_device *dev)
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
- dev->iflink = -1;
-
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
goto out;
@@ -6345,9 +6366,6 @@ int register_netdevice(struct net_device *dev)
else if (__dev_get_by_index(net, dev->ifindex))
goto err_uninit;
- if (dev->iflink == -1)
- dev->iflink = dev->ifindex;
-
/* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO).
*/
@@ -7060,12 +7078,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
dev_net_set(dev, net);
/* If there is an ifindex conflict assign a new one */
- if (__dev_get_by_index(net, dev->ifindex)) {
- int iflink = (dev->iflink == dev->ifindex);
+ if (__dev_get_by_index(net, dev->ifindex))
dev->ifindex = dev_new_index(net);
- if (iflink)
- dev->iflink = dev->ifindex;
- }
/* Send a netdev-add uevent to the new namespace */
kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 68ea6950cad1..9a12668f7d62 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -165,9 +165,9 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
spin_lock(&net->rules_mod_lock);
list_del_rcu(&ops->list);
- fib_rules_cleanup_ops(ops);
spin_unlock(&net->rules_mod_lock);
+ fib_rules_cleanup_ops(ops);
kfree_rcu(ops, rcu);
}
EXPORT_SYMBOL_GPL(fib_rules_unregister);
diff --git a/net/core/filter.c b/net/core/filter.c
index 444a07e4f68d..b669e75d2b36 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,7 +1175,9 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1)
+
+static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
unsigned int offset = (unsigned int) r2;
@@ -1192,7 +1194,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
*
* so check for invalid 'offset' and too large 'len'
*/
- if (offset > 0xffff || len > sizeof(buf))
+ if (unlikely(offset > 0xffff || len > sizeof(buf)))
return -EFAULT;
if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
@@ -1202,7 +1204,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
if (unlikely(!ptr))
return -EFAULT;
- skb_postpull_rcsum(skb, ptr, len);
+ if (BPF_RECOMPUTE_CSUM(flags))
+ skb_postpull_rcsum(skb, ptr, len);
memcpy(ptr, from, len);
@@ -1210,7 +1213,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
/* skb_store_bits cannot return -EFAULT here */
skb_store_bits(skb, offset, ptr, len);
- if (skb->ip_summed == CHECKSUM_COMPLETE)
+ if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
return 0;
}
@@ -1223,6 +1226,99 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_STACK,
.arg4_type = ARG_CONST_STACK_SIZE,
+ .arg5_type = ARG_ANYTHING,
+};
+
+#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f)
+#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10)
+
+static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ __sum16 sum, *ptr;
+
+ if (unlikely(offset > 0xffff))
+ return -EFAULT;
+
+ if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+ return -EFAULT;
+
+ ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+ if (unlikely(!ptr))
+ return -EFAULT;
+
+ switch (BPF_HEADER_FIELD_SIZE(flags)) {
+ case 2:
+ csum_replace2(ptr, from, to);
+ break;
+ case 4:
+ csum_replace4(ptr, from, to);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ptr == &sum)
+ /* skb_store_bits guaranteed to not return -EFAULT here */
+ skb_store_bits(skb, offset, ptr, sizeof(sum));
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_l3_csum_replace_proto = {
+ .func = bpf_l3_csum_replace,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+ __sum16 sum, *ptr;
+
+ if (unlikely(offset > 0xffff))
+ return -EFAULT;
+
+ if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+ return -EFAULT;
+
+ ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+ if (unlikely(!ptr))
+ return -EFAULT;
+
+ switch (BPF_HEADER_FIELD_SIZE(flags)) {
+ case 2:
+ inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
+ break;
+ case 4:
+ inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ptr == &sum)
+ /* skb_store_bits guaranteed to not return -EFAULT here */
+ skb_store_bits(skb, offset, ptr, sizeof(sum));
+
+ return 0;
+}
+
+const struct bpf_func_proto bpf_l4_csum_replace_proto = {
+ .func = bpf_l4_csum_replace,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
@@ -1250,6 +1346,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
switch (func_id) {
case BPF_FUNC_skb_store_bytes:
return &bpf_skb_store_bytes_proto;
+ case BPF_FUNC_l3_csum_replace:
+ return &bpf_l3_csum_replace_proto;
+ case BPF_FUNC_l4_csum_replace:
+ return &bpf_l4_csum_replace_proto;
default:
return sk_filter_func_proto(func_id);
}
@@ -1304,6 +1404,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
offsetof(struct sk_buff, vlan_proto));
break;
+ case offsetof(struct __sk_buff, priority):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
+ break;
+
case offsetof(struct __sk_buff, mark):
return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 49a9e3e06c08..982861607f88 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -40,7 +40,7 @@ static DEFINE_SPINLOCK(lweventlist_lock);
static unsigned char default_operstate(const struct net_device *dev)
{
if (!netif_carrier_ok(dev))
- return (dev->ifindex != dev->iflink ?
+ return (dev->ifindex != dev_get_iflink(dev) ?
IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN);
if (netif_dormant(dev))
@@ -89,7 +89,7 @@ static bool linkwatch_urgent_event(struct net_device *dev)
if (!netif_running(dev))
return false;
- if (dev->ifindex != dev->iflink)
+ if (dev->ifindex != dev_get_iflink(dev))
return true;
if (dev->priv_flags & IFF_TEAM_PORT)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index cc5cf689809c..4238d6da5c60 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -109,11 +109,19 @@ NETDEVICE_SHOW_RO(dev_id, fmt_hex);
NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
-NETDEVICE_SHOW_RO(iflink, fmt_dec);
NETDEVICE_SHOW_RO(ifindex, fmt_dec);
NETDEVICE_SHOW_RO(type, fmt_dec);
NETDEVICE_SHOW_RO(link_mode, fmt_dec);
+static ssize_t iflink_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *ndev = to_net_dev(dev);
+
+ return sprintf(buf, fmt_dec, dev_get_iflink(ndev));
+}
+static DEVICE_ATTR_RO(iflink);
+
static ssize_t format_name_assign_type(const struct net_device *dev, char *buf)
{
return sprintf(buf, fmt_dec, dev->name_assign_type);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e5e96b0f6717..a3abb719221f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -148,9 +148,11 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
+static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
+ int id);
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
- int min = 0, max = 0;
+ int min = 0, max = 0, id;
ASSERT_RTNL();
@@ -159,7 +161,11 @@ static int alloc_netid(struct net *net, struct net *peer, int reqid)
max = reqid + 1;
}
- return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+ id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+ if (id >= 0)
+ rtnl_net_notifyid(net, peer, RTM_NEWNSID, id);
+
+ return id;
}
/* This function is used by idr_for_each(). If net is equal to peer, the
@@ -198,8 +204,10 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc)
*/
int peernet2id(struct net *net, struct net *peer)
{
- int id = __peernet2id(net, peer, true);
+ bool alloc = atomic_read(&peer->count) == 0 ? false : true;
+ int id;
+ id = __peernet2id(net, peer, alloc);
return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
}
EXPORT_SYMBOL(peernet2id);
@@ -357,8 +365,10 @@ static void cleanup_net(struct work_struct *work)
for_each_net(tmp) {
int id = __peernet2id(tmp, net, false);
- if (id >= 0)
+ if (id >= 0) {
+ rtnl_net_notifyid(tmp, net, RTM_DELNSID, id);
idr_remove(&tmp->netns_ids, id);
+ }
}
idr_destroy(&net->netns_ids);
@@ -529,7 +539,8 @@ static int rtnl_net_get_size(void)
}
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
- int cmd, struct net *net, struct net *peer)
+ int cmd, struct net *net, struct net *peer,
+ int nsid)
{
struct nlmsghdr *nlh;
struct rtgenmsg *rth;
@@ -544,9 +555,13 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
rth = nlmsg_data(nlh);
rth->rtgen_family = AF_UNSPEC;
- id = __peernet2id(net, peer, false);
- if (id < 0)
- id = NETNSA_NSID_NOT_ASSIGNED;
+ if (nsid >= 0) {
+ id = nsid;
+ } else {
+ id = __peernet2id(net, peer, false);
+ if (id < 0)
+ id = NETNSA_NSID_NOT_ASSIGNED;
+ }
if (nla_put_s32(skb, NETNSA_NSID, id))
goto nla_put_failure;
@@ -563,8 +578,8 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
struct sk_buff *msg;
- int err = -ENOBUFS;
struct net *peer;
+ int err;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
rtnl_net_policy);
@@ -587,7 +602,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
}
err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- RTM_GETNSID, net, peer);
+ RTM_GETNSID, net, peer, -1);
if (err < 0)
goto err_out;
@@ -601,6 +616,75 @@ out:
return err;
}
+struct rtnl_net_dump_cb {
+ struct net *net;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+ int idx;
+ int s_idx;
+};
+
+static int rtnl_net_dumpid_one(int id, void *peer, void *data)
+{
+ struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
+ int ret;
+
+ if (net_cb->idx < net_cb->s_idx)
+ goto cont;
+
+ ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
+ net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWNSID, net_cb->net, peer, id);
+ if (ret < 0)
+ return ret;
+
+cont:
+ net_cb->idx++;
+ return 0;
+}
+
+static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct rtnl_net_dump_cb net_cb = {
+ .net = net,
+ .skb = skb,
+ .cb = cb,
+ .idx = 0,
+ .s_idx = cb->args[0],
+ };
+
+ ASSERT_RTNL();
+
+ idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
+
+ cb->args[0] = net_cb.idx;
+ return skb->len;
+}
+
+static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
+ int id)
+{
+ struct sk_buff *msg;
+ int err = -ENOMEM;
+
+ msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id);
+ if (err < 0)
+ goto err_out;
+
+ rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
+ return;
+
+err_out:
+ nlmsg_free(msg);
+out:
+ rtnl_set_sk_err(net, RTNLGRP_NSID, err);
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -635,7 +719,8 @@ static int __init net_ns_init(void)
register_pernet_subsys(&net_ns_ops);
rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
+ NULL);
return 0;
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b96ac2109c82..5e02260b087f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1055,8 +1055,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
- (dev->ifindex != dev->iflink &&
- nla_put_u32(skb, IFLA_LINK, dev->iflink)) ||
+ (dev->ifindex != dev_get_iflink(dev) &&
+ nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
(upper_dev &&
nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
@@ -1991,10 +1991,10 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
struct ifinfomsg *ifm,
struct nlattr **tb)
{
- struct net_device *dev;
+ struct net_device *dev, *aux;
int err;
- for_each_netdev(net, dev) {
+ for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
err = do_setlink(skb, dev, ifm, tb, NULL, 0);
if (err < 0)
@@ -2863,8 +2863,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) ||
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
- (dev->ifindex != dev->iflink &&
- nla_put_u32(skb, IFLA_LINK, dev->iflink)))
+ (dev->ifindex != dev_get_iflink(dev) &&
+ nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
goto nla_put_failure;
br_afspec = nla_nest_start(skb, IFLA_AF_SPEC);
diff --git a/net/core/sock.c b/net/core/sock.c
index 119ae464b44a..654e38a99759 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -653,6 +653,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
sock_reset_flag(sk, bit);
}
+bool sk_mc_loop(struct sock *sk)
+{
+ if (dev_recursion_level())
+ return false;
+ if (!sk)
+ return true;
+ switch (sk->sk_family) {
+ case AF_INET:
+ return inet_sk(sk)->mc_loop;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ return inet6_sk(sk)->mc_loop;
+#endif
+ }
+ WARN_ON(1);
+ return true;
+}
+EXPORT_SYMBOL(sk_mc_loop);
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.