aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c133
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/filter.c142
-rw-r--r--net/core/lwtunnel.c4
-rw-r--r--net/core/neighbour.c45
-rw-r--r--net/core/net-sysfs.c11
-rw-r--r--net/core/netpoll.c23
-rw-r--r--net/core/ptp_classifier.c16
-rw-r--r--net/core/request_sock.c88
-rw-r--r--net/core/rtnetlink.c38
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/sock.c81
-rw-r--r--net/core/sock_diag.c14
-rw-r--r--net/core/tso.c18
-rw-r--r--net/core/utils.c49
16 files changed, 442 insertions, 246 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 6bb6470f5b7b..ab9b8d0d115e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -99,6 +99,7 @@
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
@@ -682,6 +683,32 @@ int dev_get_iflink(const struct net_device *dev)
EXPORT_SYMBOL(dev_get_iflink);
/**
+ * dev_fill_metadata_dst - Retrieve tunnel egress information.
+ * @dev: targeted interface
+ * @skb: The packet.
+ *
+ * For better visibility of tunnel traffic OVS needs to retrieve
+ * egress tunnel information for a packet. Following API allows
+ * user to get this info.
+ */
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ip_tunnel_info *info;
+
+ if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
+ return -EINVAL;
+
+ info = skb_tunnel_info_unclone(skb);
+ if (!info)
+ return -ENOMEM;
+ if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+ return -EINVAL;
+
+ return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+}
+EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
+/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
* @name: name to find
@@ -2915,9 +2942,11 @@ EXPORT_SYMBOL(xmit_recursion);
/**
* dev_loopback_xmit - loop back @skb
+ * @net: network namespace this loopback is happening in
+ * @sk: sk needed to be a netfilter okfn
* @skb: buffer to transmit
*/
-int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
+int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
@@ -2972,6 +3001,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
new_index = skb_tx_hash(dev, skb);
if (queue_index != new_index && sk &&
+ sk_fullsock(sk) &&
rcu_access_pointer(sk->sk_dst_cache))
sk_tx_queue_set(sk, new_index);
@@ -3143,11 +3173,11 @@ out:
return rc;
}
-int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
+int dev_queue_xmit(struct sk_buff *skb)
{
return __dev_queue_xmit(skb, NULL);
}
-EXPORT_SYMBOL(dev_queue_xmit_sk);
+EXPORT_SYMBOL(dev_queue_xmit);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
{
@@ -3668,6 +3698,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by cls/act_bpf, so
+ * we can safely push the L2 header back before
+ * redirecting to another netdev
+ */
+ __skb_push(skb, skb->mac_len);
+ skb_do_redirect(skb);
+ return NULL;
default:
break;
}
@@ -3982,13 +4020,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
* NET_RX_SUCCESS: no congestion
* NET_RX_DROP: packet was dropped
*/
-int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
+int netif_receive_skb(struct sk_buff *skb)
{
trace_netif_receive_skb_entry(skb);
return netif_receive_skb_internal(skb);
}
-EXPORT_SYMBOL(netif_receive_skb_sk);
+EXPORT_SYMBOL(netif_receive_skb);
/* Network device is going away, flush any packets still pending
* Called with irqs disabled.
@@ -4857,8 +4895,7 @@ struct netdev_adjacent {
struct rcu_head rcu;
};
-static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
- struct net_device *adj_dev,
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
struct list_head *adj_list)
{
struct netdev_adjacent *adj;
@@ -4884,7 +4921,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
{
ASSERT_RTNL();
- return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
+ return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -5146,7 +5183,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct netdev_adjacent *adj;
int ret;
- adj = __netdev_find_adj(dev, adj_dev, dev_list);
+ adj = __netdev_find_adj(adj_dev, dev_list);
if (adj) {
adj->ref_nr++;
@@ -5202,7 +5239,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
{
struct netdev_adjacent *adj;
- adj = __netdev_find_adj(dev, adj_dev, dev_list);
+ adj = __netdev_find_adj(adj_dev, dev_list);
if (!adj) {
pr_err("tried to remove device %s from %s\n",
@@ -5323,10 +5360,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
+ if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
+ if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
@@ -5336,6 +5373,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
changeupper_info.master = master;
changeupper_info.linking = true;
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ &changeupper_info.info);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ return ret;
+
ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
master);
if (ret)
@@ -5478,6 +5521,9 @@ void netdev_upper_dev_unlink(struct net_device *dev,
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
changeupper_info.linking = false;
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ &changeupper_info.info);
+
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
/* Here is the tricky part. We must remove all dev's lower
@@ -5604,7 +5650,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
if (!lower_dev)
return NULL;
- lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+ lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
if (!lower)
return NULL;
@@ -6242,6 +6288,48 @@ static void rollback_registered(struct net_device *dev)
list_del(&single);
}
+static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
+ struct net_device *upper, netdev_features_t features)
+{
+ netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
+ netdev_features_t feature;
+ int feature_bit;
+
+ for_each_netdev_feature(&upper_disables, feature_bit) {
+ feature = __NETIF_F_BIT(feature_bit);
+ if (!(upper->wanted_features & feature)
+ && (features & feature)) {
+ netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
+ &feature, upper->name);
+ features &= ~feature;
+ }
+ }
+
+ return features;
+}
+
+static void netdev_sync_lower_features(struct net_device *upper,
+ struct net_device *lower, netdev_features_t features)
+{
+ netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
+ netdev_features_t feature;
+ int feature_bit;
+
+ for_each_netdev_feature(&upper_disables, feature_bit) {
+ feature = __NETIF_F_BIT(feature_bit);
+ if (!(features & feature) && (lower->features & feature)) {
+ netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
+ &feature, lower->name);
+ lower->wanted_features &= ~feature;
+ netdev_update_features(lower);
+
+ if (unlikely(lower->features & feature))
+ netdev_WARN(upper, "failed to disable %pNF on %s!\n",
+ &feature, lower->name);
+ }
+ }
+}
+
static netdev_features_t netdev_fix_features(struct net_device *dev,
netdev_features_t features)
{
@@ -6311,8 +6399,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
int __netdev_update_features(struct net_device *dev)
{
+ struct net_device *upper, *lower;
netdev_features_t features;
- int err = 0;
+ struct list_head *iter;
+ int err = -1;
ASSERT_RTNL();
@@ -6324,8 +6414,12 @@ int __netdev_update_features(struct net_device *dev)
/* driver might be less strict about feature dependencies */
features = netdev_fix_features(dev, features);
+ /* some features can't be enabled if they're off an an upper device */
+ netdev_for_each_upper_dev_rcu(dev, upper, iter)
+ features = netdev_sync_upper_features(dev, upper, features);
+
if (dev->features == features)
- return 0;
+ goto sync_lower;
netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
&dev->features, &features);
@@ -6340,10 +6434,17 @@ int __netdev_update_features(struct net_device *dev)
return -1;
}
+sync_lower:
+ /* some features must be disabled on lower devices when disabled
+ * on an upper device (think: bonding master or bridge)
+ */
+ netdev_for_each_lower_dev(dev, lower, iter)
+ netdev_sync_lower_features(dev, lower, features);
+
if (!err)
dev->features = features;
- return 1;
+ return err < 0 ? 0 : 1;
}
/**
diff --git a/net/core/dst.c b/net/core/dst.c
index 0771c8cb9307..e6dc77252fe9 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -144,12 +144,12 @@ loop:
mutex_unlock(&dst_gc_mutex);
}
-int dst_discard_sk(struct sock *sk, struct sk_buff *skb)
+int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(dst_discard_sk);
+EXPORT_SYMBOL(dst_discard_out);
const u32 dst_default_metrics[RTAX_MAX + 1] = {
/* This initializer is needed to force linker to place this variable
@@ -177,7 +177,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->xfrm = NULL;
#endif
dst->input = dst_discard;
- dst->output = dst_discard_sk;
+ dst->output = dst_discard_out;
dst->error = 0;
dst->obsolete = initial_obsolete;
dst->header_len = 0;
@@ -224,7 +224,7 @@ static void ___dst_free(struct dst_entry *dst)
*/
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
dst->input = dst_discard;
- dst->output = dst_discard_sk;
+ dst->output = dst_discard_out;
}
dst->obsolete = DST_OBSOLETE_DEAD;
}
@@ -306,7 +306,7 @@ void dst_release(struct dst_entry *dst)
if (unlikely(newrefcnt < 0))
net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
__func__, dst, newrefcnt);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
@@ -352,7 +352,7 @@ static struct dst_ops md_dst_ops = {
.family = AF_UNSPEC,
};
-static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb)
+static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
WARN_ONCE(1, "Attempting to call output on metadata dst\n");
kfree_skb(skb);
@@ -375,7 +375,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
dst->input = dst_md_discard;
- dst->output = dst_md_discard_sk;
+ dst->output = dst_md_discard_out;
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
}
@@ -430,7 +430,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst_discard;
- dst->output = dst_discard_sk;
+ dst->output = dst_discard_out;
} else {
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index b495ab1797fa..29edf74846fc 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1284,7 +1284,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
gstrings.len = ret;
- data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+ data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER);
if (!data)
return -ENOMEM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 05a04ea87172..672eefbfbe99 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -49,16 +49,17 @@
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
+#include <net/dst.h>
/**
* sk_filter - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
*
- * Run the filter code and then cut skb->data to correct size returned by
- * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * Run the eBPF program and then cut skb->data to correct size returned by
+ * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
+ * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -82,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
+ unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -148,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return raw_smp_processor_id();
}
-/* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
-{
- return prandom_u32();
-}
-
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
struct bpf_insn *insn_buf)
{
@@ -312,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
break;
case SKF_AD_OFF + SKF_AD_RANDOM:
- *insn = BPF_EMIT_CALL(__get_random_u32);
+ *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
+ bpf_user_rnd_init_once();
break;
}
break;
@@ -1001,7 +997,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
int err;
fp->bpf_func = NULL;
- fp->jited = false;
+ fp->jited = 0;
err = bpf_check_classic(fp->insns, fp->len);
if (err) {
@@ -1083,16 +1079,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create);
* @pfp: the unattached filter that is created
* @fprog: the filter program
* @trans: post-classic verifier transformation handler
+ * @save_orig: save classic BPF program
*
* This function effectively does the same as bpf_prog_create(), only
* that it builds up its insns buffer from user space provided buffer.
* It also allows for passing a bpf_aux_classic_check_t handler.
*/
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
- bpf_aux_classic_check_t trans)
+ bpf_aux_classic_check_t trans, bool save_orig)
{
unsigned int fsize = bpf_classic_proglen(fprog);
struct bpf_prog *fp;
+ int err;
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
@@ -1108,12 +1106,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
}
fp->len = fprog->len;
- /* Since unattached filters are not copied back to user
- * space through sk_get_filter(), we do not need to hold
- * a copy here, and can spare us the work.
- */
fp->orig_prog = NULL;
+ if (save_orig) {
+ err = bpf_prog_store_orig_filter(fp, fprog);
+ if (err) {
+ __bpf_prog_free(fp);
+ return -ENOMEM;
+ }
+ }
+
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
@@ -1404,9 +1406,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
if (unlikely(!dev))
return -EINVAL;
- if (unlikely(!(dev->flags & IFF_UP)))
- return -EINVAL;
-
skb2 = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!skb2))
return -ENOMEM;
@@ -1415,6 +1414,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
return dev_forward_skb(dev, skb2);
skb2->dev = dev;
+ skb_sender_cpu_clear(skb2);
return dev_queue_xmit(skb2);
}
@@ -1427,6 +1427,49 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
+struct redirect_info {
+ u32 ifindex;
+ u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+ return TC_ACT_REDIRECT;
+}
+
+int skb_do_redirect(struct sk_buff *skb)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+ ri->ifindex = 0;
+ if (unlikely(!dev)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+ return dev_forward_skb(dev, skb);
+
+ skb->dev = dev;
+ skb_sender_cpu_clear(skb);
+ return dev_queue_xmit(skb);
+}
+
+const struct bpf_func_proto bpf_redirect_proto = {
+ .func = bpf_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
return task_get_classid((struct sk_buff *) (unsigned long) r1);
@@ -1439,6 +1482,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ const struct dst_entry *dst;
+
+ dst = skb_dst((struct sk_buff *) (unsigned long) r1);
+ if (dst)
+ return dst->tclassid;
+#endif
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_get_route_realm_proto = {
+ .func = bpf_get_route_realm,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
@@ -1579,7 +1641,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_trace_printk:
- return bpf_get_trace_printk_proto();
+ if (capable(CAP_SYS_ADMIN))
+ return bpf_get_trace_printk_proto();
default:
return NULL;
}
@@ -1607,6 +1670,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
return bpf_get_skb_set_tunnel_key_proto();
+ case BPF_FUNC_redirect:
+ return &bpf_redirect_proto;
+ case BPF_FUNC_get_route_realm:
+ return &bpf_get_route_realm_proto;
default:
return sk_filter_func_proto(func_id);
}
@@ -1632,6 +1699,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type)
{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return false;
+
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, cb[0]) ...
@@ -1648,10 +1718,14 @@ static bool sk_filter_is_valid_access(int off, int size,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type)
{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return type == BPF_WRITE ? true : false;
+
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, mark):
case offsetof(struct __sk_buff, tc_index):
+ case offsetof(struct __sk_buff, priority):
case offsetof(struct __sk_buff, cb[0]) ...
offsetof(struct __sk_buff, cb[4]):
break;
@@ -1664,7 +1738,8 @@ static bool tc_cls_act_is_valid_access(int off, int size,
static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
- struct bpf_insn *insn_buf)
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
@@ -1693,8 +1768,12 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
case offsetof(struct __sk_buff, priority):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
- offsetof(struct sk_buff, priority));
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
break;
case offsetof(struct __sk_buff, ingress_ifindex):
@@ -1751,6 +1830,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
offsetof(struct __sk_buff, cb[4]):
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+ prog->cb_access = 1;
ctx_off -= offsetof(struct __sk_buff, cb[0]);
ctx_off += offsetof(struct sk_buff, cb);
ctx_off += offsetof(struct qdisc_skb_cb, data);
@@ -1760,6 +1840,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
break;
+ case offsetof(struct __sk_buff, tc_classid):
+ ctx_off -= offsetof(struct __sk_buff, tc_classid);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+ WARN_ON(type != BPF_WRITE);
+ *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ break;
+
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
@@ -1854,9 +1942,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
goto out;
/* We're copying the filter that has been originally attached,
- * so no conversion/decode needed anymore.
+ * so no conversion/decode needed anymore. eBPF programs that
+ * have no original program cannot be dumped through this.
*/
+ ret = -EACCES;
fprog = filter->prog->orig_prog;
+ if (!fprog)
+ goto out;
ret = fprog->len;
if (!len)
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index dfb1a9ca0835..299cfc24d888 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -180,7 +180,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
}
EXPORT_SYMBOL(lwtunnel_cmp_encap);
-int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
+int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
@@ -199,7 +199,7 @@ int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
if (likely(ops && ops->output))
- ret = ops->output(sk, skb);
+ ret = ops->output(net, sk, skb);
rcu_read_unlock();
if (ret == -EOPNOTSUPP)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2b515ba7e94f..1aa8437ed6c4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2235,14 +2235,53 @@ static void neigh_update_notify(struct neighbour *neigh)
__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
+static bool neigh_master_filtered(struct net_device *dev, int master_idx)
+{
+ struct net_device *master;
+
+ if (!master_idx)
+ return false;
+
+ master = netdev_master_upper_dev_get(dev);
+ if (!master || master->ifindex != master_idx)
+ return true;
+
+ return false;
+}
+
+static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
+{
+ if (filter_idx && dev->ifindex != filter_idx)
+ return true;
+
+ return false;
+}
+
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct nlattr *tb[NDA_MAX + 1];
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
struct neigh_hash_table *nht;
+ int filter_master_idx = 0, filter_idx = 0;
+ unsigned int flags = NLM_F_MULTI;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
+ if (!err) {
+ if (tb[NDA_IFINDEX])
+ filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
+
+ if (tb[NDA_MASTER])
+ filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
+
+ if (filter_idx || filter_master_idx)
+ flags |= NLM_F_DUMP_FILTERED;
+ }
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
@@ -2255,12 +2294,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
n = rcu_dereference_bh(n->next)) {
if (!net_eq(dev_net(n->dev), net))
continue;
+ if (neigh_ifindex_filtered(n->dev, filter_idx))
+ continue;
+ if (neigh_master_filtered(n->dev, filter_master_idx))
+ continue;
if (idx < s_idx)
goto next;
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
- NLM_F_MULTI) < 0) {
+ flags) < 0) {
rc = -1;
goto out;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 830f8a7c1cb1..f88a62ab019d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -471,7 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
if (dev_isalive(netdev)) {
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
@@ -1003,15 +1003,12 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
-static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
- int i;
-
- for (i = 0; i < dev->num_tx_queues; i++)
- if (queue == &dev->_tx[i])
- break;
+ unsigned int i;
+ i = queue - dev->_tx;
BUG_ON(i >= dev->num_tx_queues);
return i;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8bdada242a7d..94acfc89ad97 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -140,7 +140,7 @@ static void queue_process(struct work_struct *work)
* case. Further, we test the poll_owner to avoid recursion on UP
* systems where the lock doesn't exist.
*/
-static int poll_one_napi(struct napi_struct *napi, int budget)
+static void poll_one_napi(struct napi_struct *napi)
{
int work = 0;
@@ -149,33 +149,33 @@ static int poll_one_napi(struct napi_struct *napi, int budget)
* holding the napi->poll_lock.
*/
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
- return budget;
+ return;
/* If we set this bit but see that it has already been set,
* that indicates that napi has been disabled and we need
* to abort this operation
*/
if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
- goto out;
+ return;
- work = napi->poll(napi, budget);
- WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
+ /* We explicilty pass the polling call a budget of 0 to
+ * indicate that we are clearing the Tx path only.
+ */
+ work = napi->poll(napi, 0);
+ WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
trace_napi_poll(napi);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
-
-out:
- return budget - work;
}
-static void poll_napi(struct net_device *dev, int budget)
+static void poll_napi(struct net_device *dev)
{
struct napi_struct *napi;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(napi, budget);
+ poll_one_napi(napi);
spin_unlock(&napi->poll_lock);
}
}
@@ -185,7 +185,6 @@ static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
- int budget = 0;
/* Don't do any rx activity if the dev_lock mutex is held
* the dev_open/close paths use this to block netpoll activity
@@ -208,7 +207,7 @@ static void netpoll_poll_dev(struct net_device *dev)
/* Process pending work on NIC */
ops->ndo_poll_controller(dev);
- poll_napi(dev, budget);
+ poll_napi(dev);
up(&ni->dev_lock);
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 4eab4a94a59d..703cf76aa7c2 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -58,7 +58,7 @@
* jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
* ldh [18] ; reload payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2
+ * or #0xc0 ; PTP_CLASS_VLAN|PTP_CLASS_L2
* ret a ; return PTP class
*
* ; PTP over UDP over IPv4 over 802.1Q over Ethernet
@@ -73,7 +73,7 @@
* jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ?
* ldh [x + 26] ; load payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
+ * or #0x90 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4
* ret a ; return PTP class
* drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE
*
@@ -86,7 +86,7 @@
* jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ?
* ldh [66] ; load payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
+ * or #0xa0 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6
* ret a ; return PTP class
* drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE
*
@@ -98,7 +98,7 @@
* jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these
* ldh [14] ; reload payload
* and #0xf ; mask PTP_CLASS_VMASK
- * or #0x30 ; PTP_CLASS_L2
+ * or #0x40 ; PTP_CLASS_L2
* ret a ; return PTP class
* drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE
*/
@@ -150,7 +150,7 @@ void __init ptp_classifier_init(void)
{ 0x15, 0, 35, 0x00000000 },
{ 0x28, 0, 0, 0x00000012 },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000070 },
+ { 0x44, 0, 0, 0x000000c0 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x15, 0, 12, 0x00000800 },
{ 0x30, 0, 0, 0x0000001b },
@@ -162,7 +162,7 @@ void __init ptp_classifier_init(void)
{ 0x15, 0, 4, 0x0000013f },
{ 0x48, 0, 0, 0x0000001a },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000050 },
+ { 0x44, 0, 0, 0x00000090 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
{ 0x15, 0, 8, 0x000086dd },
@@ -172,7 +172,7 @@ void __init ptp_classifier_init(void)
{ 0x15, 0, 4, 0x0000013f },
{ 0x28, 0, 0, 0x00000042 },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000060 },
+ { 0x44, 0, 0, 0x000000a0 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
{ 0x15, 0, 7, 0x000088f7 },
@@ -181,7 +181,7 @@ void __init ptp_classifier_init(void)
{ 0x15, 0, 4, 0x00000000 },
{ 0x28, 0, 0, 0x0000000e },
{ 0x54, 0, 0, 0x0000000f },
- { 0x44, 0, 0, 0x00000030 },
+ { 0x44, 0, 0, 0x00000040 },
{ 0x16, 0, 0, 0x00000000 },
{ 0x06, 0, 0, 0x00000000 },
};
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index b42f0e26f89e..5d26056b6d8f 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -37,90 +37,16 @@
int sysctl_max_syn_backlog = 256;
EXPORT_SYMBOL(sysctl_max_syn_backlog);
-int reqsk_queue_alloc(struct request_sock_queue *queue,
- unsigned int nr_table_entries)
+void reqsk_queue_alloc(struct request_sock_queue *queue)
{
- size_t lopt_size = sizeof(struct listen_sock);
- struct listen_sock *lopt = NULL;
+ spin_lock_init(&queue->rskq_lock);
- nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
- nr_table_entries = max_t(u32, nr_table_entries, 8);
- nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
- lopt_size += nr_table_entries * sizeof(struct request_sock *);
+ spin_lock_init(&queue->fastopenq.lock);
+ queue->fastopenq.rskq_rst_head = NULL;
+ queue->fastopenq.rskq_rst_tail = NULL;
+ queue->fastopenq.qlen = 0;
- if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
- lopt = kzalloc(lopt_size, GFP_KERNEL |
- __GFP_NOWARN |
- __GFP_NORETRY);
- if (!lopt)
- lopt = vzalloc(lopt_size);
- if (!lopt)
- return -ENOMEM;
-
- get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
- spin_lock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = NULL;
- lopt->nr_table_entries = nr_table_entries;
- lopt->max_qlen_log = ilog2(nr_table_entries);
-
- spin_lock_bh(&queue->syn_wait_lock);
- queue->listen_opt = lopt;
- spin_unlock_bh(&queue->syn_wait_lock);
-
- return 0;
-}
-
-void __reqsk_queue_destroy(struct request_sock_queue *queue)
-{
- /* This is an error recovery path only, no locking needed */
- kvfree(queue->listen_opt);
-}
-
-static inline struct listen_sock *reqsk_queue_yank_listen_sk(
- struct request_sock_queue *queue)
-{
- struct listen_sock *lopt;
-
- spin_lock_bh(&queue->syn_wait_lock);
- lopt = queue->listen_opt;
- queue->listen_opt = NULL;
- spin_unlock_bh(&queue->syn_wait_lock);
-
- return lopt;
-}
-
-void reqsk_queue_destroy(struct request_sock_queue *queue)
-{
- /* make all the listen_opt local to us */
- struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
-
- if (listen_sock_qlen(lopt) != 0) {
- unsigned int i;
-
- for (i = 0; i < lopt->nr_table_entries; i++) {
- struct request_sock *req;
-
- spin_lock_bh(&queue->syn_wait_lock);
- while ((req = lopt->syn_table[i]) != NULL) {
- lopt->syn_table[i] = req->dl_next;
- /* Because of following del_timer_sync(),
- * we must release the spinlock here
- * or risk a dead lock.
- */
- spin_unlock_bh(&queue->syn_wait_lock);
- atomic_inc(&lopt->qlen_dec);
- if (del_timer_sync(&req->rsk_timer))
- reqsk_put(req);
- reqsk_put(req);
- spin_lock_bh(&queue->syn_wait_lock);
- }
- spin_unlock_bh(&queue->syn_wait_lock);
- }
- }
-
- if (WARN_ON(listen_sock_qlen(lopt) != 0))
- pr_err("qlen %u\n", listen_sock_qlen(lopt));
- kvfree(lopt);
}
/*
@@ -174,7 +100,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
struct sock *lsk = req->rsk_listener;
struct fastopen_queue *fastopenq;
- fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
+ fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq;
tcp_sk(sk)->fastopen_rsk = NULL;
spin_lock_bh(&fastopenq->lock);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0ec48403ed68..504bd17b7456 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -96,7 +96,7 @@ int rtnl_is_locked(void)
EXPORT_SYMBOL(rtnl_is_locked);
#ifdef CONFIG_PROVE_LOCKING
-int lockdep_rtnl_is_held(void)
+bool lockdep_rtnl_is_held(void)
{
return lockdep_is_held(&rtnl_mutex);
}
@@ -497,7 +497,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops)
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
-static size_t rtnl_link_get_af_size(const struct net_device *dev)
+static size_t rtnl_link_get_af_size(const struct net_device *dev,
+ u32 ext_filter_mask)
{
struct rtnl_af_ops *af_ops;
size_t size;
@@ -509,7 +510,7 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev)
if (af_ops->get_link_af_size) {
/* AF_* + nested data */
size += nla_total_size(sizeof(struct nlattr)) +
- af_ops->get_link_af_size(dev);
+ af_ops->get_link_af_size(dev, ext_filter_mask);
}
}
@@ -837,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
/* IFLA_VF_STATS_BROADCAST */
nla_total_size(sizeof(__u64)) +
/* IFLA_VF_STATS_MULTICAST */
- nla_total_size(sizeof(__u64)));
+ nla_total_size(sizeof(__u64)) +
+ nla_total_size(sizeof(struct ifla_vf_trust)));
return size;
} else
return 0;
@@ -900,7 +902,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
+ rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
- + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+ + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
@@ -1025,7 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
@@ -1160,6 +1162,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_link_state vf_linkstate;
struct ifla_vf_rss_query_en vf_rss_query_en;
struct ifla_vf_stats vf_stats;
+ struct ifla_vf_trust vf_trust;
/*
* Not all SR-IOV capable drivers support the
@@ -1169,6 +1172,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
*/
ivi.spoofchk = -1;
ivi.rss_query_en = -1;
+ ivi.trusted = -1;
memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1182,7 +1186,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_tx_rate.vf =
vf_spoofchk.vf =
vf_linkstate.vf =
- vf_rss_query_en.vf = ivi.vf;
+ vf_rss_query_en.vf =
+ vf_trust.vf = ivi.vf;
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
@@ -1193,6 +1198,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
vf_rss_query_en.setting = ivi.rss_query_en;
+ vf_trust.setting = ivi.trusted;
vf = nla_nest_start(skb, IFLA_VF_INFO);
if (!vf) {
nla_nest_cancel(skb, vfinfo);
@@ -1210,7 +1216,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
&vf_linkstate) ||
nla_put(skb, IFLA_VF_RSS_QUERY_EN,
sizeof(vf_rss_query_en),
- &vf_rss_query_en))
+ &vf_rss_query_en) ||
+ nla_put(skb, IFLA_VF_TRUST,
+ sizeof(vf_trust), &vf_trust))
goto nla_put_failure;
memset(&vf_stats, 0, sizeof(vf_stats));
if (dev->netdev_ops->ndo_get_vf_stats)
@@ -1272,7 +1280,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (!(af = nla_nest_start(skb, af_ops->family)))
goto nla_put_failure;
- err = af_ops->fill_link_af(skb, dev);
+ err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
/*
* Caller may return ENODATA to indicate that there
@@ -1347,6 +1355,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
[IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
[IFLA_VF_STATS] = { .type = NLA_NESTED },
+ [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) },
};
static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
@@ -1586,6 +1595,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
+ if (tb[IFLA_VF_TRUST]) {
+ struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_trust)
+ err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
+ if (err < 0)
+ return err;
+ }
+
return err;
}
@@ -3443,4 +3462,3 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
}
-
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fab4599ba8b2..aa41e6dd6429 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
len += NET_SKB_PAD;
if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
- (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+ (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
goto skb_fail;
@@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
len += NET_SKB_PAD + NET_IP_ALIGN;
if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
- (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+ (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
goto skb_fail;
@@ -4452,7 +4452,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
return NULL;
gfp_head = gfp_mask;
- if (gfp_head & __GFP_WAIT)
+ if (gfp_head & __GFP_DIRECT_RECLAIM)
gfp_head |= __GFP_REPEAT;
*errcode = -ENOBUFS;
@@ -4467,7 +4467,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
while (order) {
if (npages >= 1 << order) {
- page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
+ page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP |
__GFP_NOWARN |
__GFP_NORETRY,
diff --git a/net/core/sock.c b/net/core/sock.c
index 3307c02244d3..1e4dd54bfb5a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -422,13 +422,25 @@ static void sock_warn_obsolete_bsdism(const char *name)
}
}
+static bool sock_needs_netstamp(const struct sock *sk)
+{
+ switch (sk->sk_family) {
+ case AF_UNSPEC:
+ case AF_UNIX:
+ return false;
+ default:
+ return true;
+ }
+}
+
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
if (sk->sk_flags & flags) {
sk->sk_flags &= ~flags;
- if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
+ if (sock_needs_netstamp(sk) &&
+ !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
net_disable_timestamp();
}
}
@@ -988,6 +1000,10 @@ set_rcvbuf:
sk->sk_max_pacing_rate);
break;
+ case SO_INCOMING_CPU:
+ sk->sk_incoming_cpu = val;
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1578,7 +1594,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
if (newsk->sk_prot->sockets_allocated)
sk_sockets_allocated_inc(newsk);
- if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+ if (sock_needs_netstamp(sk) &&
+ newsk->sk_flags & SK_FLAGS_TIMESTAMP)
net_enable_timestamp();
}
out:
@@ -1639,6 +1656,28 @@ void sock_wfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_wfree);
+void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+#ifdef CONFIG_INET
+ if (unlikely(!sk_fullsock(sk))) {
+ skb->destructor = sock_edemux;
+ sock_hold(sk);
+ return;
+ }
+#endif
+ skb->destructor = sock_wfree;
+ skb_set_hash_from_sk(skb, sk);
+ /*
+ * We used to take a refcount on sk, but following operation
+ * is enough to guarantee sk_free() wont free this sock until
+ * all in-flight packets are completed
+ */
+ atomic_add(skb->truesize, &sk->sk_wmem_alloc);
+}
+EXPORT_SYMBOL(skb_set_owner_w);
+
void skb_orphan_partial(struct sk_buff *skb)
{
/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
@@ -1852,6 +1891,32 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
}
EXPORT_SYMBOL(sock_alloc_send_skb);
+int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
+ struct sockcm_cookie *sockc)
+{
+ struct cmsghdr *cmsg;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+ switch (cmsg->cmsg_type) {
+ case SO_MARK:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+ sockc->mark = *(u32 *)CMSG_DATA(cmsg);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(sock_cmsg_send);
+
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
@@ -1879,8 +1944,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
pfrag->offset = 0;
if (SKB_FRAG_PAGE_ORDER) {
- pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
- __GFP_NOWARN | __GFP_NORETRY,
+ /* Avoid direct reclaim but allow kswapd to wake */
+ pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
+ __GFP_COMP | __GFP_NOWARN |
+ __GFP_NORETRY,
SKB_FRAG_PAGE_ORDER);
if (likely(pfrag->page)) {
pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
@@ -2353,6 +2420,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
+ sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2479,7 +2547,8 @@ void sock_enable_timestamp(struct sock *sk, int flag)
* time stamping, but time stamping might have been on
* already because of the other one
*/
- if (!(previous_flags & SK_FLAGS_TIMESTAMP))
+ if (sock_needs_netstamp(sk) &&
+ !(previous_flags & SK_FLAGS_TIMESTAMP))
net_enable_timestamp();
}
}
@@ -2758,7 +2827,7 @@ static int req_prot_init(const struct proto *prot)
rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
rsk_prot->obj_size, 0,
- 0, NULL);
+ prot->slab_flags, NULL);
if (!rsk_prot->slab) {
pr_crit("%s: Can't create request sock SLAB cache!\n",
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 817622f3dbb7..0c1d58d43f67 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -1,3 +1,5 @@
+/* License: GPL */
+
#include <linux/mutex.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
@@ -323,14 +325,4 @@ static int __init sock_diag_init(void)
BUG_ON(!broadcast_wq);
return register_pernet_subsys(&diag_net_ops);
}
-
-static void __exit sock_diag_exit(void)
-{
- unregister_pernet_subsys(&diag_net_ops);
- destroy_workqueue(broadcast_wq);
-}
-
-module_init(sock_diag_init);
-module_exit(sock_diag_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG);
+device_initcall(sock_diag_init);
diff --git a/net/core/tso.c b/net/core/tso.c
index 630b30b4fb53..5dca7ce8ee9f 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,4 +1,5 @@
#include <linux/export.h>
+#include <linux/if_vlan.h>
#include <net/ip.h>
#include <net/tso.h>
#include <asm/unaligned.h>
@@ -14,18 +15,24 @@ EXPORT_SYMBOL(tso_count_descs);
void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last)
{
- struct iphdr *iph;
struct tcphdr *tcph;
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
int mac_hdr_len = skb_network_offset(skb);
memcpy(hdr, skb->data, hdr_len);
- iph = (struct iphdr *)(hdr + mac_hdr_len);
- iph->id = htons(tso->ip_id);
- iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+ if (!tso->ipv6) {
+ struct iphdr *iph = (void *)(hdr + mac_hdr_len);
+
+ iph->id = htons(tso->ip_id);
+ iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+ tso->ip_id++;
+ } else {
+ struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len);
+
+ iph->payload_len = htons(size + tcp_hdrlen(skb));
+ }
tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
put_unaligned_be32(tso->tcp_seq, &tcph->seq);
- tso->ip_id++;
if (!is_last) {
/* Clear all special flags for not last packet */
@@ -61,6 +68,7 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
tso->ip_id = ntohs(ip_hdr(skb)->id);
tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
tso->next_frag_idx = 0;
+ tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6);
/* Build first data */
tso->size = skb_headlen(skb) - hdr_len;
diff --git a/net/core/utils.c b/net/core/utils.c
index 3dffce953c39..3d17ca8b4744 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -348,52 +348,3 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
}
}
EXPORT_SYMBOL(inet_proto_csum_replace_by_diff);
-
-struct __net_random_once_work {
- struct work_struct work;
- struct static_key *key;
-};
-
-static void __net_random_once_deferred(struct work_struct *w)
-{
- struct __net_random_once_work *work =
- container_of(w, struct __net_random_once_work, work);
- BUG_ON(!static_key_enabled(work->key));
- static_key_slow_dec(work->key);
- kfree(work);
-}
-
-static void __net_random_once_disable_jump(struct static_key *key)
-{
- struct __net_random_once_work *w;
-
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (!w)
- return;
-
- INIT_WORK(&w->work, __net_random_once_deferred);
- w->key = key;
- schedule_work(&w->work);
-}
-
-bool __net_get_random_once(void *buf, int nbytes, bool *done,
- struct static_key *once_key)
-{
- static DEFINE_SPINLOCK(lock);
- unsigned long flags;
-
- spin_lock_irqsave(&lock, flags);
- if (*done) {
- spin_unlock_irqrestore(&lock, flags);
- return false;
- }
-
- get_random_bytes(buf, nbytes);
- *done = true;
- spin_unlock_irqrestore(&lock, flags);
-
- __net_random_once_disable_jump(once_key);
-
- return true;
-}
-EXPORT_SYMBOL(__net_get_random_once);