aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2020-06-01 13:49:33 -0700
committerAlexei Starovoitov <ast@kernel.org>2020-06-01 14:48:32 -0700
commit906312ca0d620cdcdb54d07594d26da0aed7eed2 (patch)
tree315a7405aabf602bdff8a8d95450829cd25f61ba /kernel
parentbpf: Use strncpy_from_unsafe_strict() in bpf_seq_printf() helper (diff)
parentselftest: Add tests for XDP programs in devmap entries (diff)
downloadwireguard-linux-906312ca0d620cdcdb54d07594d26da0aed7eed2.tar.xz
wireguard-linux-906312ca0d620cdcdb54d07594d26da0aed7eed2.zip
Merge branch 'xdp_devmap'
David Ahern says: ==================== Implementation of Daniel's proposal for allowing DEVMAP entries to be a device index, program fd pair. Programs are run after XDP_REDIRECT and have access to both Rx device and Tx device. v4 - moved struct bpf_devmap_val from uapi to devmap.c, named the union and dropped the prefix from the elements - Jesper - fixed 2 bugs in selftests v3 - renamed struct to bpf_devmap_val - used offsetofend to check for expected map size, modification of Toke's comment - check for explicit value sizes - adjusted switch statement in dev_map_run_prog per Andrii's comment - changed SEC shortcut to xdp_devmap - changed selftests to use skeleton and new map declaration v2 - moved dev_map_ext_val definition to uapi to formalize the API for devmap extensions; add bpf_ prefix to the prog_fd and prog_id entries - changed devmap code to handle struct in a way that it can support future extensions - fixed subject in libbpf patch v1 - fixed prog put on invalid program - Toke - changed write value from id to fd per Toke's comments about capabilities - add test cases ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to '')
-rw-r--r--kernel/bpf/devmap.c130
1 files changed, 112 insertions, 18 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a51d9fb7a359..c04fb1c72f5e 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -60,12 +60,23 @@ struct xdp_dev_bulk_queue {
unsigned int count;
};
+/* DEVMAP values */
+struct bpf_devmap_val {
+ u32 ifindex; /* device index */
+ union {
+ int fd; /* prog fd on map write */
+ u32 id; /* prog id on map read */
+ } bpf_prog;
+};
+
struct bpf_dtab_netdev {
struct net_device *dev; /* must be first member, due to tracepoint */
struct hlist_node index_hlist;
struct bpf_dtab *dtab;
+ struct bpf_prog *xdp_prog;
struct rcu_head rcu;
unsigned int idx;
+ struct bpf_devmap_val val;
};
struct bpf_dtab {
@@ -105,12 +116,18 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
+ u32 valsize = attr->value_size;
u64 cost = 0;
int err;
- /* check sanity of attributes */
+ /* check sanity of attributes. 2 value sizes supported:
+ * 4 bytes: ifindex
+ * 8 bytes: ifindex + prog fd
+ */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+ (valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
+ valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
+ attr->map_flags & ~DEV_CREATE_FLAG_MASK)
return -EINVAL;
/* Lookup returns a pointer straight to dev->ifindex, so make sure the
@@ -217,6 +234,8 @@ static void dev_map_free(struct bpf_map *map)
hlist_for_each_entry_safe(dev, next, head, index_hlist) {
hlist_del_rcu(&dev->index_hlist);
+ if (dev->xdp_prog)
+ bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
@@ -231,6 +250,8 @@ static void dev_map_free(struct bpf_map *map)
if (!dev)
continue;
+ if (dev->xdp_prog)
+ bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
@@ -317,6 +338,16 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
return -ENOENT;
}
+bool dev_map_can_have_prog(struct bpf_map *map)
+{
+ if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
+ map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
+ map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
+ return true;
+
+ return false;
+}
+
static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{
struct net_device *dev = bq->dev;
@@ -441,6 +472,33 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
return bq_enqueue(dev, xdpf, dev_rx);
}
+static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct xdp_txq_info txq = { .dev = dev };
+ u32 act;
+
+ xdp->txq = &txq;
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ return xdp;
+ case XDP_DROP:
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(dev, xdp_prog, act);
+ break;
+ }
+
+ xdp_return_buff(xdp);
+ return NULL;
+}
+
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
@@ -452,6 +510,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
{
struct net_device *dev = dst->dev;
+ if (dst->xdp_prog) {
+ xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
+ if (!xdp)
+ return 0;
+ }
return __xdp_enqueue(dev, xdp, dev_rx);
}
@@ -472,18 +535,15 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
- struct net_device *dev = obj ? obj->dev : NULL;
- return dev ? &dev->ifindex : NULL;
+ return obj ? &obj->val : NULL;
}
static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
*(u32 *)key);
- struct net_device *dev = obj ? obj->dev : NULL;
-
- return dev ? &dev->ifindex : NULL;
+ return obj ? &obj->val : NULL;
}
static void __dev_map_entry_free(struct rcu_head *rcu)
@@ -491,6 +551,8 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
struct bpf_dtab_netdev *dev;
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+ if (dev->xdp_prog)
+ bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
@@ -541,9 +603,10 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_dtab *dtab,
- u32 ifindex,
+ struct bpf_devmap_val *val,
unsigned int idx)
{
+ struct bpf_prog *prog = NULL;
struct bpf_dtab_netdev *dev;
dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
@@ -551,24 +614,46 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
if (!dev)
return ERR_PTR(-ENOMEM);
- dev->dev = dev_get_by_index(net, ifindex);
- if (!dev->dev) {
- kfree(dev);
- return ERR_PTR(-EINVAL);
+ dev->dev = dev_get_by_index(net, val->ifindex);
+ if (!dev->dev)
+ goto err_out;
+
+ if (val->bpf_prog.fd >= 0) {
+ prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
+ BPF_PROG_TYPE_XDP, false);
+ if (IS_ERR(prog))
+ goto err_put_dev;
+ if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+ goto err_put_prog;
}
dev->idx = idx;
dev->dtab = dtab;
+ if (prog) {
+ dev->xdp_prog = prog;
+ dev->val.bpf_prog.id = prog->aux->id;
+ } else {
+ dev->xdp_prog = NULL;
+ dev->val.bpf_prog.id = 0;
+ }
+ dev->val.ifindex = val->ifindex;
return dev;
+err_put_prog:
+ bpf_prog_put(prog);
+err_put_dev:
+ dev_put(dev->dev);
+err_out:
+ kfree(dev);
+ return ERR_PTR(-EINVAL);
}
static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
struct bpf_dtab_netdev *dev, *old_dev;
- u32 ifindex = *(u32 *)value;
u32 i = *(u32 *)key;
if (unlikely(map_flags > BPF_EXIST))
@@ -578,10 +663,16 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
if (unlikely(map_flags == BPF_NOEXIST))
return -EEXIST;
- if (!ifindex) {
+ /* already verified value_size <= sizeof val */
+ memcpy(&val, value, map->value_size);
+
+ if (!val.ifindex) {
dev = NULL;
+ /* can not specify fd if ifindex is 0 */
+ if (val.bpf_prog.fd != -1)
+ return -EINVAL;
} else {
- dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+ dev = __dev_map_alloc_node(net, dtab, &val, i);
if (IS_ERR(dev))
return PTR_ERR(dev);
}
@@ -608,13 +699,16 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
struct bpf_dtab_netdev *dev, *old_dev;
- u32 ifindex = *(u32 *)value;
u32 idx = *(u32 *)key;
unsigned long flags;
int err = -EEXIST;
- if (unlikely(map_flags > BPF_EXIST || !ifindex))
+ /* already verified value_size <= sizeof val */
+ memcpy(&val, value, map->value_size);
+
+ if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
return -EINVAL;
spin_lock_irqsave(&dtab->index_lock, flags);
@@ -623,7 +717,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
if (old_dev && (map_flags & BPF_NOEXIST))
goto out_err;
- dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
+ dev = __dev_map_alloc_node(net, dtab, &val, idx);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
goto out_err;