aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/devmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r--kernel/bpf/devmap.c118
1 files changed, 99 insertions, 19 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index fdc20892837c..f02d04540c0c 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -322,16 +322,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
return -ENOENT;
}
-bool dev_map_can_have_prog(struct bpf_map *map)
-{
- if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
- map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
- map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
- return true;
-
- return false;
-}
-
static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
struct xdp_frame **frames, int n,
struct net_device *dev)
@@ -499,6 +489,37 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
return 0;
}
+static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst)
+{
+ struct xdp_txq_info txq = { .dev = dst->dev };
+ struct xdp_buff xdp;
+ u32 act;
+
+ if (!dst->xdp_prog)
+ return XDP_PASS;
+
+ __skb_pull(skb, skb->mac_len);
+ xdp.txq = &txq;
+
+ act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog);
+ switch (act) {
+ case XDP_PASS:
+ __skb_push(skb, skb->mac_len);
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(dst->dev, dst->xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ kfree_skb(skb);
+ break;
+ }
+
+ return act;
+}
+
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
@@ -513,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
}
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
- int exclude_ifindex)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
{
- if (!obj || obj->dev->ifindex == exclude_ifindex ||
+ if (!obj ||
!obj->dev->netdev_ops->ndo_xdp_xmit)
return false;
@@ -541,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
return 0;
}
+static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
+{
+ while (num_excluded--) {
+ if (ifindex == excluded[num_excluded])
+ return true;
+ }
+ return false;
+}
+
+/* Get ifindex of each upper device. 'indexes' must be able to hold at
+ * least MAX_NEST_DEV elements.
+ * Returns the number of ifindexes added.
+ */
+static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+{
+ struct net_device *upper;
+ struct list_head *iter;
+ int n = 0;
+
+ netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ indexes[n++] = upper->ifindex;
+ }
+ return n;
+}
+
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
struct bpf_map *map, bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
+ int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct xdp_frame *xdpf;
+ int num_excluded = 0;
unsigned int i;
int err;
+ if (exclude_ingress) {
+ num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+ excluded_devices[num_excluded++] = dev_rx->ifindex;
+ }
+
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@@ -560,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!is_valid_dst(dst, xdp, exclude_ifindex))
+ if (!is_valid_dst(dst, xdp))
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -580,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_rcu(dst, head, index_hlist,
lockdep_is_held(&dtab->index_lock)) {
- if (!is_valid_dst(dst, xdp, exclude_ifindex))
+ if (!is_valid_dst(dst, xdp))
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded,
+ dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -615,6 +673,14 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
err = xdp_ok_fwd_dev(dst->dev, skb->len);
if (unlikely(err))
return err;
+
+ /* Redirect has already succeeded semantically at this point, so we just
+ * return 0 even if packet is dropped. Helper below takes care of
+ * freeing skb.
+ */
+ if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS)
+ return 0;
+
skb->dev = dst->dev;
generic_xdp_tx(skb, xdp_prog);
@@ -646,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
bool exclude_ingress)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
struct bpf_dtab_netdev *dst, *last_dst = NULL;
+ int excluded_devices[1+MAX_NEST_DEV];
struct hlist_head *head;
struct hlist_node *next;
+ int num_excluded = 0;
unsigned int i;
int err;
+ if (exclude_ingress) {
+ num_excluded = get_upper_ifindexes(dev, excluded_devices);
+ excluded_devices[num_excluded++] = dev->ifindex;
+ }
+
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
for (i = 0; i < map->max_entries; i++) {
dst = rcu_dereference_check(dtab->netdev_map[i],
rcu_read_lock_bh_held());
- if (!dst || dst->dev->ifindex == exclude_ifindex)
+ if (!dst)
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */
@@ -671,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
return err;
last_dst = dst;
+
}
} else { /* BPF_MAP_TYPE_DEVMAP_HASH */
for (i = 0; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_safe(dst, next, head, index_hlist) {
- if (!dst || dst->dev->ifindex == exclude_ifindex)
+ if (!dst)
+ continue;
+
+ if (is_ifindex_excluded(excluded_devices, num_excluded,
+ dst->dev->ifindex))
continue;
/* we only need n-1 clones; last_dst enqueued below */