aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c1
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c11
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c4
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h5
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c10
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_fp.c1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c10
-rw-r--r--drivers/net/netdevsim/bpf.c2
-rw-r--r--drivers/net/tun.c24
-rw-r--r--drivers/net/virtio_net.c14
-rw-r--r--fs/nsfs.c29
-rw-r--r--include/linux/bpf.h18
-rw-r--r--include/linux/bpf_types.h2
-rw-r--r--include/linux/bpf_verifier.h16
-rw-r--r--include/linux/filter.h2
-rw-r--r--include/linux/netdevice.h6
-rw-r--r--include/linux/proc_ns.h3
-rw-r--r--include/net/xdp.h48
-rw-r--r--include/uapi/linux/bpf.h6
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/offload.c147
-rw-r--r--kernel/bpf/sockmap.c8
-rw-r--r--kernel/bpf/stackmap.c28
-rw-r--r--kernel/bpf/syscall.c19
-rw-r--r--kernel/bpf/verifier.c20
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c69
-rw-r--r--net/core/filter.c19
-rw-r--r--net/core/xdp.c73
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/xdp_rxq_info_kern.c96
-rw-r--r--samples/bpf/xdp_rxq_info_user.c531
-rw-r--r--tools/bpf/Makefile29
-rw-r--r--tools/bpf/bpf_jit_disasm.c7
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst6
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst6
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst6
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst6
-rw-r--r--tools/bpf/bpftool/Makefile27
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool8
-rw-r--r--tools/bpf/bpftool/cgroup.c35
-rw-r--r--tools/bpf/bpftool/common.c52
-rw-r--r--tools/bpf/bpftool/jit_disasm.c7
-rw-r--r--tools/bpf/bpftool/main.c13
-rw-r--r--tools/bpf/bpftool/main.h2
-rw-r--r--tools/bpf/bpftool/map.c3
-rw-r--r--tools/bpf/bpftool/prog.c10
-rw-r--r--tools/build/feature/Makefile4
-rw-r--r--tools/build/feature/test-disassembler-four-args.c15
-rw-r--r--tools/include/uapi/linux/bpf.h3
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py112
-rw-r--r--tools/testing/selftests/bpf/test_progs.c127
-rw-r--r--tools/testing/selftests/bpf/test_stacktrace_map.c62
72 files changed, 1687 insertions, 178 deletions
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9efbdc6f1fcb..89c3c8760a78 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2247,6 +2247,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
if (rxr->xdp_prog)
bpf_prog_put(rxr->xdp_prog);
+ if (xdp_rxq_info_is_reg(&rxr->xdp_rxq))
+ xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
kfree(rxr->rx_tpa);
rxr->rx_tpa = NULL;
@@ -2280,6 +2283,10 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
ring = &rxr->rx_ring_struct;
+ rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i);
+ if (rc < 0)
+ return rc;
+
rc = bnxt_alloc_ring(bp, ring);
if (rc)
return rc;
@@ -2834,6 +2841,9 @@ void bnxt_set_ring_params(struct bnxt *bp)
bp->cp_ring_mask = bp->cp_bit - 1;
}
+/* Changing allocation mode of RX rings.
+ * TODO: Update when extending xdp_rxq_info to support allocation modes.
+ */
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
{
if (page_mode) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5359a1f0045f..2d268fc26f5e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -23,6 +23,7 @@
#include <net/devlink.h>
#include <net/dst_metadata.h>
#include <net/switchdev.h>
+#include <net/xdp.h>
struct tx_bd {
__le32 tx_bd_len_flags_type;
@@ -664,6 +665,7 @@ struct bnxt_rx_ring_info {
struct bnxt_ring_struct rx_ring_struct;
struct bnxt_ring_struct rx_agg_ring_struct;
+ struct xdp_rxq_info xdp_rxq;
};
struct bnxt_cp_ring_info {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 261e5847557a..1389ab5e05df 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -96,6 +96,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
xdp.data = *data_ptr;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = *data_ptr + *len;
+ xdp.rxq = &rxr->xdp_rxq;
orig_data = xdp.data;
mapping = rx_buf->mapping - bp->rx_dma_offset;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 52b3a6044f85..21618d0d694f 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -521,7 +521,7 @@ static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
- struct sk_buff **skb)
+ struct rcv_queue *rq, struct sk_buff **skb)
{
struct xdp_buff xdp;
struct page *page;
@@ -545,6 +545,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
xdp.data = (void *)cpu_addr;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
+ xdp.rxq = &rq->xdp_rxq;
orig_data = xdp.data;
rcu_read_lock();
@@ -698,7 +699,8 @@ static inline void nicvf_set_rxhash(struct net_device *netdev,
static void nicvf_rcv_pkt_handler(struct net_device *netdev,
struct napi_struct *napi,
- struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
+ struct cqe_rx_t *cqe_rx,
+ struct snd_queue *sq, struct rcv_queue *rq)
{
struct sk_buff *skb = NULL;
struct nicvf *nic = netdev_priv(netdev);
@@ -724,7 +726,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
/* For XDP, ignore pkts spanning multiple pages */
if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
/* Packet consumed by XDP */
- if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb))
+ if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
return;
} else {
skb = nicvf_get_rcv_skb(snic, cqe_rx,
@@ -781,6 +783,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
struct cqe_rx_t *cq_desc;
struct netdev_queue *txq;
struct snd_queue *sq = &qs->sq[cq_idx];
+ struct rcv_queue *rq = &qs->rq[cq_idx];
unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
spin_lock_bh(&cq->lock);
@@ -811,7 +814,7 @@ loop:
switch (cq_desc->cqe_type) {
case CQE_TYPE_RX:
- nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq);
+ nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
work_done++;
break;
case CQE_TYPE_SEND:
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index f38ea349aa00..14e62c6ac342 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -760,6 +760,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
if (!rq->enable) {
nicvf_reclaim_rcv_queue(nic, qs, qidx);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
return;
}
@@ -772,6 +773,9 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
/* all writes of RBDR data to be loaded into L2 Cache as well*/
rq->caching = 1;
+ /* Driver have no proper error path for failed XDP RX-queue info reg */
+ WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
+
/* Send a mailbox msg to PF to config RQ */
mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
mbx.rq.qs_num = qs->vnic_id;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 178ab6e8e3c5..7d1e4e2aaad0 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -12,6 +12,7 @@
#include <linux/netdevice.h>
#include <linux/iommu.h>
#include <linux/bpf.h>
+#include <net/xdp.h>
#include "q_struct.h"
#define MAX_QUEUE_SET 128
@@ -255,6 +256,7 @@ struct rcv_queue {
u8 start_qs_rbdr_idx; /* RBDR idx in the above QS */
u8 caching;
struct rx_tx_queue_stats stats;
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
struct cmp_queue {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5f6cf7212d4f..cfd788b4fd7a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1585,6 +1585,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
*/
rx_rings[i].desc = NULL;
rx_rings[i].rx_bi = NULL;
+ /* Clear cloned XDP RX-queue info before setup call */
+ memset(&rx_rings[i].xdp_rxq, 0, sizeof(rx_rings[i].xdp_rxq));
/* this is to allow wr32 to have something to write to
* during early allocation of Rx buffers
*/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 4566d66ffc7c..2a8a85e3ae8f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -27,6 +27,7 @@
#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include <linux/bpf_trace.h>
+#include <net/xdp.h>
#include "i40e.h"
#include "i40e_trace.h"
#include "i40e_prototype.h"
@@ -1236,6 +1237,8 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
i40e_clean_rx_ring(rx_ring);
+ if (rx_ring->vsi->type == I40E_VSI_MAIN)
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
rx_ring->xdp_prog = NULL;
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
@@ -1256,6 +1259,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
+ int err = -ENOMEM;
int bi_size;
/* warn if we are about to overwrite the pointer */
@@ -1283,13 +1287,21 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
+ /* XDP RX-queue info only needed for RX rings exposed to XDP */
+ if (rx_ring->vsi->type == I40E_VSI_MAIN) {
+ err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+ rx_ring->queue_index);
+ if (err < 0)
+ goto err;
+ }
+
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
return 0;
err:
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
- return -ENOMEM;
+ return err;
}
/**
@@ -2068,11 +2080,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
bool failure = false, xdp_xmit = false;
+ struct xdp_buff xdp;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
- struct xdp_buff xdp;
unsigned int size;
u16 vlan_tag;
u8 rx_ptype;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index fbae1182e2ea..2d08760fc4ce 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -27,6 +27,8 @@
#ifndef _I40E_TXRX_H_
#define _I40E_TXRX_H_
+#include <net/xdp.h>
+
/* Interrupt Throttling and Rate Limiting Goodies */
#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
@@ -428,6 +430,7 @@ struct i40e_ring {
*/
struct i40e_channel *ch;
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 468c3555a629..8611763d6129 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -53,6 +53,7 @@
#include <linux/dca.h>
#endif
+#include <net/xdp.h>
#include <net/busy_poll.h>
/* common prefix used by pr_<> macros */
@@ -371,6 +372,7 @@ struct ixgbe_ring {
struct ixgbe_tx_queue_stats tx_stats;
struct ixgbe_rx_queue_stats rx_stats;
};
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp;
enum ixgbe_ring_f_enum {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 0aad1c2a3667..0aaf70b3cfcd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1156,6 +1156,10 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
memcpy(&temp_ring[i], adapter->rx_ring[i],
sizeof(struct ixgbe_ring));
+ /* Clear copied XDP RX-queue info */
+ memset(&temp_ring[i].xdp_rxq, 0,
+ sizeof(temp_ring[i].xdp_rxq));
+
temp_ring[i].count = new_rx_count;
err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
if (err) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 7737a05c717c..95aba975b391 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2318,12 +2318,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
#endif /* IXGBE_FCOE */
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
bool xdp_xmit = false;
+ struct xdp_buff xdp;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
struct ixgbe_rx_buffer *rx_buffer;
struct sk_buff *skb;
- struct xdp_buff xdp;
unsigned int size;
/* return some buffers to hardware, one at a time is too slow */
@@ -6444,6 +6446,11 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
+ /* XDP RX-queue info */
+ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
+ rx_ring->queue_index) < 0)
+ goto err;
+
rx_ring->xdp_prog = adapter->xdp_prog;
return 0;
@@ -6541,6 +6548,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
ixgbe_clean_rx_ring(rx_ring);
rx_ring->xdp_prog = NULL;
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 21bc17fa3854..8fc51bc29003 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2172,8 +2172,9 @@ static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
prof->rx_ring_size, priv->stride,
- node))
+ node, i))
goto err;
+
}
#ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5f9dbc9a7f5b..b4d144e67514 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -262,7 +262,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
- u32 size, u16 stride, int node)
+ u32 size, u16 stride, int node, int queue_index)
{
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_rx_ring *ring;
@@ -286,6 +286,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->log_stride = ffs(ring->stride) - 1;
ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
+ if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index) < 0)
+ goto err_ring;
+
tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
sizeof(struct mlx4_en_rx_alloc));
ring->rx_info = vzalloc_node(tmp, node);
@@ -293,7 +296,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->rx_info = vzalloc(tmp);
if (!ring->rx_info) {
err = -ENOMEM;
- goto err_ring;
+ goto err_xdp_info;
}
}
@@ -317,6 +320,8 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
err_info:
vfree(ring->rx_info);
ring->rx_info = NULL;
+err_xdp_info:
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
err_ring:
kfree(ring);
*pring = NULL;
@@ -440,6 +445,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
lockdep_is_held(&mdev->state_lock));
if (old_prog)
bpf_prog_put(old_prog);
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
vfree(ring->rx_info);
ring->rx_info = NULL;
@@ -652,6 +658,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
int cq_ring = cq->ring;
bool doorbell_pending;
struct mlx4_cqe *cqe;
+ struct xdp_buff xdp;
int polled = 0;
int index;
@@ -666,6 +673,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
rcu_read_lock();
xdp_prog = rcu_dereference(ring->xdp_prog);
+ xdp.rxq = &ring->xdp_rxq;
doorbell_pending = 0;
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
@@ -750,7 +758,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
* read bytes but not past the end of the frag.
*/
if (xdp_prog) {
- struct xdp_buff xdp;
dma_addr_t dma;
void *orig_data;
u32 act;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 7db3d0d9bfce..f470ae37d937 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -46,6 +46,7 @@
#endif
#include <linux/cpu_rmap.h>
#include <linux/ptp_clock_kernel.h>
+#include <net/xdp.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/qp.h>
@@ -356,6 +357,7 @@ struct mlx4_en_rx_ring {
unsigned long dropped;
int hwtstamp_rx_filter;
cpumask_var_t affinity_mask;
+ struct xdp_rxq_info xdp_rxq;
};
struct mlx4_en_cq {
@@ -720,7 +722,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev);
void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv);
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
- u32 size, u16 stride, int node);
+ u32 size, u16 stride, int node, int queue_index);
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
u32 size, u16 stride);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 543060c305a0..5299310f2481 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -46,6 +46,7 @@
#include <linux/mlx5/transobj.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
+#include <net/xdp.h>
#include "wq.h"
#include "mlx5_core.h"
#include "en_stats.h"
@@ -571,6 +572,9 @@ struct mlx5e_rq {
u32 rqn;
struct mlx5_core_dev *mdev;
struct mlx5_core_mkey umr_mkey;
+
+ /* XDP read-mostly */
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
struct mlx5e_channel {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3aa1c90e7c86..539bd1d24396 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -582,6 +582,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
+ if (xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix) < 0)
+ goto err_rq_wq_destroy;
+
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
rq->buff.headroom = params->rq_headroom;
@@ -687,6 +690,7 @@ err_destroy_umr_mkey:
err_rq_wq_destroy:
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
mlx5_wq_destroy(&rq->wq_ctrl);
return err;
@@ -699,6 +703,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
mlx5e_rq_free_mpwqe_info(rq);
@@ -2766,6 +2772,9 @@ static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
if (err)
return err;
+ /* Mark as unused given "Drop-RQ" packets never reach XDP */
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+
rq->mdev = mdev;
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5b499c7a698f..7b38480811d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -812,6 +812,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
xdp.data_hard_start = va;
+ xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(prog, &xdp);
switch (act) {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index aae1be9ed056..89a9b6393882 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -238,7 +238,7 @@ struct nfp_bpf_vnic {
int nfp_bpf_jit(struct nfp_prog *prog);
-extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
+extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
struct netdev_bpf;
struct nfp_app;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 9c2608445bd8..d8870c2f11f3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -260,6 +260,6 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
return 0;
}
-const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
+const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
.insn_hook = nfp_verify_insn,
};
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 3801c52098d5..0e564cfabe7e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -47,6 +47,7 @@
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/io-64-nonatomic-hi-lo.h>
+#include <net/xdp.h>
#include "nfp_net_ctrl.h"
@@ -350,6 +351,7 @@ struct nfp_net_rx_buf {
* @rxds: Virtual address of FL/RX ring in host memory
* @dma: DMA address of the FL/RX ring
* @size: Size, in bytes, of the FL/RX ring (needed to free)
+ * @xdp_rxq: RX-ring info avail for XDP
*/
struct nfp_net_rx_ring {
struct nfp_net_r_vector *r_vec;
@@ -361,13 +363,14 @@ struct nfp_net_rx_ring {
u32 idx;
int fl_qcidx;
+ unsigned int size;
u8 __iomem *qcp_fl;
struct nfp_net_rx_buf *rxbufs;
struct nfp_net_rx_desc *rxds;
dma_addr_t dma;
- unsigned int size;
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned;
/**
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 29c0947f6d70..05e071b3dc5b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1608,11 +1608,13 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
unsigned int true_bufsz;
struct sk_buff *skb;
int pkts_polled = 0;
+ struct xdp_buff xdp;
int idx;
rcu_read_lock();
xdp_prog = READ_ONCE(dp->xdp_prog);
true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
+ xdp.rxq = &rx_ring->xdp_rxq;
tx_ring = r_vec->xdp_ring;
while (pkts_polled < budget) {
@@ -1703,7 +1705,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
dp->bpf_offload_xdp) && !meta.portid) {
void *orig_data = rxbuf->frag + pkt_off;
unsigned int dma_off;
- struct xdp_buff xdp;
int act;
xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
@@ -2252,6 +2253,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
kfree(rx_ring->rxbufs);
if (rx_ring->rxds)
@@ -2275,7 +2277,11 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
static int
nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
{
- int sz;
+ int sz, err;
+
+ err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx);
+ if (err < 0)
+ return err;
rx_ring->cnt = dp->rxd_cnt;
rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 8e01b53765dd..9935978c5542 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -40,6 +40,7 @@
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/bpf.h>
+#include <net/xdp.h>
#include <linux/qed/qede_rdma.h>
#include <linux/io.h>
#ifdef CONFIG_RFS_ACCEL
@@ -345,6 +346,7 @@ struct qede_rx_queue {
u64 xdp_no_pass;
void *handle;
+ struct xdp_rxq_info xdp_rxq;
};
union db_prod {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 48ec4c56cddf..dafc079ab6b9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1006,6 +1006,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
xdp.data = xdp.data_hard_start + *data_offset;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
+ xdp.rxq = &rxq->xdp_rxq;
/* Queues always have a full reset currently, so for the time
* being until there's atomic program replace just mark read
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 9292ca25c40c..2db70eabddfe 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -765,6 +765,12 @@ static void qede_free_fp_array(struct qede_dev *edev)
fp = &edev->fp_array[i];
kfree(fp->sb_info);
+ /* Handle mem alloc failure case where qede_init_fp
+ * didn't register xdp_rxq_info yet.
+ * Implicit only (fp->type & QEDE_FASTPATH_RX)
+ */
+ if (fp->rxq && xdp_rxq_info_is_reg(&fp->rxq->xdp_rxq))
+ xdp_rxq_info_unreg(&fp->rxq->xdp_rxq);
kfree(fp->rxq);
kfree(fp->xdp_tx);
kfree(fp->txq);
@@ -1493,6 +1499,10 @@ static void qede_init_fp(struct qede_dev *edev)
else
fp->rxq->data_direction = DMA_FROM_DEVICE;
fp->rxq->dev = &edev->pdev->dev;
+
+ /* Driver have no error path from here */
+ WARN_ON(xdp_rxq_info_reg(&fp->rxq->xdp_rxq, edev->ndev,
+ fp->rxq->rxq_id) < 0);
}
if (fp->type & QEDE_FASTPATH_TX) {
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index a243fa7ae02f..5134d5c1306c 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -66,7 +66,7 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
return 0;
}
-static const struct bpf_ext_analyzer_ops nsim_bpf_analyzer_ops = {
+static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = {
.insn_hook = nsim_bpf_verify_insn,
};
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e367d6310353..e7c5f4b2a9a6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -180,6 +180,7 @@ struct tun_file {
struct list_head next;
struct tun_struct *detached;
struct skb_array tx_array;
+ struct xdp_rxq_info xdp_rxq;
};
struct tun_flow_entry {
@@ -687,8 +688,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}
- if (tun)
+ if (tun) {
skb_array_cleanup(&tfile->tx_array);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
+ }
sock_put(&tfile->sk);
}
}
@@ -728,11 +731,13 @@ static void tun_detach_all(struct net_device *dev)
tun_napi_del(tun, tfile);
/* Drop read queue */
tun_queue_purge(tfile);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
tun_queue_purge(tfile);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
}
BUG_ON(tun->numdisabled != 0);
@@ -784,6 +789,22 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
tfile->queue_index = tun->numqueues;
tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
+
+ if (tfile->detached) {
+ /* Re-attach detached tfile, updating XDP queue_index */
+ WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq));
+
+ if (tfile->xdp_rxq.queue_index != tfile->queue_index)
+ tfile->xdp_rxq.queue_index = tfile->queue_index;
+ } else {
+ /* Setup XDP RX-queue info, for new tfile getting attached */
+ err = xdp_rxq_info_reg(&tfile->xdp_rxq,
+ tun->dev, tfile->queue_index);
+ if (err < 0)
+ goto out;
+ err = 0;
+ }
+
rcu_assign_pointer(tfile->tun, tun);
rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
tun->numqueues++;
@@ -1508,6 +1529,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
xdp.data = buf + pad;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
+ xdp.rxq = &tfile->xdp_rxq;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6fb7b658a6cc..ed8299343728 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -31,6 +31,7 @@
#include <linux/average.h>
#include <linux/filter.h>
#include <net/route.h>
+#include <net/xdp.h>
static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);
@@ -115,6 +116,8 @@ struct receive_queue {
/* Name of this receive queue: input.$index */
char name[40];
+
+ struct xdp_rxq_info xdp_rxq;
};
struct virtnet_info {
@@ -559,6 +562,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdp.data = xdp.data_hard_start + xdp_headroom;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
+ xdp.rxq = &rq->xdp_rxq;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -692,6 +696,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdp.data = data + vi->hdr_len;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + (len - vi->hdr_len);
+ xdp.rxq = &rq->xdp_rxq;
+
act = bpf_prog_run_xdp(xdp_prog, &xdp);
if (act != XDP_PASS)
@@ -1225,13 +1231,18 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
- int i;
+ int i, err;
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if oom use wq. */
if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
+
+ err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
+ if (err < 0)
+ return err;
+
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
}
@@ -1560,6 +1571,7 @@ static int virtnet_close(struct net_device *dev)
cancel_delayed_work_sync(&vi->refill);
for (i = 0; i < vi->max_queue_pairs; i++) {
+ xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
napi_disable(&vi->rq[i].napi);
virtnet_napi_tx_disable(&vi->sq[i].napi);
}
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 7c6f76d29f56..36b0772701a0 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -103,14 +103,14 @@ slow:
goto got_it;
}
-void *ns_get_path(struct path *path, struct task_struct *task,
- const struct proc_ns_operations *ns_ops)
+void *ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
+ void *private_data)
{
struct ns_common *ns;
void *ret;
again:
- ns = ns_ops->get(task);
+ ns = ns_get_cb(private_data);
if (!ns)
return ERR_PTR(-ENOENT);
@@ -120,6 +120,29 @@ again:
return ret;
}
+struct ns_get_path_task_args {
+ const struct proc_ns_operations *ns_ops;
+ struct task_struct *task;
+};
+
+static struct ns_common *ns_get_path_task(void *private_data)
+{
+ struct ns_get_path_task_args *args = private_data;
+
+ return args->ns_ops->get(args->task);
+}
+
+void *ns_get_path(struct path *path, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops)
+{
+ struct ns_get_path_task_args args = {
+ .ns_ops = ns_ops,
+ .task = task,
+ };
+
+ return ns_get_path_cb(path, ns_get_path_task, &args);
+}
+
int open_related_ns(struct ns_common *ns,
struct ns_common *(*get_ns)(struct ns_common *ns))
{
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index da54ef644fcd..9e03046d1df2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -17,6 +17,7 @@
#include <linux/numa.h>
#include <linux/wait.h>
+struct bpf_verifier_env;
struct perf_event;
struct bpf_prog;
struct bpf_map;
@@ -184,14 +185,18 @@ struct bpf_verifier_ops {
struct bpf_prog *prog, u32 *target_size);
};
+struct bpf_prog_offload_ops {
+ int (*insn_hook)(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx);
+};
+
struct bpf_dev_offload {
struct bpf_prog *prog;
struct net_device *netdev;
void *dev_priv;
struct list_head offloads;
bool dev_state;
- bool verifier_running;
- wait_queue_head_t verifier_done;
+ const struct bpf_prog_offload_ops *dev_ops;
};
struct bpf_prog_aux {
@@ -201,6 +206,7 @@ struct bpf_prog_aux {
u32 stack_depth;
u32 id;
u32 func_cnt;
+ bool offload_requested;
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
struct latch_tree_node ksym_tnode;
@@ -351,6 +357,8 @@ void bpf_prog_put(struct bpf_prog *prog);
int __bpf_prog_charge(struct user_struct *user, u32 pages);
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
+void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
+
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
@@ -523,13 +531,15 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
int bpf_prog_offload_compile(struct bpf_prog *prog);
void bpf_prog_offload_destroy(struct bpf_prog *prog);
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+ struct bpf_prog *prog);
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
{
- return aux->offload;
+ return aux->offload_requested;
}
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
@@ -544,7 +554,7 @@ static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
-#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)
+#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
#else
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 978c1d9c9383..19b8349a3809 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -42,7 +42,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-#ifdef CONFIG_STREAM_PARSER
+#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 883a35d50cd5..2feb218c001d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -166,12 +166,6 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log)
return log->len_used >= log->len_total - 1;
}
-struct bpf_verifier_env;
-struct bpf_ext_analyzer_ops {
- int (*insn_hook)(struct bpf_verifier_env *env,
- int insn_idx, int prev_insn_idx);
-};
-
#define BPF_MAX_SUBPROGS 256
/* single container for all structs
@@ -185,7 +179,6 @@ struct bpf_verifier_env {
bool strict_alignment; /* perform strict pointer alignment checks */
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
- const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
@@ -206,13 +199,8 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
return cur->frame[cur->curframe]->regs;
}
-#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
-#else
-static inline int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
-{
- return -EOPNOTSUPP;
-}
-#endif
+int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx);
#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2b0df2703671..425056c7f96c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -20,6 +20,7 @@
#include <linux/set_memory.h>
#include <linux/kallsyms.h>
+#include <net/xdp.h>
#include <net/sch_generic.h>
#include <uapi/linux/filter.h>
@@ -503,6 +504,7 @@ struct xdp_buff {
void *data_end;
void *data_meta;
void *data_hard_start;
+ struct xdp_rxq_info *rxq;
};
/* Compute the linear packet data range [data, data_end) which
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 352066e4eeef..440b000f07f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -44,6 +44,7 @@
#include <net/dcbnl.h>
#endif
#include <net/netprio_cgroup.h>
+#include <net/xdp.h>
#include <linux/netdev_features.h>
#include <linux/neighbour.h>
@@ -686,6 +687,7 @@ struct netdev_rx_queue {
#endif
struct kobject kobj;
struct net_device *dev;
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
/*
@@ -804,7 +806,7 @@ enum bpf_netdev_command {
BPF_OFFLOAD_DESTROY,
};
-struct bpf_ext_analyzer_ops;
+struct bpf_prog_offload_ops;
struct netlink_ext_ack;
struct netdev_bpf {
@@ -826,7 +828,7 @@ struct netdev_bpf {
/* BPF_OFFLOAD_VERIFIER_PREP */
struct {
struct bpf_prog *prog;
- const struct bpf_ext_analyzer_ops *ops; /* callee set */
+ const struct bpf_prog_offload_ops *ops; /* callee set */
} verifier;
/* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */
struct {
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 2ff18c9840a7..d31cb6215905 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -78,6 +78,9 @@ extern struct file *proc_ns_fget(int fd);
#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
extern void *ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
+typedef struct ns_common *ns_get_path_helper_t(void *);
+extern void *ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
+ void *private_data);
extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
diff --git a/include/net/xdp.h b/include/net/xdp.h
new file mode 100644
index 000000000000..b2362ddfa694
--- /dev/null
+++ b/include/net/xdp.h
@@ -0,0 +1,48 @@
+/* include/net/xdp.h
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+#ifndef __LINUX_NET_XDP_H__
+#define __LINUX_NET_XDP_H__
+
+/**
+ * DOC: XDP RX-queue information
+ *
+ * The XDP RX-queue info (xdp_rxq_info) is associated with the driver
+ * level RX-ring queues. It is information that is specific to how
+ * the driver have configured a given RX-ring queue.
+ *
+ * Each xdp_buff frame received in the driver carry a (pointer)
+ * reference to this xdp_rxq_info structure. This provides the XDP
+ * data-path read-access to RX-info for both kernel and bpf-side
+ * (limited subset).
+ *
+ * For now, direct access is only safe while running in NAPI/softirq
+ * context. Contents is read-mostly and must not be updated during
+ * driver NAPI/softirq poll.
+ *
+ * The driver usage API is a register and unregister API.
+ *
+ * The struct is not directly tied to the XDP prog. A new XDP prog
+ * can be attached as long as it doesn't change the underlying
+ * RX-ring. If the RX-ring does change significantly, the NIC driver
+ * naturally need to stop the RX-ring before purging and reallocating
+ * memory. In that process the driver MUST call unregistor (which
+ * also apply for driver shutdown and unload). The register API is
+ * also mandatory during RX-ring setup.
+ */
+
+struct xdp_rxq_info {
+ struct net_device *dev;
+ u32 queue_index;
+ u32 reg_state;
+} ____cacheline_aligned; /* perf critical, avoid false-sharing */
+
+int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index);
+void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
+void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+
+#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 69eabfcb9bdb..405317f9c064 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -899,6 +899,9 @@ struct xdp_md {
__u32 data;
__u32 data_end;
__u32 data_meta;
+ /* Below access go though struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
};
enum sk_action {
@@ -921,6 +924,9 @@ struct bpf_prog_info {
__u32 nr_map_ids;
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u64 netns_dev;
+ __u64 netns_ino;
} __attribute__((aligned(8)));
struct bpf_map_info {
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e691da0b3bab..a713fd23ec88 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -9,9 +9,11 @@ obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
obj-$(CONFIG_BPF_SYSCALL) += offload.o
ifeq ($(CONFIG_STREAM_PARSER),y)
+ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
endif
endif
+endif
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8455b89d1bbf..040d4e0edf3f 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -16,17 +16,22 @@
#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/bug.h>
+#include <linux/kdev_t.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
+#include <linux/proc_ns.h>
#include <linux/rtnetlink.h>
+#include <linux/rwsem.h>
-/* protected by RTNL */
+/* Protects bpf_prog_offload_devs and offload members of all progs.
+ * RTNL lock cannot be taken when holding this lock.
+ */
+static DECLARE_RWSEM(bpf_devs_lock);
static LIST_HEAD(bpf_prog_offload_devs);
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
{
- struct net *net = current->nsproxy->net_ns;
struct bpf_dev_offload *offload;
if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
@@ -41,32 +46,40 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
return -ENOMEM;
offload->prog = prog;
- init_waitqueue_head(&offload->verifier_done);
- rtnl_lock();
- offload->netdev = __dev_get_by_index(net, attr->prog_ifindex);
- if (!offload->netdev) {
- rtnl_unlock();
- kfree(offload);
- return -EINVAL;
- }
+ offload->netdev = dev_get_by_index(current->nsproxy->net_ns,
+ attr->prog_ifindex);
+ if (!offload->netdev)
+ goto err_free;
+ down_write(&bpf_devs_lock);
+ if (offload->netdev->reg_state != NETREG_REGISTERED)
+ goto err_unlock;
prog->aux->offload = offload;
list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
- rtnl_unlock();
+ dev_put(offload->netdev);
+ up_write(&bpf_devs_lock);
return 0;
+err_unlock:
+ up_write(&bpf_devs_lock);
+ dev_put(offload->netdev);
+err_free:
+ kfree(offload);
+ return -EINVAL;
}
static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
struct netdev_bpf *data)
{
- struct net_device *netdev = prog->aux->offload->netdev;
+ struct bpf_dev_offload *offload = prog->aux->offload;
+ struct net_device *netdev;
ASSERT_RTNL();
- if (!netdev)
+ if (!offload)
return -ENODEV;
+ netdev = offload->netdev;
if (!netdev->netdev_ops->ndo_bpf)
return -EOPNOTSUPP;
@@ -87,62 +100,63 @@ int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
if (err)
goto exit_unlock;
- env->dev_ops = data.verifier.ops;
-
+ env->prog->aux->offload->dev_ops = data.verifier.ops;
env->prog->aux->offload->dev_state = true;
- env->prog->aux->offload->verifier_running = true;
exit_unlock:
rtnl_unlock();
return err;
}
+int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx)
+{
+ struct bpf_dev_offload *offload;
+ int ret = -ENODEV;
+
+ down_read(&bpf_devs_lock);
+ offload = env->prog->aux->offload;
+ if (offload)
+ ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
+ up_read(&bpf_devs_lock);
+
+ return ret;
+}
+
static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
{
struct bpf_dev_offload *offload = prog->aux->offload;
struct netdev_bpf data = {};
- /* Caution - if netdev is destroyed before the program, this function
- * will be called twice.
- */
-
data.offload.prog = prog;
- if (offload->verifier_running)
- wait_event(offload->verifier_done, !offload->verifier_running);
-
if (offload->dev_state)
WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
- offload->dev_state = false;
+ /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
+ bpf_prog_free_id(prog, true);
+
list_del_init(&offload->offloads);
- offload->netdev = NULL;
+ kfree(offload);
+ prog->aux->offload = NULL;
}
void bpf_prog_offload_destroy(struct bpf_prog *prog)
{
- struct bpf_dev_offload *offload = prog->aux->offload;
-
- offload->verifier_running = false;
- wake_up(&offload->verifier_done);
-
rtnl_lock();
- __bpf_prog_offload_destroy(prog);
+ down_write(&bpf_devs_lock);
+ if (prog->aux->offload)
+ __bpf_prog_offload_destroy(prog);
+ up_write(&bpf_devs_lock);
rtnl_unlock();
-
- kfree(offload);
}
static int bpf_prog_offload_translate(struct bpf_prog *prog)
{
- struct bpf_dev_offload *offload = prog->aux->offload;
struct netdev_bpf data = {};
int ret;
data.offload.prog = prog;
- offload->verifier_running = false;
- wake_up(&offload->verifier_done);
-
rtnl_lock();
ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data);
rtnl_unlock();
@@ -164,6 +178,63 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
return bpf_prog_offload_translate(prog);
}
+struct ns_get_path_bpf_prog_args {
+ struct bpf_prog *prog;
+ struct bpf_prog_info *info;
+};
+
+static struct ns_common *bpf_prog_offload_info_fill_ns(void *private_data)
+{
+ struct ns_get_path_bpf_prog_args *args = private_data;
+ struct bpf_prog_aux *aux = args->prog->aux;
+ struct ns_common *ns;
+ struct net *net;
+
+ rtnl_lock();
+ down_read(&bpf_devs_lock);
+
+ if (aux->offload) {
+ args->info->ifindex = aux->offload->netdev->ifindex;
+ net = dev_net(aux->offload->netdev);
+ get_net(net);
+ ns = &net->ns;
+ } else {
+ args->info->ifindex = 0;
+ ns = NULL;
+ }
+
+ up_read(&bpf_devs_lock);
+ rtnl_unlock();
+
+ return ns;
+}
+
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+ struct bpf_prog *prog)
+{
+ struct ns_get_path_bpf_prog_args args = {
+ .prog = prog,
+ .info = info,
+ };
+ struct inode *ns_inode;
+ struct path ns_path;
+ void *res;
+
+ res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args);
+ if (IS_ERR(res)) {
+ if (!info->ifindex)
+ return -ENODEV;
+ return PTR_ERR(res);
+ }
+
+ ns_inode = ns_path.dentry->d_inode;
+ info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
+ info->netns_ino = ns_inode->i_ino;
+ path_put(&ns_path);
+
+ return 0;
+}
+
const struct bpf_prog_ops bpf_offload_prog_ops = {
};
@@ -181,11 +252,13 @@ static int bpf_offload_notification(struct notifier_block *notifier,
if (netdev->reg_state != NETREG_UNREGISTERING)
break;
+ down_write(&bpf_devs_lock);
list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
offloads) {
if (offload->netdev == netdev)
__bpf_prog_offload_destroy(offload->prog);
}
+ up_write(&bpf_devs_lock);
break;
default:
break;
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 5ee2e41893d9..3f662ee23a34 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -96,14 +96,6 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
return rcu_dereference_sk_user_data(sk);
}
-/* compute the linear packet data range [data, data_end) for skb when
- * sk_skb type programs are in use.
- */
-static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
-{
- TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
-}
-
enum __sk_action {
__SK_DROP = 0,
__SK_PASS,
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index a15bc636cc98..6c63c2222ea8 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -226,9 +226,33 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
return 0;
}
-static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+static int stack_map_get_next_key(struct bpf_map *map, void *key,
+ void *next_key)
{
- return -EINVAL;
+ struct bpf_stack_map *smap = container_of(map,
+ struct bpf_stack_map, map);
+ u32 id;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (!key) {
+ id = 0;
+ } else {
+ id = *(u32 *)key;
+ if (id >= smap->n_buckets || !smap->buckets[id])
+ id = 0;
+ else
+ id++;
+ }
+
+ while (id < smap->n_buckets && !smap->buckets[id])
+ id++;
+
+ if (id >= smap->n_buckets)
+ return -ENOENT;
+
+ *(u32 *)next_key = id;
+ return 0;
}
static int stack_map_update_elem(struct bpf_map *map, void *key, void *value,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 007802c5ca7d..ebf0fb23e237 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -905,9 +905,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
return id > 0 ? 0 : id;
}
-static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
+void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
- /* cBPF to eBPF migrations are currently not in the idr store. */
+ /* cBPF to eBPF migrations are currently not in the idr store.
+ * Offloaded programs are removed from the store when their device
+ * disappears - even if someone grabs an fd to them they are unusable,
+ * simply waiting for refcnt to drop to be freed.
+ */
if (!prog->aux->id)
return;
@@ -917,6 +921,7 @@ static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
__acquire(&prog_idr_lock);
idr_remove(&prog_idr, prog->aux->id);
+ prog->aux->id = 0;
if (do_idr_lock)
spin_unlock_bh(&prog_idr_lock);
@@ -1157,6 +1162,8 @@ static int bpf_prog_load(union bpf_attr *attr)
if (!prog)
return -ENOMEM;
+ prog->aux->offload_requested = !!attr->prog_ifindex;
+
err = security_bpf_prog_alloc(prog->aux);
if (err)
goto free_prog_nouncharge;
@@ -1178,7 +1185,7 @@ static int bpf_prog_load(union bpf_attr *attr)
atomic_set(&prog->aux->refcnt, 1);
prog->gpl_compatible = is_gpl ? 1 : 0;
- if (attr->prog_ifindex) {
+ if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_offload_init(prog, attr);
if (err)
goto free_prog;
@@ -1700,6 +1707,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
return -EFAULT;
}
+ if (bpf_prog_is_dev_bound(prog->aux)) {
+ err = bpf_prog_offload_info_fill(&info, prog);
+ if (err)
+ return err;
+ }
+
done:
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 98d8637cf70d..a2b211262c25 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4438,15 +4438,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
return 0;
}
-static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
- int insn_idx, int prev_insn_idx)
-{
- if (env->dev_ops && env->dev_ops->insn_hook)
- return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
-
- return 0;
-}
-
static int do_check(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *state;
@@ -4531,9 +4522,12 @@ static int do_check(struct bpf_verifier_env *env)
print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
}
- err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
- if (err)
- return err;
+ if (bpf_prog_is_dev_bound(env->prog->aux)) {
+ err = bpf_prog_offload_verify_insn(env, insn_idx,
+ prev_insn_idx);
+ if (err)
+ return err;
+ }
regs = cur_regs(env);
env->insn_aux_data[insn_idx].seen = true;
@@ -5463,7 +5457,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;
- if (env->prog->aux->offload) {
+ if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env);
if (ret)
goto err_unlock;
diff --git a/net/core/Makefile b/net/core/Makefile
index 1fd0a9c88b1b..6dbbba8c57ae 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
- fib_notifier.o
+ fib_notifier.o xdp.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 2eb66c0d9cdb..d7925ef8743d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3906,9 +3906,33 @@ drop:
return NET_RX_DROP;
}
+static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_rx_queue *rxqueue;
+
+ rxqueue = dev->_rx;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+
+ if (unlikely(index >= dev->real_num_rx_queues)) {
+ WARN_ONCE(dev->real_num_rx_queues > 1,
+ "%s received packet on queue %u, but number "
+ "of RX queues is %u\n",
+ dev->name, index, dev->real_num_rx_queues);
+
+ return rxqueue; /* Return first rxqueue */
+ }
+ rxqueue += index;
+ }
+ return rxqueue;
+}
+
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ struct netdev_rx_queue *rxqueue;
u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
void *orig_data;
@@ -3952,6 +3976,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
+ rxqueue = netif_get_rxqueue(skb);
+ xdp.rxq = &rxqueue->xdp_rxq;
+
act = bpf_prog_run_xdp(xdp_prog, &xdp);
off = xdp.data - orig_data;
@@ -7589,12 +7616,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
-#ifdef CONFIG_SYSFS
static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
size_t sz = count * sizeof(*rx);
+ int err = 0;
BUG_ON(count < 1);
@@ -7604,11 +7631,39 @@ static int netif_alloc_rx_queues(struct net_device *dev)
dev->_rx = rx;
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
rx[i].dev = dev;
+
+ /* XDP RX-queue setup */
+ err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
+ if (err < 0)
+ goto err_rxq_info;
+ }
return 0;
+
+err_rxq_info:
+ /* Rollback successful reg's and free other resources */
+ while (i--)
+ xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+ kfree(dev->_rx);
+ dev->_rx = NULL;
+ return err;
+}
+
+static void netif_free_rx_queues(struct net_device *dev)
+{
+ unsigned int i, count = dev->num_rx_queues;
+ struct netdev_rx_queue *rx;
+
+ /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
+ if (!dev->_rx)
+ return;
+
+ rx = dev->_rx;
+
+ for (i = 0; i < count; i++)
+ xdp_rxq_info_unreg(&rx[i].xdp_rxq);
}
-#endif
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue, void *_unused)
@@ -8169,12 +8224,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
-#ifdef CONFIG_SYSFS
if (rxqs < 1) {
pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
return NULL;
}
-#endif
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
@@ -8231,12 +8284,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (netif_alloc_netdev_queues(dev))
goto free_all;
-#ifdef CONFIG_SYSFS
dev->num_rx_queues = rxqs;
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_all;
-#endif
strcpy(dev->name, name);
dev->name_assign_type = name_assign_type;
@@ -8275,9 +8326,7 @@ void free_netdev(struct net_device *dev)
might_sleep();
netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kvfree(dev->_rx);
-#endif
+ netif_free_rx_queues(dev);
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
diff --git a/net/core/filter.c b/net/core/filter.c
index 130b842c3a15..acdb94c0e97f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4304,6 +4304,25 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data_end));
break;
+ case offsetof(struct xdp_md, ingress_ifindex):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, rxq));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct xdp_rxq_info, dev));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct net_device,
+ ifindex, 4, target_size));
+ break;
+ case offsetof(struct xdp_md, rx_queue_index):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, rxq));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct xdp_rxq_info,
+ queue_index, 4, target_size));
+ break;
}
return insn - insn_buf;
diff --git a/net/core/xdp.c b/net/core/xdp.c
new file mode 100644
index 000000000000..097a0f74e004
--- /dev/null
+++ b/net/core/xdp.c
@@ -0,0 +1,73 @@
+/* net/core/xdp.c
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+#include <linux/types.h>
+#include <linux/mm.h>
+
+#include <net/xdp.h>
+
+#define REG_STATE_NEW 0x0
+#define REG_STATE_REGISTERED 0x1
+#define REG_STATE_UNREGISTERED 0x2
+#define REG_STATE_UNUSED 0x3
+
+void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
+{
+ /* Simplify driver cleanup code paths, allow unreg "unused" */
+ if (xdp_rxq->reg_state == REG_STATE_UNUSED)
+ return;
+
+ WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
+
+ xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
+ xdp_rxq->dev = NULL;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
+
+static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
+{
+ memset(xdp_rxq, 0, sizeof(*xdp_rxq));
+}
+
+/* Returns 0 on success, negative on failure */
+int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index)
+{
+ if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
+ WARN(1, "Driver promised not to register this");
+ return -EINVAL;
+ }
+
+ if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
+ WARN(1, "Missing unregister, handled but fix driver");
+ xdp_rxq_info_unreg(xdp_rxq);
+ }
+
+ if (!dev) {
+ WARN(1, "Missing net_device from driver");
+ return -ENODEV;
+ }
+
+ /* State either UNREGISTERED or NEW */
+ xdp_rxq_info_init(xdp_rxq);
+ xdp_rxq->dev = dev;
+ xdp_rxq->queue_index = queue_index;
+
+ xdp_rxq->reg_state = REG_STATE_REGISTERED;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+
+void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
+{
+ xdp_rxq->reg_state = REG_STATE_UNUSED;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
+
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
+{
+ return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4fb944a7ecf8..3ff7a05bea9a 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -41,6 +41,7 @@ hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
+hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
# Libbpf dependencies
@@ -90,6 +91,7 @@ xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
+xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
# Tell kbuild to always build the programs
@@ -139,6 +141,7 @@ always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
+always += xdp_rxq_info_kern.o
always += syscall_tp_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
@@ -182,6 +185,7 @@ HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
+HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
new file mode 100644
index 000000000000..3fd209291653
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * Example howto extract XDP RX-queue info
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Config setup from with userspace
+ *
+ * User-side setup ifindex in config_map, to verify that
+ * ctx->ingress_ifindex is correct (against configured ifindex)
+ */
+struct config {
+ __u32 action;
+ int ifindex;
+};
+struct bpf_map_def SEC("maps") config_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct config),
+ .max_entries = 1,
+};
+
+/* Common stats data record (shared with userspace) */
+struct datarec {
+ __u64 processed;
+ __u64 issue;
+};
+
+struct bpf_map_def SEC("maps") stats_global_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+#define MAX_RXQs 64
+
+/* Stats per rx_queue_index (per CPU) */
+struct bpf_map_def SEC("maps") rx_queue_index_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = MAX_RXQs + 1,
+};
+
+SEC("xdp_prog0")
+int xdp_prognum0(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct datarec *rec, *rxq_rec;
+ int ingress_ifindex;
+ struct config *config;
+ u32 key = 0;
+
+ /* Global stats record */
+ rec = bpf_map_lookup_elem(&stats_global_map, &key);
+ if (!rec)
+ return XDP_ABORTED;
+ rec->processed++;
+
+ /* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF
+ * instructions inside kernel to access xdp_rxq->dev->ifindex
+ */
+ ingress_ifindex = ctx->ingress_ifindex;
+
+ config = bpf_map_lookup_elem(&config_map, &key);
+ if (!config)
+ return XDP_ABORTED;
+
+ /* Simple test: check ctx provided ifindex is as expected */
+ if (ingress_ifindex != config->ifindex) {
+ /* count this error case */
+ rec->issue++;
+ return XDP_ABORTED;
+ }
+
+ /* Update stats per rx_queue_index. Handle if rx_queue_index
+ * is larger than stats map can contain info for.
+ */
+ key = ctx->rx_queue_index;
+ if (key >= MAX_RXQs)
+ key = MAX_RXQs;
+ rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key);
+ if (!rxq_rec)
+ return XDP_ABORTED;
+ rxq_rec->processed++;
+ if (key == MAX_RXQs)
+ rxq_rec->issue++;
+
+ return config->action;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
new file mode 100644
index 000000000000..32430e8b3a6a
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+static const char *__doc__ = " XDP RX-queue info extract example\n\n"
+ "Monitor how many packets per sec (pps) are received\n"
+ "per NIC RX queue index and which CPU processed the packet\n"
+ ;
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <locale.h>
+#include <sys/resource.h>
+#include <getopt.h>
+#include <net/if.h>
+#include <time.h>
+
+#include <arpa/inet.h>
+#include <linux/if_link.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+
+static int ifindex = -1;
+static char ifname_buf[IF_NAMESIZE];
+static char *ifname;
+
+static __u32 xdp_flags;
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"dev", required_argument, NULL, 'd' },
+ {"skb-mode", no_argument, NULL, 'S' },
+ {"sec", required_argument, NULL, 's' },
+ {"no-separators", no_argument, NULL, 'z' },
+ {"action", required_argument, NULL, 'a' },
+ {0, 0, NULL, 0 }
+};
+
+static void int_exit(int sig)
+{
+ fprintf(stderr,
+ "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
+ ifindex, ifname);
+ if (ifindex > -1)
+ set_link_xdp_fd(ifindex, -1, xdp_flags);
+ exit(EXIT_OK);
+}
+
+struct config {
+ __u32 action;
+ int ifindex;
+};
+#define XDP_ACTION_MAX (XDP_TX + 1)
+#define XDP_ACTION_MAX_STRLEN 11
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+};
+
+static const char *action2str(int action)
+{
+ if (action < XDP_ACTION_MAX)
+ return xdp_action_names[action];
+ return NULL;
+}
+
+static int parse_xdp_action(char *action_str)
+{
+ size_t maxlen;
+ __u64 action = -1;
+ int i;
+
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ maxlen = XDP_ACTION_MAX_STRLEN;
+ if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) {
+ action = i;
+ break;
+ }
+ }
+ return action;
+}
+
+static void list_xdp_actions(void)
+{
+ int i;
+
+ printf("Available XDP --action <options>\n");
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ printf("\t%s\n", xdp_action_names[i]);
+ printf("\n");
+}
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf("\nDOCUMENTATION:\n%s\n", __doc__);
+ printf(" Usage: %s (options-see-below)\n", argv[0]);
+ printf(" Listing options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)",
+ *long_options[i].flag);
+ else
+ printf(" short-option: -%c",
+ long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+ list_xdp_actions();
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(void)
+{
+ struct timespec t;
+ int res;
+
+ res = clock_gettime(CLOCK_MONOTONIC, &t);
+ if (res < 0) {
+ fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+ exit(EXIT_FAIL);
+ }
+ return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+/* Common stats data record shared with _kern.c */
+struct datarec {
+ __u64 processed;
+ __u64 issue;
+};
+struct record {
+ __u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+struct stats_record {
+ struct record stats;
+ struct record *rxq;
+};
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec *array;
+ size_t size;
+
+ size = sizeof(struct datarec) * nr_cpus;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct record *alloc_record_per_rxq(void)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ struct record *array;
+ size_t size;
+
+ size = sizeof(struct record) * nr_rxqs;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ struct stats_record *rec;
+ int i;
+
+ rec = malloc(sizeof(*rec));
+ memset(rec, 0, sizeof(*rec));
+ if (!rec) {
+ fprintf(stderr, "Mem alloc error\n");
+ exit(EXIT_FAIL_MEM);
+ }
+ rec->rxq = alloc_record_per_rxq();
+ for (i = 0; i < nr_rxqs; i++)
+ rec->rxq[i].cpu = alloc_record_per_cpu();
+
+ rec->stats.cpu = alloc_record_per_cpu();
+ return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ int i;
+
+ for (i = 0; i < nr_rxqs; i++)
+ free(r->rxq[i].cpu);
+
+ free(r->rxq);
+ free(r->stats.cpu);
+ free(r);
+}
+
+static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec values[nr_cpus];
+ __u64 sum_processed = 0;
+ __u64 sum_issue = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return false;
+ }
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = values[i].processed;
+ sum_processed += values[i].processed;
+ rec->cpu[i].issue = values[i].issue;
+ sum_issue += values[i].issue;
+ }
+ rec->total.processed = sum_processed;
+ rec->total.issue = sum_issue;
+ return true;
+}
+
+static void stats_collect(struct stats_record *rec)
+{
+ int fd, i, max_rxqs;
+
+ fd = map_data[1].fd; /* map: stats_global_map */
+ map_collect_percpu(fd, 0, &rec->stats);
+
+ fd = map_data[2].fd; /* map: rx_queue_index_map */
+ max_rxqs = map_data[2].def.max_entries;
+ for (i = 0; i < max_rxqs; i++)
+ map_collect_percpu(fd, i, &rec->rxq[i]);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double) period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->processed - p->processed;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r,
+ struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->issue - p->issue;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static void stats_print(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int action)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ double pps = 0, err = 0;
+ struct record *rec, *prev;
+ double t;
+ int rxq;
+ int i;
+
+ /* Header */
+ printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
+ ifname, ifindex, action2str(action));
+
+ /* stats_global_map */
+ {
+ char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n";
+ char *fm2_rx = "%-15s %-7s %'-11.0f\n";
+ char *errstr = "";
+
+ printf("%-15s %-7s %-11s %-11s\n",
+ "XDP stats", "CPU", "pps", "issue-pps");
+
+ rec = &stats_rec->stats;
+ prev = &stats_prev->stats;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps (r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0)
+ errstr = "invalid-ifindex";
+ if (pps > 0)
+ printf(fmt_rx, "XDP-RX CPU",
+ i, pps, err, errstr);
+ }
+ pps = calc_pps (&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ printf(fm2_rx, "XDP-RX CPU", "total", pps, err);
+ }
+
+ /* rx_queue_index_map */
+ printf("\n%-15s %-7s %-11s %-11s\n",
+ "RXQ stats", "RXQ:CPU", "pps", "issue-pps");
+
+ for (rxq = 0; rxq < nr_rxqs; rxq++) {
+ char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n";
+ char *fm2_rx = "%-15s %3d:%-3s %'-11.0f\n";
+ char *errstr = "";
+ int rxq_ = rxq;
+
+ /* Last RXQ in map catch overflows */
+ if (rxq_ == nr_rxqs - 1)
+ rxq_ = -1;
+
+ rec = &stats_rec->rxq[rxq];
+ prev = &stats_prev->rxq[rxq];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps (r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0) {
+ if (rxq_ == -1)
+ errstr = "map-overflow-RXQ";
+ else
+ errstr = "err";
+ }
+ if (pps > 0)
+ printf(fmt_rx, "rx_queue_index",
+ rxq_, i, pps, err, errstr);
+ }
+ pps = calc_pps (&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ if (pps || err)
+ printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err);
+ }
+}
+
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+static void stats_poll(int interval, int action)
+{
+ struct stats_record *record, *prev;
+
+ record = alloc_stats_record();
+ prev = alloc_stats_record();
+ stats_collect(record);
+
+ while (1) {
+ swap(&prev, &record);
+ stats_collect(record);
+ stats_print(record, prev, action);
+ sleep(interval);
+ }
+
+ free_stats_record(record);
+ free_stats_record(prev);
+}
+
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ bool use_separators = true;
+ struct config cfg = { 0 };
+ char filename[256];
+ int longindex = 0;
+ int interval = 2;
+ __u32 key = 0;
+ int opt, err;
+
+ char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
+ int action = XDP_PASS; /* Default action */
+ char *action_str = NULL;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ if (load_bpf_file(filename)) {
+ fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
+ return EXIT_FAIL;
+ }
+
+ if (!prog_fd[0]) {
+ fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
+ return EXIT_FAIL;
+ }
+
+ /* Parse commands line args */
+ while ((opt = getopt_long(argc, argv, "hSd:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'd':
+ if (strlen(optarg) >= IF_NAMESIZE) {
+ fprintf(stderr, "ERR: --dev name too long\n");
+ goto error;
+ }
+ ifname = (char *)&ifname_buf;
+ strncpy(ifname, optarg, IF_NAMESIZE);
+ ifindex = if_nametoindex(ifname);
+ if (ifindex == 0) {
+ fprintf(stderr,
+ "ERR: --dev name unknown err(%d):%s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ break;
+ case 's':
+ interval = atoi(optarg);
+ break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'z':
+ use_separators = false;
+ break;
+ case 'a':
+ action_str = (char *)&action_str_buf;
+ strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
+ break;
+ case 'h':
+ error:
+ default:
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ /* Required option */
+ if (ifindex == -1) {
+ fprintf(stderr, "ERR: required option --dev missing\n");
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ cfg.ifindex = ifindex;
+
+ /* Parse action string */
+ if (action_str) {
+ action = parse_xdp_action(action_str);
+ if (action < 0) {
+ fprintf(stderr, "ERR: Invalid XDP --action: %s\n",
+ action_str);
+ list_xdp_actions();
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ cfg.action = action;
+
+ /* Trick to pretty printf with thousands separators use %' */
+ if (use_separators)
+ setlocale(LC_NUMERIC, "en_US");
+
+ /* User-side setup ifindex in config_map */
+ err = bpf_map_update_elem(map_fd[0], &key, &cfg, 0);
+ if (err) {
+ fprintf(stderr, "Store config failed (err:%d)\n", err);
+ exit(EXIT_FAIL_BPF);
+ }
+
+ /* Remove XDP program when program is interrupted */
+ signal(SIGINT, int_exit);
+
+ if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ fprintf(stderr, "link set xdp fd failed\n");
+ return EXIT_FAIL_XDP;
+ }
+
+ stats_poll(interval, action);
+ return EXIT_OK;
+}
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 07a6697466ef..c8ec0ae16bf0 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -9,6 +9,35 @@ MAKE = make
CFLAGS += -Wall -O2
CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+FEATURE_USER = .bpf
+FEATURE_TESTS = libbfd disassembler-four-args
+FEATURE_DISPLAY = libbfd disassembler-four-args
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+ check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
+ifeq ($(feature-disassembler-four-args), 1)
+CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+endif
+
%.yacc.c: %.y
$(YACC) -o $@ -d $<
diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index 75bf526a0168..30044bc4f389 100644
--- a/tools/bpf/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -72,7 +72,14 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
disassemble_init_for_target(&info);
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
disassemble = disassembler(bfdf);
+#endif
assert(disassemble);
do {
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 45c71b1f682b..2fe2a1bdbe3e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -15,12 +15,12 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
*COMMANDS* :=
- { **list** | **attach** | **detach** | **help** }
+ { **show** | **list** | **attach** | **detach** | **help** }
MAP COMMANDS
=============
-| **bpftool** **cgroup list** *CGROUP*
+| **bpftool** **cgroup { show | list }** *CGROUP*
| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
| **bpftool** **cgroup help**
@@ -31,7 +31,7 @@ MAP COMMANDS
DESCRIPTION
===========
- **bpftool cgroup list** *CGROUP*
+ **bpftool cgroup { show | list }** *CGROUP*
List all programs attached to the cgroup *CGROUP*.
Output will start with program ID followed by attach type,
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 421cabc417e6..0ab32b312aec 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -15,13 +15,13 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
*COMMANDS* :=
- { **show** | **dump** | **update** | **lookup** | **getnext** | **delete**
+ { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
| **pin** | **help** }
MAP COMMANDS
=============
-| **bpftool** **map show** [*MAP*]
+| **bpftool** **map { show | list }** [*MAP*]
| **bpftool** **map dump** *MAP*
| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
| **bpftool** **map lookup** *MAP* **key** *BYTES*
@@ -36,7 +36,7 @@ MAP COMMANDS
DESCRIPTION
===========
- **bpftool map show** [*MAP*]
+ **bpftool map { show | list }** [*MAP*]
Show information about loaded maps. If *MAP* is specified
show information only about given map, otherwise list all
maps currently loaded on the system.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 81c97c0e9b67..e4ceee7f2dff 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -15,12 +15,12 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
*COMMANDS* :=
- { **show** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
+ { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
MAP COMMANDS
=============
-| **bpftool** **prog show** [*PROG*]
+| **bpftool** **prog { show | list }** [*PROG*]
| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
| **bpftool** **prog pin** *PROG* *FILE*
@@ -31,7 +31,7 @@ MAP COMMANDS
DESCRIPTION
===========
- **bpftool prog show** [*PROG*]
+ **bpftool prog { show | list }** [*PROG*]
Show information about loaded programs. If *PROG* is
specified show information only about given program, otherwise
list all programs currently loaded on the system.
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 6732a5a617e4..20689a321ffe 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -22,13 +22,13 @@ SYNOPSIS
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
*MAP-COMMANDS* :=
- { **show** | **dump** | **update** | **lookup** | **getnext** | **delete**
+ { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
| **pin** | **help** }
- *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin**
+ *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
| **load** | **help** }
- *CGROUP-COMMANDS* := { **list** | **attach** | **detach** | **help** }
+ *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
DESCRIPTION
===========
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 3f17ad317512..2237bc43f71c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -23,6 +23,8 @@ endif
LIBBPF = $(BPF_PATH)libbpf.a
+BPFTOOL_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
+
$(LIBBPF): FORCE
$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
@@ -38,11 +40,36 @@ CC = gcc
CFLAGS += -O2
CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
+CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
INSTALL ?= install
RM ?= rm -f
+FEATURE_USER = .bpftool
+FEATURE_TESTS = libbfd disassembler-four-args
+FEATURE_DISPLAY = libbfd disassembler-four-args
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+ check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
+ifeq ($(feature-disassembler-four-args), 1)
+CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+endif
+
include $(wildcard *.d)
all: $(OUTPUT)bpftool
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 7febee05c8e7..0137866bb8f6 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -197,7 +197,7 @@ _bpftool()
local PROG_TYPE='id pinned tag'
case $command in
- show)
+ show|list)
[[ $prev != "$command" ]] && return 0
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
return 0
@@ -232,7 +232,7 @@ _bpftool()
;;
*)
[[ $prev == $object ]] && \
- COMPREPLY=( $( compgen -W 'dump help pin show' -- \
+ COMPREPLY=( $( compgen -W 'dump help pin show list' -- \
"$cur" ) )
;;
esac
@@ -240,7 +240,7 @@ _bpftool()
map)
local MAP_TYPE='id pinned'
case $command in
- show|dump)
+ show|list|dump)
case $prev in
$command)
COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
@@ -343,7 +343,7 @@ _bpftool()
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'delete dump getnext help \
- lookup pin show update' -- "$cur" ) )
+ lookup pin show list update' -- "$cur" ) )
;;
esac
;;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 34ca303d72bc..cae32a61cb18 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -41,7 +41,7 @@ static enum bpf_attach_type parse_attach_type(const char *str)
return __MAX_BPF_ATTACH_TYPE;
}
-static int list_bpf_prog(int id, const char *attach_type_str,
+static int show_bpf_prog(int id, const char *attach_type_str,
const char *attach_flags_str)
{
struct bpf_prog_info info = {};
@@ -77,7 +77,7 @@ static int list_bpf_prog(int id, const char *attach_type_str,
return 0;
}
-static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
{
__u32 prog_ids[1024] = {0};
char *attach_flags_str;
@@ -111,29 +111,29 @@ static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
}
for (iter = 0; iter < prog_cnt; iter++)
- list_bpf_prog(prog_ids[iter], attach_type_strings[type],
+ show_bpf_prog(prog_ids[iter], attach_type_strings[type],
attach_flags_str);
return 0;
}
-static int do_list(int argc, char **argv)
+static int do_show(int argc, char **argv)
{
enum bpf_attach_type type;
int cgroup_fd;
int ret = -1;
if (argc < 1) {
- p_err("too few parameters for cgroup list\n");
+ p_err("too few parameters for cgroup show");
goto exit;
} else if (argc > 1) {
- p_err("too many parameters for cgroup list\n");
+ p_err("too many parameters for cgroup show");
goto exit;
}
cgroup_fd = open(argv[0], O_RDONLY);
if (cgroup_fd < 0) {
- p_err("can't open cgroup %s\n", argv[1]);
+ p_err("can't open cgroup %s", argv[1]);
goto exit;
}
@@ -147,10 +147,10 @@ static int do_list(int argc, char **argv)
/*
* Not all attach types may be supported, so it's expected,
* that some requests will fail.
- * If we were able to get the list for at least one
+ * If we were able to get the show for at least one
* attach type, let's return 0.
*/
- if (list_attached_bpf_progs(cgroup_fd, type) == 0)
+ if (show_attached_bpf_progs(cgroup_fd, type) == 0)
ret = 0;
}
@@ -171,19 +171,19 @@ static int do_attach(int argc, char **argv)
int i;
if (argc < 4) {
- p_err("too few parameters for cgroup attach\n");
+ p_err("too few parameters for cgroup attach");
goto exit;
}
cgroup_fd = open(argv[0], O_RDONLY);
if (cgroup_fd < 0) {
- p_err("can't open cgroup %s\n", argv[1]);
+ p_err("can't open cgroup %s", argv[1]);
goto exit;
}
attach_type = parse_attach_type(argv[1]);
if (attach_type == __MAX_BPF_ATTACH_TYPE) {
- p_err("invalid attach type\n");
+ p_err("invalid attach type");
goto exit_cgroup;
}
@@ -199,7 +199,7 @@ static int do_attach(int argc, char **argv)
} else if (is_prefix(argv[i], "override")) {
attach_flags |= BPF_F_ALLOW_OVERRIDE;
} else {
- p_err("unknown option: %s\n", argv[i]);
+ p_err("unknown option: %s", argv[i]);
goto exit_cgroup;
}
}
@@ -229,13 +229,13 @@ static int do_detach(int argc, char **argv)
int ret = -1;
if (argc < 4) {
- p_err("too few parameters for cgroup detach\n");
+ p_err("too few parameters for cgroup detach");
goto exit;
}
cgroup_fd = open(argv[0], O_RDONLY);
if (cgroup_fd < 0) {
- p_err("can't open cgroup %s\n", argv[1]);
+ p_err("can't open cgroup %s", argv[1]);
goto exit;
}
@@ -277,7 +277,7 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s list CGROUP\n"
+ "Usage: %s %s { show | list } CGROUP\n"
" %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
" %s %s detach CGROUP ATTACH_TYPE PROG\n"
" %s %s help\n"
@@ -294,7 +294,8 @@ static int do_help(int argc, char **argv)
}
static const struct cmd cmds[] = {
- { "list", do_list },
+ { "show", do_show },
+ { "list", do_show },
{ "attach", do_attach },
{ "detach", do_detach },
{ "help", do_help },
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b62c94e3997a..6601c95a9258 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -44,7 +44,9 @@
#include <unistd.h>
#include <linux/limits.h>
#include <linux/magic.h>
+#include <net/if.h>
#include <sys/mount.h>
+#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
@@ -412,3 +414,53 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
free(obj);
}
}
+
+static char *
+ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
+{
+ struct stat st;
+ int err;
+
+ err = stat("/proc/self/ns/net", &st);
+ if (err) {
+ p_err("Can't stat /proc/self: %s", strerror(errno));
+ return NULL;
+ }
+
+ if (st.st_dev != ns_dev || st.st_ino != ns_ino)
+ return NULL;
+
+ return if_indextoname(ifindex, buf);
+}
+
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+ char name[IF_NAMESIZE];
+
+ if (!ifindex)
+ return;
+
+ printf(" dev ");
+ if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+ printf("%s", name);
+ else
+ printf("ifindex %u ns_dev %llu ns_ino %llu",
+ ifindex, ns_dev, ns_inode);
+}
+
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+ char name[IF_NAMESIZE];
+
+ if (!ifindex)
+ return;
+
+ jsonw_name(json_wtr, "dev");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "ifindex", ifindex);
+ jsonw_uint_field(json_wtr, "ns_dev", ns_dev);
+ jsonw_uint_field(json_wtr, "ns_inode", ns_inode);
+ if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_end_object(json_wtr);
+}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 1551d3918d4c..57d32e8a1391 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -107,7 +107,14 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
disassemble_init_for_target(&info);
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
disassemble = disassembler(bfdf);
+#endif
assert(disassemble);
if (json_output)
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index ecd53ccf1239..3a0396d87c42 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -38,7 +38,6 @@
#include <errno.h>
#include <getopt.h>
#include <linux/bpf.h>
-#include <linux/version.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -95,21 +94,13 @@ static int do_help(int argc, char **argv)
static int do_version(int argc, char **argv)
{
- unsigned int version[3];
-
- version[0] = LINUX_VERSION_CODE >> 16;
- version[1] = LINUX_VERSION_CODE >> 8 & 0xf;
- version[2] = LINUX_VERSION_CODE & 0xf;
-
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_name(json_wtr, "version");
- jsonw_printf(json_wtr, "\"%u.%u.%u\"",
- version[0], version[1], version[2]);
+ jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION);
jsonw_end_object(json_wtr);
} else {
- printf("%s v%u.%u.%u\n", bin_name,
- version[0], version[1], version[2]);
+ printf("%s v%s\n", bin_name, BPFTOOL_VERSION);
}
return 0;
}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 8f6d3cac0347..65b526fe6e7e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -96,6 +96,8 @@ struct pinned_obj {
int build_pinned_obj_table(struct pinned_obj_table *table,
enum bpf_obj_type type);
void delete_pinned_obj_table(struct pinned_obj_table *tab);
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
struct cmd {
const char *cmd;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a8c3a33dd185..8d7db9d6b9cd 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -861,7 +861,7 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s show [MAP]\n"
+ "Usage: %s %s { show | list } [MAP]\n"
" %s %s dump MAP\n"
" %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n"
" %s %s lookup MAP key BYTES\n"
@@ -885,6 +885,7 @@ static int do_help(int argc, char **argv)
static const struct cmd cmds[] = {
{ "show", do_show },
+ { "list", do_show },
{ "help", do_help },
{ "dump", do_dump },
{ "update", do_update },
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index fd0873178503..c6a28be4665c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -230,6 +230,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
info->tag[0], info->tag[1], info->tag[2], info->tag[3],
info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
+ print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
if (info->load_time) {
char buf[32];
@@ -287,6 +289,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
printf("tag ");
fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
+ print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
printf("\n");
if (info->load_time) {
@@ -810,12 +813,12 @@ static int do_load(int argc, char **argv)
usage();
if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) {
- p_err("failed to load program\n");
+ p_err("failed to load program");
return -1;
}
if (do_pin_fd(prog_fd, argv[1])) {
- p_err("failed to pin program\n");
+ p_err("failed to pin program");
return -1;
}
@@ -833,7 +836,7 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s show [PROG]\n"
+ "Usage: %s %s { show | list } [PROG]\n"
" %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
" %s %s dump jited PROG [{ file FILE | opcodes }]\n"
" %s %s pin PROG FILE\n"
@@ -851,6 +854,7 @@ static int do_help(int argc, char **argv)
static const struct cmd cmds[] = {
{ "show", do_show },
+ { "list", do_show },
{ "help", do_help },
{ "dump", do_dump },
{ "pin", do_pin },
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 96982640fbf8..17f2c73fff8b 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -13,6 +13,7 @@ FILES= \
test-hello.bin \
test-libaudit.bin \
test-libbfd.bin \
+ test-disassembler-four-args.bin \
test-liberty.bin \
test-liberty-z.bin \
test-cplus-demangle.bin \
@@ -188,6 +189,9 @@ $(OUTPUT)test-libpython-version.bin:
$(OUTPUT)test-libbfd.bin:
$(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
+$(OUTPUT)test-disassembler-four-args.bin:
+ $(BUILD) -lbfd -lopcodes
+
$(OUTPUT)test-liberty.bin:
$(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
diff --git a/tools/build/feature/test-disassembler-four-args.c b/tools/build/feature/test-disassembler-four-args.c
new file mode 100644
index 000000000000..45ce65cfddf0
--- /dev/null
+++ b/tools/build/feature/test-disassembler-four-args.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bfd.h>
+#include <dis-asm.h>
+
+int main(void)
+{
+ bfd *abfd = bfd_openr(NULL, NULL);
+
+ disassembler(bfd_get_arch(abfd),
+ bfd_big_endian(abfd),
+ bfd_get_mach(abfd),
+ abfd);
+
+ return 0;
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db1b0923a308..4e8c60acfa32 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -921,6 +921,9 @@ struct bpf_prog_info {
__u32 nr_map_ids;
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u64 netns_dev;
+ __u64 netns_ino;
} __attribute__((aligned(8)));
struct bpf_map_info {
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1304753d29ea..a8aa7e251c8e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
- test_l4lb_noinline.o test_xdp_noinline.o
+ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
test_offload.py
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index c940505c2978..e3c750f17cb8 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -18,6 +18,8 @@ import argparse
import json
import os
import pprint
+import random
+import string
import subprocess
import time
@@ -27,6 +29,7 @@ bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
pp = pprint.PrettyPrinter()
devs = [] # devices we created for clean up
files = [] # files to be removed
+netns = [] # net namespaces to be removed
def log_get_sec(level=0):
return "*" * (log_level + level)
@@ -128,22 +131,25 @@ def rm(f):
if f in files:
files.remove(f)
-def tool(name, args, flags, JSON=True, fail=True):
+def tool(name, args, flags, JSON=True, ns="", fail=True):
params = ""
if JSON:
params += "%s " % (flags["json"])
- ret, out = cmd(name + " " + params + args, fail=fail)
+ if ns != "":
+ ns = "ip netns exec %s " % (ns)
+
+ ret, out = cmd(ns + name + " " + params + args, fail=fail)
if JSON and len(out.strip()) != 0:
return ret, json.loads(out)
else:
return ret, out
-def bpftool(args, JSON=True, fail=True):
- return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail)
+def bpftool(args, JSON=True, ns="", fail=True):
+ return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
-def bpftool_prog_list(expected=None):
- _, progs = bpftool("prog show", JSON=True, fail=True)
+def bpftool_prog_list(expected=None, ns=""):
+ _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
if expected is not None:
if len(progs) != expected:
fail(True, "%d BPF programs loaded, expected %d" %
@@ -158,13 +164,13 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
time.sleep(0.05)
raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
-def ip(args, force=False, JSON=True, fail=True):
+def ip(args, force=False, JSON=True, ns="", fail=True):
if force:
args = "-force " + args
- return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail)
+ return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail)
-def tc(args, JSON=True, fail=True):
- return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail)
+def tc(args, JSON=True, ns="", fail=True):
+ return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
def ethtool(dev, opt, args, fail=True):
return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
@@ -178,6 +184,15 @@ def bpf_pinned(name):
def bpf_bytecode(bytecode):
return "bytecode \"%s\"" % (bytecode)
+def mknetns(n_retry=10):
+ for i in range(n_retry):
+ name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+ ret, _ = ip("netns add %s" % (name), fail=False)
+ if ret == 0:
+ netns.append(name)
+ return name
+ return None
+
class DebugfsDir:
"""
Class for accessing DebugFS directories as a dictionary.
@@ -237,6 +252,8 @@ class NetdevSim:
self.dev = self._netdevsim_create()
devs.append(self)
+ self.ns = ""
+
self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
self.dfs_refresh()
@@ -257,7 +274,7 @@ class NetdevSim:
def remove(self):
devs.remove(self)
- ip("link del dev %s" % (self.dev["ifname"]))
+ ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
def dfs_refresh(self):
self.dfs = DebugfsDir(self.dfs_dir)
@@ -285,6 +302,11 @@ class NetdevSim:
time.sleep(0.05)
raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
+ def set_ns(self, ns):
+ name = "1" if ns == "" else ns
+ ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+ self.ns = ns
+
def set_mtu(self, mtu, fail=True):
return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
fail=fail)
@@ -372,6 +394,8 @@ def clean_up():
dev.remove()
for f in files:
cmd("rm -f %s" % (f))
+ for ns in netns:
+ cmd("ip netns delete %s" % (ns))
def pin_prog(file_name, idx=0):
progs = bpftool_prog_list(expected=(idx + 1))
@@ -381,6 +405,35 @@ def pin_prog(file_name, idx=0):
return file_name, bpf_pinned(file_name)
+def check_dev_info(other_ns, ns, pin_file=None, removed=False):
+ if removed:
+ bpftool_prog_list(expected=0)
+ ret, err = bpftool("prog show pin %s" % (pin_file), fail=False)
+ fail(ret == 0, "Showing prog with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing prog with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+ return
+ progs = bpftool_prog_list(expected=int(not removed), ns=ns)
+ prog = progs[0]
+
+ fail("dev" not in prog.keys(), "Device parameters not reported")
+ dev = prog["dev"]
+ fail("ifindex" not in dev.keys(), "Device parameters not reported")
+ fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+ fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+ if not removed and not other_ns:
+ fail("ifname" not in dev.keys(), "Ifname not reported")
+ fail(dev["ifname"] != sim["ifname"],
+ "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+ else:
+ fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+ if removed:
+ fail(dev["ifindex"] != 0, "Device perameters not zero on removed")
+ fail(dev["ns_dev"] != 0, "Device perameters not zero on removed")
+ fail(dev["ns_inode"] != 0, "Device perameters not zero on removed")
+
# Parse command line
parser = argparse.ArgumentParser()
parser.add_argument("--log", help="output verbose log to given file")
@@ -417,6 +470,12 @@ for s in samples:
skip(ret != 0, "sample %s/%s not found, please compile it" %
(bpf_test_dir, s))
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
try:
obj = bpf_obj("sample_ret0.o")
bytecode = bpf_bytecode("1,6 0 0 4294967295,")
@@ -549,6 +608,8 @@ try:
progs = bpftool_prog_list(expected=1)
fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
"Loaded program has wrong ID")
+ fail("dev" in progs[0].keys(),
+ "Device parameters reported for non-offloaded program")
start_test("Test XDP prog replace with bad flags...")
ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
@@ -673,6 +734,35 @@ try:
fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
(time_diff, delay_sec))
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+ sim.set_xdp(obj, "offload")
+
+ start_test("Test bpftool bound info reporting (own ns)...")
+ check_dev_info(False, "")
+
+ start_test("Test bpftool bound info reporting (other ns)...")
+ ns = mknetns()
+ sim.set_ns(ns)
+ check_dev_info(True, "")
+
+ start_test("Test bpftool bound info reporting (remote ns)...")
+ check_dev_info(False, ns)
+
+ start_test("Test bpftool bound info reporting (back to own ns)...")
+ sim.set_ns("")
+ check_dev_info(False, "")
+
+ pin_file, _ = pin_prog("/sys/fs/bpf/tmp")
+ sim.remove()
+
+ start_test("Test bpftool bound info reporting (removed dev)...")
+ check_dev_info(True, "", pin_file=pin_file, removed=True)
+
print("%s: OK" % (os.path.basename(__file__)))
finally:
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 09087ab12293..b549308abd19 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -837,6 +837,132 @@ static void test_tp_attach_query(void)
free(query);
}
+static int compare_map_keys(int map1_fd, int map2_fd)
+{
+ __u32 key, next_key;
+ char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
+ int err;
+
+ err = bpf_map_get_next_key(map1_fd, NULL, &key);
+ if (err)
+ return err;
+ err = bpf_map_lookup_elem(map2_fd, &key, val_buf);
+ if (err)
+ return err;
+
+ while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) {
+ err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf);
+ if (err)
+ return err;
+
+ key = next_key;
+ }
+ if (errno != ENOENT)
+ return -1;
+
+ return 0;
+}
+
+static void test_stacktrace_map()
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd;
+ const char *file = "./test_stacktrace_map.o";
+ int bytes, efd, err, pmu_fd, prog_fd;
+ struct perf_event_attr attr = {};
+ __u32 key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ /* Get the ID for the sched/sched_switch tracepoint */
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ /* Open the perf event and attach bpf progrram */
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* give some time for bpf program run */
+ sleep(1);
+
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vise versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ ; /* fall through */
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+
+out:
+ return;
+}
+
int main(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -852,6 +978,7 @@ int main(void)
test_pkt_md_access();
test_obj_name();
test_tp_attach_query();
+ test_stacktrace_map();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c
new file mode 100644
index 000000000000..76d85c5d08bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 10000,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+ unsigned long long pad;
+ char prev_comm[16];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[16];
+ int next_pid;
+ int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+ __u32 key = 0, val = 0, *value_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* The size of stackmap and stackid_hmap should be the same */
+ key = bpf_get_stackid(ctx, &stackmap, 0);
+ if ((int)key >= 0)
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */