aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-07-14 18:47:44 -0700
committerDavid S. Miller <davem@davemloft.net>2018-07-14 18:47:44 -0700
commit2aa4a3378ad077d02131a23d22641ae8ae44cb28 (patch)
treeda31b9ef7436fb938c874d549d95a0ab1aac1b84 /include/linux
parentMerge branch 'mlxsw-VRRP' (diff)
parentMerge branch 'bpf-tcp-listen-cb' (diff)
downloadlinux-dev-2aa4a3378ad077d02131a23d22641ae8ae44cb28.tar.xz
linux-dev-2aa4a3378ad077d02131a23d22641ae8ae44cb28.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2018-07-15 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Various different arm32 JIT improvements in order to optimize code emission and make the JIT code itself more robust, from Russell. 2) Support simultaneous driver and offloaded XDP in order to allow for advanced use-cases where some work is offloaded to the NIC and some to the host. Also add ability for bpftool to load programs and maps beyond just the cgroup case, from Jakub. 3) Add BPF JIT support in nfp for multiplication as well as division. For the latter in particular, it uses the reciprocal algorithm to emulate it, from Jiong. 4) Add BTF pretty print functionality to bpftool in plain and JSON output format, from Okash. 5) Add build and installation to the BPF helper man page into bpftool, from Quentin. 6) Add a TCP BPF callback for listening sockets which is triggered right after the socket transitions to TCP_LISTEN state, from Andrey. 7) Add a new cgroup tree command to bpftool which iterates over the whole cgroup tree and prints all attached programs, from Roman. 8) Improve xdp_redirect_cpu sample to support parsing of double VLAN tagged packets, from Jesper. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/netdevice.h12
-rw-r--r--include/linux/reciprocal_div.h68
2 files changed, 72 insertions, 8 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2daf2fa6554f..4fa7f7a3f8b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -823,11 +823,8 @@ enum bpf_netdev_command {
*/
XDP_SETUP_PROG,
XDP_SETUP_PROG_HW,
- /* Check if a bpf program is set on the device. The callee should
- * set @prog_attached to one of XDP_ATTACHED_* values, note that "true"
- * is equivalent to XDP_ATTACHED_DRV.
- */
XDP_QUERY_PROG,
+ XDP_QUERY_PROG_HW,
/* BPF program for offload callbacks, invoked at program load time. */
BPF_OFFLOAD_VERIFIER_PREP,
BPF_OFFLOAD_TRANSLATE,
@@ -851,9 +848,8 @@ struct netdev_bpf {
struct bpf_prog *prog;
struct netlink_ext_ack *extack;
};
- /* XDP_QUERY_PROG */
+ /* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */
struct {
- u8 prog_attached;
u32 prog_id;
/* flags with which program was installed */
u32 prog_flags;
@@ -3560,8 +3556,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
int fd, u32 flags);
-void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
- struct netdev_bpf *xdp);
+u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
+ enum bpf_netdev_command cmd);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
index e031e9f2f9d8..585ce89c0f33 100644
--- a/include/linux/reciprocal_div.h
+++ b/include/linux/reciprocal_div.h
@@ -25,6 +25,9 @@ struct reciprocal_value {
u8 sh1, sh2;
};
+/* "reciprocal_value" and "reciprocal_divide" together implement the basic
+ * version of the algorithm described in Figure 4.1 of the paper.
+ */
struct reciprocal_value reciprocal_value(u32 d);
static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
@@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
return (t + ((a - t) >> R.sh1)) >> R.sh2;
}
+struct reciprocal_value_adv {
+ u32 m;
+ u8 sh, exp;
+ bool is_wide_m;
+};
+
+/* "reciprocal_value_adv" implements the advanced version of the algorithm
+ * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose
+ * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The
+ * exception case could be easily handled before calling "reciprocal_value_adv".
+ *
+ * The advanced version requires more complex calculation to get the reciprocal
+ * multiplier and other control variables, but then could reduce the required
+ * emulation operations.
+ *
+ * It makes no sense to use this advanced version for host divide emulation,
+ * those extra complexities for calculating multiplier etc could completely
+ * waive our saving on emulation operations.
+ *
+ * However, it makes sense to use it for JIT divide code generation for which
+ * we are willing to trade performance of JITed code with that of host. As shown
+ * by the following pseudo code, the required emulation operations could go down
+ * from 6 (the basic version) to 3 or 4.
+ *
+ * To use the result of "reciprocal_value_adv", suppose we want to calculate
+ * n/d, the pseudo C code will be:
+ *
+ * struct reciprocal_value_adv rvalue;
+ * u8 pre_shift, exp;
+ *
+ * // handle exception case.
+ * if (d >= (1U << 31)) {
+ * result = n >= d;
+ * return;
+ * }
+ *
+ * rvalue = reciprocal_value_adv(d, 32)
+ * exp = rvalue.exp;
+ * if (rvalue.is_wide_m && !(d & 1)) {
+ * // floor(log2(d & (2^32 -d)))
+ * pre_shift = fls(d & -d) - 1;
+ * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
+ * } else {
+ * pre_shift = 0;
+ * }
+ *
+ * // code generation starts.
+ * if (imm == 1U << exp) {
+ * result = n >> exp;
+ * } else if (rvalue.is_wide_m) {
+ * // pre_shift must be zero when reached here.
+ * t = (n * rvalue.m) >> 32;
+ * result = n - t;
+ * result >>= 1;
+ * result += t;
+ * result >>= rvalue.sh - 1;
+ * } else {
+ * if (pre_shift)
+ * result = n >> pre_shift;
+ * result = ((u64)result * rvalue.m) >> 32;
+ * result >>= rvalue.sh;
+ * }
+ */
+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
+
#endif /* _LINUX_RECIPROCAL_DIV_H */