22 files changed, 1828 insertions, 6 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..988a00797bcd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -212,6 +212,7 @@ enum bpf_reg_type {
 	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
+	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
 };
 
 /* The information passed from prog-specific *_is_valid_access
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index cd26c090e7c0..22083712dd18 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 17a13e4785fc..ce0e863f02a2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -243,6 +243,8 @@ struct scatterlist;
 struct pipe_inode_info;
 struct iov_iter;
 struct napi_struct;
+struct bpf_prog;
+union bpf_attr;
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 struct nf_conntrack {
@@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 			     const struct flow_dissector_key *key,
 			     unsigned int key_count);
 
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+				       struct bpf_prog *prog);
+
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
+
 bool __skb_flow_dissect(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
 			void *target_container,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 9b5fdc50519a..99d4148e0f90 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -43,6 +43,7 @@ struct ctl_table_header;
 struct net_generic;
 struct uevent_sock;
 struct netns_ipvs;
+struct bpf_prog;
 
 
 #define NETDEV_HASHBITS    8
@@ -145,6 +146,8 @@ struct net {
 #endif
 	struct net_generic __rcu	*gen;
 
+	struct bpf_prog __rcu	*flow_dissector_prog;
+
 	/* Note : following structs are cache line aligned */
 #ifdef CONFIG_XFRM
 	struct netns_xfrm	xfrm;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a6d00093f35e..1b81ba85fd2d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@ struct Qdisc_ops;
 struct qdisc_walker;
 struct tcf_walker;
 struct module;
+struct bpf_flow_keys;
 
 typedef int tc_setup_cb_t(enum tc_setup_type type,
 			  void *type_data, void *cb_priv);
@@ -307,9 +308,14 @@ struct tcf_proto {
 };
 
 struct qdisc_skb_cb {
-	unsigned int		pkt_len;
-	u16			slave_dev_queue_mapping;
-	u16			tc_classid;
+	union {
+		struct {
+			unsigned int		pkt_len;
+			u16			slave_dev_queue_mapping;
+			u16			tc_classid;
+		};
+		struct bpf_flow_keys *flow_keys;
+	};
 #define QDISC_CB_PRIV_LEN 20
 	unsigned char		data[QDISC_CB_PRIV_LEN];
 };
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 66917a4eba27..aa5ccd2385ed 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -152,6 +152,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
 	BPF_PROG_TYPE_SK_REUSEPORT,
+	BPF_PROG_TYPE_FLOW_DISSECTOR,
 };
 
 enum bpf_attach_type {
@@ -172,6 +173,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP4_SENDMSG,
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
+	BPF_FLOW_DISSECTOR,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2333,6 +2335,7 @@ struct __sk_buff {
 	/* ... here. */
 
 	__u32 data_meta;
+	struct bpf_flow_keys *flow_keys;
 };
 
 struct bpf_tunnel_key {
@@ -2778,4 +2781,27 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+struct bpf_flow_keys {
+	__u16	nhoff;
+	__u16	thoff;
+	__u16	addr_proto;			/* ETH_P_* of valid addrs */
+	__u8	is_frag;
+	__u8	is_first_frag;
+	__u8	is_encap;
+	__u8	ip_proto;
+	__be16	n_proto;
+	__be16	sport;
+	__be16	dport;
+	union {
+		struct {
+			__be32	ipv4_src;
+			__be32	ipv4_dst;
+		};
+		struct {
+			__u32	ipv6_src[4];	/* in6_addr; network order */
+			__u32	ipv6_dst[4];	/* in6_addr; network order */
+		};
+	};
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3c9636f03bb2..b3c2d09bcf7a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1615,6 +1615,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_LIRC_MODE2:
 		ptype = BPF_PROG_TYPE_LIRC_MODE2;
 		break;
+	case BPF_FLOW_DISSECTOR:
+		ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1636,6 +1639,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_LIRC_MODE2:
 		ret = lirc_prog_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
+		ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
+		break;
 	default:
 		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 	}
@@ -1688,6 +1694,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
 	case BPF_LIRC_MODE2:
 		return lirc_prog_detach(attr);
+	case BPF_FLOW_DISSECTOR:
+		return skb_flow_dissector_bpf_prog_detach(attr);
 	default:
 		return -EINVAL;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6ff1bac1795d..8ccbff4fff93 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -261,6 +261,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_PACKET]		= "pkt",
 	[PTR_TO_PACKET_META]	= "pkt_meta",
 	[PTR_TO_PACKET_END]	= "pkt_end",
+	[PTR_TO_FLOW_KEYS]	= "flow_keys",
 };
 
 static char slot_type_char[] = {
@@ -965,6 +966,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_PACKET:
 	case PTR_TO_PACKET_META:
 	case PTR_TO_PACKET_END:
+	case PTR_TO_FLOW_KEYS:
 	case CONST_PTR_TO_MAP:
 		return true;
 	default:
@@ -1238,6 +1240,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	case BPF_PROG_TYPE_LWT_XMIT:
 	case BPF_PROG_TYPE_SK_SKB:
 	case BPF_PROG_TYPE_SK_MSG:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		if (meta)
 			return meta->pkt_access;
 
@@ -1321,6 +1324,18 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 	return -EACCES;
 }
 
+static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
+				  int size)
+{
+	if (size < 0 || off < 0 ||
+	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
+		verbose(env, "invalid access to flow keys off=%d size=%d\n",
+			off, size);
+		return -EACCES;
+	}
+	return 0;
+}
+
 static bool __is_pointer_value(bool allow_ptr_leaks,
 			       const struct bpf_reg_state *reg)
 {
@@ -1422,6 +1437,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 		 * right in front, treat it the very same way.
 		 */
 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
+	case PTR_TO_FLOW_KEYS:
+		pointer_desc = "flow keys ";
+		break;
 	case PTR_TO_MAP_VALUE:
 		pointer_desc = "value ";
 		break;
@@ -1692,6 +1710,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_packet_access(env, regno, off, size, false);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
+	} else if (reg->type == PTR_TO_FLOW_KEYS) {
+		if (t == BPF_WRITE && value_regno >= 0 &&
+		    is_pointer_value(env, value_regno)) {
+			verbose(env, "R%d leaks addr into flow keys\n",
+				value_regno);
+			return -EACCES;
+		}
+
+		err = check_flow_keys_access(env, off, size);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
 	} else {
 		verbose(env, "R%d invalid mem access '%s'\n", regno,
 			reg_type_str[reg->type]);
@@ -1839,6 +1868,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	case PTR_TO_PACKET_META:
 		return check_packet_access(env, regno, reg->off, access_size,
 					   zero_size_allowed);
+	case PTR_TO_FLOW_KEYS:
+		return check_flow_keys_access(env, reg->off, access_size);
 	case PTR_TO_MAP_VALUE:
 		return check_map_access(env, regno, reg->off, access_size,
 					zero_size_allowed);
@@ -4366,6 +4397,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_CTX:
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_PACKET_END:
+	case PTR_TO_FLOW_KEYS:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
diff --git a/net/core/filter.c b/net/core/filter.c
index bf5b6efd369a..9cc76f134ddb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5124,6 +5124,17 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 }
 
 static const struct bpf_func_proto *
+flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_skb_load_bytes_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static const struct bpf_func_proto *
 lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
@@ -5241,6 +5252,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != size_default)
 			return false;
 		break;
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
+		if (size != sizeof(struct bpf_flow_keys *))
+			return false;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5266,6 +5281,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, data_end):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
 	}
@@ -5291,6 +5307,7 @@ static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
 		return false;
 	}
 
@@ -5501,6 +5518,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_end):
 		info->reg_type = PTR_TO_PACKET_END;
 		break;
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
 	}
@@ -5702,6 +5720,7 @@ static bool sk_skb_is_valid_access(int off, int size,
 	switch (off) {
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
 		return false;
 	}
 
@@ -5761,6 +5780,39 @@ static bool sk_msg_is_valid_access(int off, int size,
 	return true;
 }
 
+static bool flow_dissector_is_valid_access(int off, int size,
+					   enum bpf_access_type type,
+					   const struct bpf_prog *prog,
+					   struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
+		info->reg_type = PTR_TO_FLOW_KEYS;
+		break;
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+		return false;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
@@ -6055,6 +6107,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				      bpf_target_off(struct sock_common,
 						     skc_num, 2, target_size));
 		break;
+
+	case offsetof(struct __sk_buff, flow_keys):
+		off  = si->off;
+		off -= offsetof(struct __sk_buff, flow_keys);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct qdisc_skb_cb, flow_keys);
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+				      si->src_reg, off);
+		break;
 	}
 
 	return insn - insn_buf;
@@ -7018,6 +7079,15 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
 const struct bpf_prog_ops sk_msg_prog_ops = {
 };
 
+const struct bpf_verifier_ops flow_dissector_verifier_ops = {
+	.get_func_proto		= flow_dissector_func_proto,
+	.is_valid_access	= flow_dissector_is_valid_access,
+	.convert_ctx_access	= bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops flow_dissector_prog_ops = {
+};
+
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index ce9eeeb7c024..5c5dd74b5b3b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -25,6 +25,9 @@
 #include <net/flow_dissector.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <uapi/linux/batadv_packet.h>
+#include <linux/bpf.h>
+
+static DEFINE_MUTEX(flow_dissector_mutex);
 
 static void dissector_set_key(struct flow_dissector *flow_dissector,
 			      enum flow_dissector_key_id key_id)
@@ -62,6 +65,44 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 }
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+				       struct bpf_prog *prog)
+{
+	struct bpf_prog *attached;
+	struct net *net;
+
+	net = current->nsproxy->net_ns;
+	mutex_lock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(net->flow_dissector_prog,
+					     lockdep_is_held(&flow_dissector_mutex));
+	if (attached) {
+		/* Only one BPF program can be attached at a time */
+		mutex_unlock(&flow_dissector_mutex);
+		return -EEXIST;
+	}
+	rcu_assign_pointer(net->flow_dissector_prog, prog);
+	mutex_unlock(&flow_dissector_mutex);
+	return 0;
+}
+
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+	struct bpf_prog *attached;
+	struct net *net;
+
+	net = current->nsproxy->net_ns;
+	mutex_lock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(net->flow_dissector_prog,
+					     lockdep_is_held(&flow_dissector_mutex));
+	if (!attached) {
+		mutex_unlock(&flow_dissector_mutex);
+		return -ENOENT;
+	}
+	bpf_prog_put(attached);
+	RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
+	mutex_unlock(&flow_dissector_mutex);
+	return 0;
+}
 /**
  * skb_flow_get_be16 - extract be16 entity
  * @skb: sk_buff to extract from
@@ -588,6 +629,60 @@ static bool skb_flow_dissect_allowed(int *num_hdrs)
 	return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
 }
 
+static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
+				     struct flow_dissector *flow_dissector,
+				     void *target_container)
+{
+	struct flow_dissector_key_control *key_control;
+	struct flow_dissector_key_basic *key_basic;
+	struct flow_dissector_key_addrs *key_addrs;
+	struct flow_dissector_key_ports *key_ports;
+
+	key_control = skb_flow_dissector_target(flow_dissector,
+						FLOW_DISSECTOR_KEY_CONTROL,
+						target_container);
+	key_control->thoff = flow_keys->thoff;
+	if (flow_keys->is_frag)
+		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
+	if (flow_keys->is_first_frag)
+		key_control->flags |= FLOW_DIS_FIRST_FRAG;
+	if (flow_keys->is_encap)
+		key_control->flags |= FLOW_DIS_ENCAPSULATION;
+
+	key_basic = skb_flow_dissector_target(flow_dissector,
+					      FLOW_DISSECTOR_KEY_BASIC,
+					      target_container);
+	key_basic->n_proto = flow_keys->n_proto;
+	key_basic->ip_proto = flow_keys->ip_proto;
+
+	if (flow_keys->addr_proto == ETH_P_IP &&
+	    dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						      target_container);
+		key_addrs->v4addrs.src = flow_keys->ipv4_src;
+		key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
+		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	} else if (flow_keys->addr_proto == ETH_P_IPV6 &&
+		   dissector_uses_key(flow_dissector,
+				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						      target_container);
+		memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
+		       sizeof(key_addrs->v6addrs));
+		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+		key_ports = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_PORTS,
+						      target_container);
+		key_ports->src = flow_keys->sport;
+		key_ports->dst = flow_keys->dport;
+	}
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -619,6 +714,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_vlan *key_vlan;
 	enum flow_dissect_ret fdret;
 	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+	struct bpf_prog *attached;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -658,6 +754,44 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 					      FLOW_DISSECTOR_KEY_BASIC,
 					      target_container);
 
+	rcu_read_lock();
+	attached = skb ? rcu_dereference(dev_net(skb->dev)->flow_dissector_prog)
+		       : NULL;
+	if (attached) {
+		/* Note that even though the const qualifier is discarded
+		 * throughout the execution of the BPF program, all changes(the
+		 * control block) are reverted after the BPF program returns.
+		 * Therefore, __skb_flow_dissect does not alter the skb.
+		 */
+		struct bpf_flow_keys flow_keys = {};
+		struct bpf_skb_data_end cb_saved;
+		struct bpf_skb_data_end *cb;
+		u32 result;
+
+		cb = (struct bpf_skb_data_end *)skb->cb;
+
+		/* Save Control Block */
+		memcpy(&cb_saved, cb, sizeof(cb_saved));
+		memset(cb, 0, sizeof(cb_saved));
+
+		/* Pass parameters to the BPF program */
+		cb->qdisc_cb.flow_keys = &flow_keys;
+		flow_keys.nhoff = nhoff;
+
+		bpf_compute_data_pointers((struct sk_buff *)skb);
+		result = BPF_PROG_RUN(attached, skb);
+
+		/* Restore state */
+		memcpy(cb, &cb_saved, sizeof(cb_saved));
+
+		__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+					 target_container);
+		key_control->thoff = min_t(u16, key_control->thoff, skb->len);
+		rcu_read_unlock();
+		return result == BPF_OK;
+	}
+	rcu_read_unlock();
+
 	if (dissector_uses_key(flow_dissector,
 			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 		struct ethhdr *eth = eth_hdr(skb);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index dce960d22106..b1cd3bc8db70 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -74,6 +74,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_RAW_TRACEPOINT]	= "raw_tracepoint",
 	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
 	[BPF_PROG_TYPE_LIRC_MODE2]	= "lirc_mode2",
+	[BPF_PROG_TYPE_FLOW_DISSECTOR]	= "flow_dissector",
 };
 
 static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 66917a4eba27..aa5ccd2385ed 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -152,6 +152,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
 	BPF_PROG_TYPE_SK_REUSEPORT,
+	BPF_PROG_TYPE_FLOW_DISSECTOR,
 };
 
 enum bpf_attach_type {
@@ -172,6 +173,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP4_SENDMSG,
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
+	BPF_FLOW_DISSECTOR,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2333,6 +2335,7 @@ struct __sk_buff {
 	/* ... here. */
 
 	__u32 data_meta;
+	struct bpf_flow_keys *flow_keys;
 };
 
 struct bpf_tunnel_key {
@@ -2778,4 +2781,27 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+struct bpf_flow_keys {
+	__u16	nhoff;
+	__u16	thoff;
+	__u16	addr_proto;			/* ETH_P_* of valid addrs */
+	__u8	is_frag;
+	__u8	is_first_frag;
+	__u8	is_encap;
+	__u8	ip_proto;
+	__be16	n_proto;
+	__be16	sport;
+	__be16	dport;
+	union {
+		struct {
+			__be32	ipv4_src;
+			__be32	ipv4_dst;
+		};
+		struct {
+			__u32	ipv6_src[4];	/* in6_addr; network order */
+			__u32	ipv6_dst[4];	/* in6_addr; network order */
+		};
+	};
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8476da7f2720..9ca8e0e624d8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1502,6 +1502,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 	case BPF_PROG_TYPE_LIRC_MODE2:
 	case BPF_PROG_TYPE_SK_REUSEPORT:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		return false;
 	case BPF_PROG_TYPE_UNSPEC:
 	case BPF_PROG_TYPE_KPROBE:
@@ -2121,6 +2122,7 @@ static const struct {
 	BPF_PROG_SEC("sk_skb",		BPF_PROG_TYPE_SK_SKB),
 	BPF_PROG_SEC("sk_msg",		BPF_PROG_TYPE_SK_MSG),
 	BPF_PROG_SEC("lirc_mode2",	BPF_PROG_TYPE_LIRC_MODE2),
+	BPF_PROG_SEC("flow_dissector",	BPF_PROG_TYPE_FLOW_DISSECTOR),
 	BPF_SA_PROG_SEC("cgroup/bind4",	BPF_CGROUP_INET4_BIND),
 	BPF_SA_PROG_SEC("cgroup/bind6",	BPF_CGROUP_INET6_BIND),
 	BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 4d789c1e5167..8a60c9b9892d 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -23,3 +23,5 @@ test_skb_cgroup_id_user
 test_socket_cookie
 test_cgroup_storage
 test_select_reuseport
+test_flow_dissector
+flow_dissector_load
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fff7fb1285fc..fd3851d5c079 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
 	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
 	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-	test_skb_cgroup_id_kern.o
+	test_skb_cgroup_id_kern.o bpf_flow.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -47,10 +47,12 @@ TEST_PROGS := test_kmod.sh \
 	test_tunnel.sh \
 	test_lwt_seg6local.sh \
 	test_lirc_mode2.sh \
-	test_skb_cgroup_id.sh
+	test_skb_cgroup_id.sh \
+	test_flow_dissector.sh
 
 # Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
+	flow_dissector_load test_flow_dissector
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
new file mode 100644
index 000000000000..5fb809d95867
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_flow.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <limits.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <linux/if_tunnel.h>
+#include <linux/mpls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+#define PROG(F) SEC(#F) int bpf_func_##F
+
+/* These are the identifiers of the BPF programs that will be used in tail
+ * calls. Name is limited to 16 characters, with the terminating character and
+ * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
+ */
+enum {
+	IP,
+	IPV6,
+	IPV6OP,	/* Destination/Hop-by-Hop Options IPv6 Extension header */
+	IPV6FR,	/* Fragmentation IPv6 Extension Header */
+	MPLS,
+	VLAN,
+};
+
+#define IP_MF		0x2000
+#define IP_OFFSET	0x1FFF
+#define IP6_MF		0x0001
+#define IP6_OFFSET	0xFFF8
+
+struct vlan_hdr {
+	__be16 h_vlan_TCI;
+	__be16 h_vlan_encapsulated_proto;
+};
+
+struct gre_hdr {
+	__be16 flags;
+	__be16 proto;
+};
+
+struct frag_hdr {
+	__u8 nexthdr;
+	__u8 reserved;
+	__be16 frag_off;
+	__be32 identification;
+};
+
+struct bpf_map_def SEC("maps") jmp_table = {
+	.type = BPF_MAP_TYPE_PROG_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 8
+};
+
+static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
+							 __u16 hdr_size,
+							 void *buffer)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	__u16 nhoff = skb->flow_keys->nhoff;
+	__u8 *hdr;
+
+	/* Verifies this variable offset does not overflow */
+	if (nhoff > (USHRT_MAX - hdr_size))
+		return NULL;
+
+	hdr = data + nhoff;
+	if (hdr + hdr_size <= data_end)
+		return hdr;
+
+	if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
+		return NULL;
+
+	return buffer;
+}
+
+/* Dispatches on ETHERTYPE */
+static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+
+	keys->n_proto = proto;
+	switch (proto) {
+	case bpf_htons(ETH_P_IP):
+		bpf_tail_call(skb, &jmp_table, IP);
+		break;
+	case bpf_htons(ETH_P_IPV6):
+		bpf_tail_call(skb, &jmp_table, IPV6);
+		break;
+	case bpf_htons(ETH_P_MPLS_MC):
+	case bpf_htons(ETH_P_MPLS_UC):
+		bpf_tail_call(skb, &jmp_table, MPLS);
+		break;
+	case bpf_htons(ETH_P_8021Q):
+	case bpf_htons(ETH_P_8021AD):
+		bpf_tail_call(skb, &jmp_table, VLAN);
+		break;
+	default:
+		/* Protocol not supported */
+		return BPF_DROP;
+	}
+
+	return BPF_DROP;
+}
+
+SEC("dissect")
+int dissect(struct __sk_buff *skb)
+{
+	if (!skb->vlan_present)
+		return parse_eth_proto(skb, skb->protocol);
+	else
+		return parse_eth_proto(skb, skb->vlan_proto);
+}
+
+/* Parses on IPPROTO_* */
+static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	void *data_end = (void *)(long)skb->data_end;
+	struct icmphdr *icmp, _icmp;
+	struct gre_hdr *gre, _gre;
+	struct ethhdr *eth, _eth;
+	struct tcphdr *tcp, _tcp;
+	struct udphdr *udp, _udp;
+
+	keys->ip_proto = proto;
+	switch (proto) {
+	case IPPROTO_ICMP:
+		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
+		if (!icmp)
+			return BPF_DROP;
+		return BPF_OK;
+	case IPPROTO_IPIP:
+		keys->is_encap = true;
+		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
+	case IPPROTO_IPV6:
+		keys->is_encap = true;
+		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
+	case IPPROTO_GRE:
+		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
+		if (!gre)
+			return BPF_DROP;
+
+		if (bpf_htons(gre->flags & GRE_VERSION))
+			/* Only inspect standard GRE packets with version 0 */
+			return BPF_OK;
+
+		keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
+		if (GRE_IS_CSUM(gre->flags))
+			keys->nhoff += 4; /* Step over chksum and Padding */
+		if (GRE_IS_KEY(gre->flags))
+			keys->nhoff += 4; /* Step over key */
+		if (GRE_IS_SEQ(gre->flags))
+			keys->nhoff += 4; /* Step over sequence number */
+
+		keys->is_encap = true;
+
+		if (gre->proto == bpf_htons(ETH_P_TEB)) {
+			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
+							  &_eth);
+			if (!eth)
+				return BPF_DROP;
+
+			keys->nhoff += sizeof(*eth);
+
+			return parse_eth_proto(skb, eth->h_proto);
+		} else {
+			return parse_eth_proto(skb, gre->proto);
+		}
+	case IPPROTO_TCP:
+		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
+		if (!tcp)
+			return BPF_DROP;
+
+		if (tcp->doff < 5)
+			return BPF_DROP;
+
+		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
+			return BPF_DROP;
+
+		keys->thoff = keys->nhoff;
+		keys->sport = tcp->source;
+		keys->dport = tcp->dest;
+		return BPF_OK;
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
+		if (!udp)
+			return BPF_DROP;
+
+		keys->thoff = keys->nhoff;
+		keys->sport = udp->source;
+		keys->dport = udp->dest;
+		return BPF_OK;
+	default:
+		return BPF_DROP;
+	}
+
+	return BPF_DROP;
+}
+
+static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+
+	keys->ip_proto = nexthdr;
+	switch (nexthdr) {
+	case IPPROTO_HOPOPTS:
+	case IPPROTO_DSTOPTS:
+		bpf_tail_call(skb, &jmp_table, IPV6OP);
+		break;
+	case IPPROTO_FRAGMENT:
+		bpf_tail_call(skb, &jmp_table, IPV6FR);
+		break;
+	default:
+		return parse_ip_proto(skb, nexthdr);
+	}
+
+	return BPF_DROP;
+}
+
+PROG(IP)(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph, _iph;
+	bool done = false;
+
+	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+	if (!iph)
+		return BPF_DROP;
+
+	/* IP header cannot be smaller than 20 bytes */
+	if (iph->ihl < 5)
+		return BPF_DROP;
+
+	keys->addr_proto = ETH_P_IP;
+	keys->ipv4_src = iph->saddr;
+	keys->ipv4_dst = iph->daddr;
+
+	keys->nhoff += iph->ihl << 2;
+	if (data + keys->nhoff > data_end)
+		return BPF_DROP;
+
+	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
+		keys->is_frag = true;
+		if (iph->frag_off & bpf_htons(IP_OFFSET))
+			/* From second fragment on, packets do not have headers
+			 * we can parse.
+			 */
+			done = true;
+		else
+			keys->is_first_frag = true;
+	}
+
+	if (done)
+		return BPF_OK;
+
+	return parse_ip_proto(skb, iph->protocol);
+}
+
+PROG(IPV6)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct ipv6hdr *ip6h, _ip6h;
+
+	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+	if (!ip6h)
+		return BPF_DROP;
+
+	keys->addr_proto = ETH_P_IPV6;
+	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
+
+	keys->nhoff += sizeof(struct ipv6hdr);
+
+	return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6OP)(struct __sk_buff *skb)
+{
+	struct ipv6_opt_hdr *ip6h, _ip6h;
+
+	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+	if (!ip6h)
+		return BPF_DROP;
+
+	/* hlen is in 8-octets and does not include the first 8 bytes
+	 * of the header
+	 */
+	skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
+
+	return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6FR)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct frag_hdr *fragh, _fragh;
+
+	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
+	if (!fragh)
+		return BPF_DROP;
+
+	keys->nhoff += sizeof(*fragh);
+	keys->is_frag = true;
+	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
+		keys->is_first_frag = true;
+
+	return parse_ipv6_proto(skb, fragh->nexthdr);
+}
+
+PROG(MPLS)(struct __sk_buff *skb)
+{
+	struct mpls_label *mpls, _mpls;
+
+	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
+	if (!mpls)
+		return BPF_DROP;
+
+	return BPF_OK;
+}
+
+PROG(VLAN)(struct __sk_buff *skb)
+{
+	struct bpf_flow_keys *keys = skb->flow_keys;
+	struct vlan_hdr *vlan, _vlan;
+	__be16 proto;
+
+	/* Peek back to see if single or double-tagging */
+	if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
+			       sizeof(proto)))
+		return BPF_DROP;
+
+	/* Account for double-tagging */
+	if (proto == bpf_htons(ETH_P_8021AD)) {
+		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+		if (!vlan)
+			return BPF_DROP;
+
+		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
+			return BPF_DROP;
+
+		keys->nhoff += sizeof(*vlan);
+	}
+
+	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+	if (!vlan)
+		return BPF_DROP;
+
+	keys->nhoff += sizeof(*vlan);
+	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
+	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
+	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
+		return BPF_DROP;
+
+	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index b4994a94968b..3655508f95fd 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -18,3 +18,4 @@ CONFIG_CRYPTO_HMAC=m
 CONFIG_CRYPTO_SHA256=m
 CONFIG_VXLAN=y
 CONFIG_GENEVE=y
+CONFIG_NET_CLS_FLOWER=m
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
new file mode 100644
index 000000000000..d3273b5b3173
--- /dev/null
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
+const char *cfg_map_name = "jmp_table";
+bool cfg_attach = true;
+char *cfg_section_name;
+char *cfg_path_name;
+
+static void load_and_attach_program(void)
+{
+	struct bpf_program *prog, *main_prog;
+	struct bpf_map *prog_array;
+	int i, fd, prog_fd, ret;
+	struct bpf_object *obj;
+	int prog_array_fd;
+
+	ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj,
+			    &prog_fd);
+	if (ret)
+		error(1, 0, "bpf_prog_load %s", cfg_path_name);
+
+	main_prog = bpf_object__find_program_by_title(obj, cfg_section_name);
+	if (!main_prog)
+		error(1, 0, "bpf_object__find_program_by_title %s",
+		      cfg_section_name);
+
+	prog_fd = bpf_program__fd(main_prog);
+	if (prog_fd < 0)
+		error(1, 0, "bpf_program__fd");
+
+	prog_array = bpf_object__find_map_by_name(obj, cfg_map_name);
+	if (!prog_array)
+		error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name);
+
+	prog_array_fd = bpf_map__fd(prog_array);
+	if (prog_array_fd < 0)
+		error(1, 0, "bpf_map__fd %s", cfg_map_name);
+
+	i = 0;
+	bpf_object__for_each_program(prog, obj) {
+		fd = bpf_program__fd(prog);
+		if (fd < 0)
+			error(1, 0, "bpf_program__fd");
+
+		if (fd != prog_fd) {
+			printf("%d: %s\n", i, bpf_program__title(prog, false));
+			bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY);
+			++i;
+		}
+	}
+
+	ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0);
+	if (ret)
+		error(1, 0, "bpf_prog_attach %s", cfg_path_name);
+
+	ret = bpf_object__pin(obj, cfg_pin_path);
+	if (ret)
+		error(1, 0, "bpf_object__pin %s", cfg_pin_path);
+
+}
+
+static void detach_program(void)
+{
+	char command[64];
+	int ret;
+
+	ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+	if (ret)
+		error(1, 0, "bpf_prog_detach");
+
+	/* To unpin, it is necessary and sufficient to just remove this dir */
+	sprintf(command, "rm -r %s", cfg_pin_path);
+	ret = system(command);
+	if (ret)
+		error(1, errno, command);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	bool attach = false;
+	bool detach = false;
+	int c;
+
+	while ((c = getopt(argc, argv, "adp:s:")) != -1) {
+		switch (c) {
+		case 'a':
+			if (detach)
+				error(1, 0, "attach/detach are exclusive");
+			attach = true;
+			break;
+		case 'd':
+			if (attach)
+				error(1, 0, "attach/detach are exclusive");
+			detach = true;
+			break;
+		case 'p':
+			if (cfg_path_name)
+				error(1, 0, "only one prog name can be given");
+
+			cfg_path_name = optarg;
+			break;
+		case 's':
+			if (cfg_section_name)
+				error(1, 0, "only one section can be given");
+
+			cfg_section_name = optarg;
+			break;
+		}
+	}
+
+	if (detach)
+		cfg_attach = false;
+
+	if (cfg_attach && !cfg_path_name)
+		error(1, 0, "must provide a path to the BPF program");
+
+	if (cfg_attach && !cfg_section_name)
+		error(1, 0, "must provide a section name");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	if (cfg_attach)
+		load_and_attach_program();
+	else
+		detach_program();
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
new file mode 100644
index 000000000000..12b784afba31
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inject packets with all sorts of encapsulation into the kernel.
+ *
+ * IPv4/IPv6	outer layer 3
+ * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
+ * IPv4/IPv6    inner layer 3
+ */
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <netinet/ip.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define CFG_PORT_INNER	8000
+
+/* Add some protocol definitions that do not exist in userspace */
+
+struct grehdr {
+	uint16_t unused;
+	uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+	union {
+		struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+			__u8	hlen:5,
+				control:1,
+				version:2;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+			__u8	version:2,
+				control:1,
+				hlen:5;
+#else
+#error  "Please fix <asm/byteorder.h>"
+#endif
+			__u8	proto_ctype;
+			__be16	flags;
+		};
+		__be32	word;
+	};
+};
+
+static uint8_t	cfg_dsfield_inner;
+static uint8_t	cfg_dsfield_outer;
+static uint8_t	cfg_encap_proto;
+static bool	cfg_expect_failure = false;
+static int	cfg_l3_extra = AF_UNSPEC;	/* optional SIT prefix */
+static int	cfg_l3_inner = AF_UNSPEC;
+static int	cfg_l3_outer = AF_UNSPEC;
+static int	cfg_num_pkt = 10;
+static int	cfg_num_secs = 0;
+static char	cfg_payload_char = 'a';
+static int	cfg_payload_len = 100;
+static int	cfg_port_gue = 6080;
+static bool	cfg_only_rx;
+static bool	cfg_only_tx;
+static int	cfg_src_port = 9;
+
+static char	buf[ETH_DATA_LEN];
+
+#define INIT_ADDR4(name, addr4, port)				\
+	static struct sockaddr_in name = {			\
+		.sin_family = AF_INET,				\
+		.sin_port = __constant_htons(port),		\
+		.sin_addr.s_addr = __constant_htonl(addr4),	\
+	};
+
+#define INIT_ADDR6(name, addr6, port)				\
+	static struct sockaddr_in6 name = {			\
+		.sin6_family = AF_INET6,			\
+		.sin6_port = __constant_htons(port),		\
+		.sin6_addr = addr6,				\
+	};
+
+INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
+INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
+INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
+INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
+
+INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+
+static unsigned long util_gettime(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void util_printaddr(const char *msg, struct sockaddr *addr)
+{
+	unsigned long off = 0;
+	char nbuf[INET6_ADDRSTRLEN];
+
+	switch (addr->sa_family) {
+	case PF_INET:
+		off = __builtin_offsetof(struct sockaddr_in, sin_addr);
+		break;
+	case PF_INET6:
+		off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
+		break;
+	default:
+		error(1, 0, "printaddr: unsupported family %u\n",
+		      addr->sa_family);
+	}
+
+	if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
+		       sizeof(nbuf)))
+		error(1, errno, "inet_ntop");
+
+	fprintf(stderr, "%s: %s\n", msg, nbuf);
+}
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_u16; i++)
+		sum += start[i];
+
+	return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+			      unsigned long sum)
+{
+	sum += add_csum_hword(start, num_u16);
+
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	return ~sum;
+}
+
+static void build_ipv4_header(void *header, uint8_t proto,
+			      uint32_t src, uint32_t dst,
+			      int payload_len, uint8_t tos)
+{
+	struct iphdr *iph = header;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->tos = tos;
+	iph->ttl = 8;
+	iph->tot_len = htons(sizeof(*iph) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = proto;
+	iph->saddr = src;
+	iph->daddr = dst;
+	iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+	uint16_t val, *ptr = (uint16_t *)ip6h;
+
+	val = ntohs(*ptr);
+	val &= 0xF00F;
+	val |= ((uint16_t) dsfield) << 4;
+	*ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+			      struct sockaddr_in6 *src,
+			      struct sockaddr_in6 *dst,
+			      int payload_len, uint8_t dsfield)
+{
+	struct ipv6hdr *ip6h = header;
+
+	ip6h->version = 6;
+	ip6h->payload_len = htons(payload_len);
+	ip6h->nexthdr = proto;
+	ip6h->hop_limit = 8;
+	ipv6_set_dsfield(ip6h, dsfield);
+
+	memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+	memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static uint16_t build_udp_v4_csum(const struct iphdr *iph,
+				  const struct udphdr *udph,
+				  int num_words)
+{
+	unsigned long pseudo_sum;
+	int num_u16 = sizeof(iph->saddr);	/* halfwords: twice byte len */
+
+	pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
+	pseudo_sum += htons(IPPROTO_UDP);
+	pseudo_sum += udph->len;
+	return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
+				  const struct udphdr *udph,
+				  int num_words)
+{
+	unsigned long pseudo_sum;
+	int num_u16 = sizeof(ip6h->saddr);	/* halfwords: twice byte len */
+
+	pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
+	pseudo_sum += htons(ip6h->nexthdr);
+	pseudo_sum += ip6h->payload_len;
+	return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static void build_udp_header(void *header, int payload_len,
+			     uint16_t dport, int family)
+{
+	struct udphdr *udph = header;
+	int len = sizeof(*udph) + payload_len;
+
+	udph->source = htons(cfg_src_port);
+	udph->dest = htons(dport);
+	udph->len = htons(len);
+	udph->check = 0;
+	if (family == AF_INET)
+		udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+						udph, len >> 1);
+	else
+		udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+						udph, len >> 1);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+	struct guehdr *gueh = header;
+
+	gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+	struct grehdr *greh = header;
+
+	greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+	if (family == AF_INET)
+		return sizeof(struct iphdr);
+	else
+		return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(void)
+{
+	int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+	int el3_len = 0;
+
+	if (cfg_l3_extra)
+		el3_len = l3_length(cfg_l3_extra);
+
+	/* calculate header offsets */
+	if (cfg_encap_proto) {
+		ol3_len = l3_length(cfg_l3_outer);
+
+		if (cfg_encap_proto == IPPROTO_GRE)
+			ol4_len = sizeof(struct grehdr);
+		else if (cfg_encap_proto == IPPROTO_UDP)
+			ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+	}
+
+	il3_len = l3_length(cfg_l3_inner);
+	il4_len = sizeof(struct udphdr);
+
+	if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
+	    sizeof(buf))
+		error(1, 0, "packet too large\n");
+
+	/*
+	 * Fill packet from inside out, to calculate correct checksums.
+	 * But create ip before udp headers, as udp uses ip for pseudo-sum.
+	 */
+	memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+	       cfg_payload_char, cfg_payload_len);
+
+	/* add zero byte for udp csum padding */
+	buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
+
+	switch (cfg_l3_inner) {
+	case PF_INET:
+		build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+				  IPPROTO_UDP,
+				  in_saddr4.sin_addr.s_addr,
+				  in_daddr4.sin_addr.s_addr,
+				  il4_len + cfg_payload_len,
+				  cfg_dsfield_inner);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+				  IPPROTO_UDP,
+				  &in_saddr6, &in_daddr6,
+				  il4_len + cfg_payload_len,
+				  cfg_dsfield_inner);
+		break;
+	}
+
+	build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+			 cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
+
+	if (!cfg_encap_proto)
+		return il3_len + il4_len + cfg_payload_len;
+
+	switch (cfg_l3_outer) {
+	case PF_INET:
+		build_ipv4_header(buf + el3_len, cfg_encap_proto,
+				  out_saddr4.sin_addr.s_addr,
+				  out_daddr4.sin_addr.s_addr,
+				  ol4_len + il3_len + il4_len + cfg_payload_len,
+				  cfg_dsfield_outer);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf + el3_len, cfg_encap_proto,
+				  &out_saddr6, &out_daddr6,
+				  ol4_len + il3_len + il4_len + cfg_payload_len,
+				  cfg_dsfield_outer);
+		break;
+	}
+
+	switch (cfg_encap_proto) {
+	case IPPROTO_UDP:
+		build_gue_header(buf + el3_len + ol3_len + ol4_len -
+				 sizeof(struct guehdr),
+				 cfg_l3_inner == PF_INET ? IPPROTO_IPIP
+							 : IPPROTO_IPV6);
+		build_udp_header(buf + el3_len + ol3_len,
+				 sizeof(struct guehdr) + il3_len + il4_len +
+				 cfg_payload_len,
+				 cfg_port_gue, cfg_l3_outer);
+		break;
+	case IPPROTO_GRE:
+		build_gre_header(buf + el3_len + ol3_len,
+				 cfg_l3_inner == PF_INET ? ETH_P_IP
+							 : ETH_P_IPV6);
+		break;
+	}
+
+	switch (cfg_l3_extra) {
+	case PF_INET:
+		build_ipv4_header(buf,
+				  cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+							  : IPPROTO_IPV6,
+				  extra_saddr4.sin_addr.s_addr,
+				  extra_daddr4.sin_addr.s_addr,
+				  ol3_len + ol4_len + il3_len + il4_len +
+				  cfg_payload_len, 0);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf,
+				  cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+							  : IPPROTO_IPV6,
+				  &extra_saddr6, &extra_daddr6,
+				  ol3_len + ol4_len + il3_len + il4_len +
+				  cfg_payload_len, 0);
+		break;
+	}
+
+	return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+	       cfg_payload_len;
+}
+
+/* sender transmits encapsulated over RAW or unencap'd over UDP */
+static int setup_tx(void)
+{
+	int family, fd, ret;
+
+	if (cfg_l3_extra)
+		family = cfg_l3_extra;
+	else if (cfg_l3_outer)
+		family = cfg_l3_outer;
+	else
+		family = cfg_l3_inner;
+
+	fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+	if (fd == -1)
+		error(1, errno, "socket tx");
+
+	if (cfg_l3_extra) {
+		if (cfg_l3_extra == PF_INET)
+			ret = connect(fd, (void *) &extra_daddr4,
+				      sizeof(extra_daddr4));
+		else
+			ret = connect(fd, (void *) &extra_daddr6,
+				      sizeof(extra_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	} else if (cfg_l3_outer) {
+		/* connect to destination if not encapsulated */
+		if (cfg_l3_outer == PF_INET)
+			ret = connect(fd, (void *) &out_daddr4,
+				      sizeof(out_daddr4));
+		else
+			ret = connect(fd, (void *) &out_daddr6,
+				      sizeof(out_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	} else {
+		/* otherwise using loopback */
+		if (cfg_l3_inner == PF_INET)
+			ret = connect(fd, (void *) &in_daddr4,
+				      sizeof(in_daddr4));
+		else
+			ret = connect(fd, (void *) &in_daddr6,
+				      sizeof(in_daddr6));
+		if (ret)
+			error(1, errno, "connect tx");
+	}
+
+	return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(void)
+{
+	int fd, ret;
+
+	fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket rx");
+
+	if (cfg_l3_inner == PF_INET)
+		ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
+	else
+		ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
+	if (ret)
+		error(1, errno, "bind rx");
+
+	return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+	int ret;
+
+	ret = write(fd, pkt, len);
+	if (ret == -1)
+		error(1, errno, "send");
+	if (ret != len)
+		error(1, errno, "send: len (%d < %d)\n", ret, len);
+
+	return 1;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.fd = fd;
+	pfd.events = events;
+
+	ret = poll(&pfd, 1, timeout);
+	if (ret == -1)
+		error(1, errno, "poll");
+	if (ret && !(pfd.revents & POLLIN))
+		error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
+
+	return ret;
+}
+
+static int do_rx(int fd)
+{
+	char rbuf;
+	int ret, num = 0;
+
+	while (1) {
+		ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			break;
+		if (ret == -1)
+			error(1, errno, "recv");
+		if (rbuf != cfg_payload_char)
+			error(1, 0, "recv: payload mismatch");
+		num++;
+	};
+
+	return num;
+}
+
+static int do_main(void)
+{
+	unsigned long tstop, treport, tcur;
+	int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
+
+	if (!cfg_only_tx)
+		fdr = setup_rx();
+	if (!cfg_only_rx)
+		fdt = setup_tx();
+
+	len = build_packet();
+
+	tcur = util_gettime();
+	treport = tcur + 1000;
+	tstop = tcur + (cfg_num_secs * 1000);
+
+	while (1) {
+		if (!cfg_only_rx)
+			tx += do_tx(fdt, buf, len);
+
+		if (!cfg_only_tx)
+			rx += do_rx(fdr);
+
+		if (cfg_num_secs) {
+			tcur = util_gettime();
+			if (tcur >= tstop)
+				break;
+			if (tcur >= treport) {
+				fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+				tx = 0;
+				rx = 0;
+				treport = tcur + 1000;
+			}
+		} else {
+			if (tx == cfg_num_pkt)
+				break;
+		}
+	}
+
+	/* read straggler packets, if any */
+	if (rx < tx) {
+		tstop = util_gettime() + 100;
+		while (rx < tx) {
+			tcur = util_gettime();
+			if (tcur >= tstop)
+				break;
+
+			do_poll(fdr, POLLIN, tstop - tcur);
+			rx += do_rx(fdr);
+		}
+	}
+
+	fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+
+	if (fdr != -1 && close(fdr))
+		error(1, errno, "close rx");
+	if (fdt != -1 && close(fdt))
+		error(1, errno, "close tx");
+
+	/*
+	 * success (== 0) only if received all packets
+	 * unless failure is expected, in which case none must arrive.
+	 */
+	if (cfg_expect_failure)
+		return rx != 0;
+	else
+		return rx != tx;
+}
+
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+	fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
+			"[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
+			"[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
+			"[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
+		filepath);
+	exit(1);
+}
+
+static void parse_addr(int family, void *addr, const char *optarg)
+{
+	int ret;
+
+	ret = inet_pton(family, optarg, addr);
+	if (ret == -1)
+		error(1, errno, "inet_pton");
+	if (ret == 0)
+		error(1, 0, "inet_pton: bad string");
+}
+
+static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
+{
+	parse_addr(AF_INET, &addr->sin_addr, optarg);
+}
+
+static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
+{
+	parse_addr(AF_INET6, &addr->sin6_addr, optarg);
+}
+
+static int parse_protocol_family(const char *filepath, const char *optarg)
+{
+	if (!strcmp(optarg, "4"))
+		return PF_INET;
+	if (!strcmp(optarg, "6"))
+		return PF_INET6;
+
+	usage(filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
+		switch (c) {
+		case 'd':
+			if (cfg_l3_outer == AF_UNSPEC)
+				error(1, 0, "-d must be preceded by -o");
+			if (cfg_l3_outer == AF_INET)
+				parse_addr4(&out_daddr4, optarg);
+			else
+				parse_addr6(&out_daddr6, optarg);
+			break;
+		case 'D':
+			if (cfg_l3_inner == AF_UNSPEC)
+				error(1, 0, "-D must be preceded by -i");
+			if (cfg_l3_inner == AF_INET)
+				parse_addr4(&in_daddr4, optarg);
+			else
+				parse_addr6(&in_daddr6, optarg);
+			break;
+		case 'e':
+			if (!strcmp(optarg, "gre"))
+				cfg_encap_proto = IPPROTO_GRE;
+			else if (!strcmp(optarg, "gue"))
+				cfg_encap_proto = IPPROTO_UDP;
+			else if (!strcmp(optarg, "bare"))
+				cfg_encap_proto = IPPROTO_IPIP;
+			else if (!strcmp(optarg, "none"))
+				cfg_encap_proto = IPPROTO_IP;	/* == 0 */
+			else
+				usage(argv[0]);
+			break;
+		case 'f':
+			cfg_src_port = strtol(optarg, NULL, 0);
+			break;
+		case 'F':
+			cfg_expect_failure = true;
+			break;
+		case 'h':
+			usage(argv[0]);
+			break;
+		case 'i':
+			if (!strcmp(optarg, "4"))
+				cfg_l3_inner = PF_INET;
+			else if (!strcmp(optarg, "6"))
+				cfg_l3_inner = PF_INET6;
+			else
+				usage(argv[0]);
+			break;
+		case 'l':
+			cfg_payload_len = strtol(optarg, NULL, 0);
+			break;
+		case 'n':
+			cfg_num_pkt = strtol(optarg, NULL, 0);
+			break;
+		case 'o':
+			cfg_l3_outer = parse_protocol_family(argv[0], optarg);
+			break;
+		case 'O':
+			cfg_l3_extra = parse_protocol_family(argv[0], optarg);
+			break;
+		case 'R':
+			cfg_only_rx = true;
+			break;
+		case 's':
+			if (cfg_l3_outer == AF_INET)
+				parse_addr4(&out_saddr4, optarg);
+			else
+				parse_addr6(&out_saddr6, optarg);
+			break;
+		case 'S':
+			if (cfg_l3_inner == AF_INET)
+				parse_addr4(&in_saddr4, optarg);
+			else
+				parse_addr6(&in_saddr6, optarg);
+			break;
+		case 't':
+			cfg_num_secs = strtol(optarg, NULL, 0);
+			break;
+		case 'T':
+			cfg_only_tx = true;
+			break;
+		case 'x':
+			cfg_dsfield_outer = strtol(optarg, NULL, 0);
+			break;
+		case 'X':
+			cfg_dsfield_inner = strtol(optarg, NULL, 0);
+			break;
+		}
+	}
+
+	if (cfg_only_rx && cfg_only_tx)
+		error(1, 0, "options: cannot combine rx-only and tx-only");
+
+	if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
+		error(1, 0, "options: must specify outer with encap");
+	else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
+		error(1, 0, "options: cannot combine no-encap and outer");
+	else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
+		error(1, 0, "options: cannot combine no-encap and extra");
+
+	if (cfg_l3_inner == AF_UNSPEC)
+		cfg_l3_inner = AF_INET6;
+	if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
+		cfg_encap_proto = IPPROTO_IPV6;
+
+	/* RFC 6040 4.2:
+	 *   on decap, if outer encountered congestion (CE == 0x3),
+	 *   but inner cannot encode ECN (NoECT == 0x0), then drop packet.
+	 */
+	if (((cfg_dsfield_outer & 0x3) == 0x3) &&
+	    ((cfg_dsfield_inner & 0x3) == 0x0))
+		cfg_expect_failure = true;
+}
+
+static void print_opts(void)
+{
+	if (cfg_l3_inner == PF_INET6) {
+		util_printaddr("inner.dest6", (void *) &in_daddr6);
+		util_printaddr("inner.source6", (void *) &in_saddr6);
+	} else {
+		util_printaddr("inner.dest4", (void *) &in_daddr4);
+		util_printaddr("inner.source4", (void *) &in_saddr4);
+	}
+
+	if (!cfg_l3_outer)
+		return;
+
+	fprintf(stderr, "encap proto:   %u\n", cfg_encap_proto);
+
+	if (cfg_l3_outer == PF_INET6) {
+		util_printaddr("outer.dest6", (void *) &out_daddr6);
+		util_printaddr("outer.source6", (void *) &out_saddr6);
+	} else {
+		util_printaddr("outer.dest4", (void *) &out_daddr4);
+		util_printaddr("outer.source4", (void *) &out_saddr4);
+	}
+
+	if (!cfg_l3_extra)
+		return;
+
+	if (cfg_l3_outer == PF_INET6) {
+		util_printaddr("extra.dest6", (void *) &extra_daddr6);
+		util_printaddr("extra.source6", (void *) &extra_saddr6);
+	} else {
+		util_printaddr("extra.dest4", (void *) &extra_daddr4);
+		util_printaddr("extra.source4", (void *) &extra_saddr4);
+	}
+
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	print_opts();
+	return do_main();
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
new file mode 100755
index 000000000000..c0fb073b5eab
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Load BPF flow dissector and verify it correctly dissects traffic
+export TESTNAME=test_flow_dissector
+unmount=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
+# This test needs to be run in a network namespace with in_netns.sh. Check if
+# this is the case and run it with in_netns.sh if it is being run in the root
+# namespace.
+if [[ -z $(ip netns identify $$) ]]; then
+	../net/in_netns.sh "$0" "$@"
+	exit $?
+fi
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+	if (( $? == 0 )); then
+		echo "selftests: $TESTNAME [PASS]";
+	else
+		echo "selftests: $TESTNAME [FAILED]";
+	fi
+
+	set +e
+
+	# Cleanup
+	tc filter del dev lo ingress pref 1337 2> /dev/null
+	tc qdisc del dev lo ingress 2> /dev/null
+	./flow_dissector_load -d 2> /dev/null
+	if [ $unmount -ne 0 ]; then
+		umount bpffs 2> /dev/null
+	fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+# Mount BPF file system
+if /bin/mount | grep /sys/fs/bpf > /dev/null; then
+	echo "bpffs already mounted"
+else
+	echo "bpffs not mounted. Mounting..."
+	unmount=1
+	/bin/mount bpffs /sys/fs/bpf -t bpf
+fi
+
+# Attach BPF program
+./flow_dissector_load -p bpf_flow.o -s dissect
+
+# Setup
+tc qdisc add dev lo ingress
+
+echo "Testing IPv4..."
+# Drops all IP/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+	udp src_port 9 action drop
+
+# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 4 -f 8
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 4 -f 10
+
+echo "Testing IPIP..."
+# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 10
+
+echo "Testing IPv4 + GRE..."
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+	-D 192.168.0.1 -S 1.1.1.1 -f 10
+
+tc filter del dev lo ingress pref 1337
+
+echo "Testing IPv6..."
+# Drops all IPv6/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
+	udp src_port 9 action drop
+
+# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 6 -f 8
+# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 6 -f 9 -F
+# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 6 -f 10
+
+exit 0
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
new file mode 100755
index 000000000000..ffcd3953f94c
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_addr.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# add private ipv4 and ipv6 addresses to loopback
+
+readonly V6_INNER='100::a/128'
+readonly V4_INNER='192.168.0.1/32'
+
+if getopts ":s" opt; then
+  readonly SIT_DEV_NAME='sixtofourtest0'
+  readonly V6_SIT='2::/64'
+  readonly V4_SIT='172.17.0.1/32'
+  shift
+fi
+
+fail() {
+  echo "error: $*" 1>&2
+  exit 1
+}
+
+setup() {
+  ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
+  ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
+
+  if [[ -n "${V6_SIT}" ]]; then
+    ip link add "${SIT_DEV_NAME}" type sit remote any local any \
+	    || fail 'failed to add sit'
+    ip link set dev "${SIT_DEV_NAME}" up \
+	    || fail 'failed to bring sit device up'
+    ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
+	    || fail 'failed to setup v6 SIT address'
+    ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
+	    || fail 'failed to setup v4 SIT address'
+  fi
+
+  sleep 2	# avoid race causing bind to fail
+}
+
+cleanup() {
+  if [[ -n "${V6_SIT}" ]]; then
+    ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
+    ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
+    ip link del "${SIT_DEV_NAME}"
+  fi
+
+  ip -4 addr del "${V4_INNER}" dev lo
+  ip -6 addr del "${V6_INNER}" dev lo
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
new file mode 100755
index 000000000000..e24949ed3a20
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_tunnels.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# setup tunnels for flow dissection test
+
+readonly SUFFIX="test_$(mktemp -u XXXX)"
+CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
+
+setup() {
+  ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
+  ip link add "gre_${SUFFIX}" type gre ${CONFIG}
+  ip link add "sit_${SUFFIX}" type sit ${CONFIG}
+
+  echo "tunnels before test:"
+  ip tunnel show
+
+  ip link set "ipip_${SUFFIX}" up
+  ip link set "gre_${SUFFIX}" up
+  ip link set "sit_${SUFFIX}" up
+}
+
+
+cleanup() {
+  ip tunnel del "ipip_${SUFFIX}"
+  ip tunnel del "gre_${SUFFIX}"
+  ip tunnel del "sit_${SUFFIX}"
+
+  echo "tunnels after test:"
+  ip tunnel show
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"