25 files changed, 760 insertions, 115 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7f0e225bf630..e82b7039fc66 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -299,7 +299,8 @@ struct bpf_prog_aux {
 	u32 max_pkt_offset;
 	u32 stack_depth;
 	u32 id;
-	u32 func_cnt;
+	u32 func_cnt; /* used by non-func prog as the number of func progs */
+	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
 	bool offload_requested;
 	struct bpf_prog **func;
 	void *jit_data; /* JIT specific data. arch dependent */
@@ -317,7 +318,8 @@ struct bpf_prog_aux {
 #endif
 	struct bpf_prog_offload *offload;
 	struct btf *btf;
-	u32 type_id; /* type id for this prog/func */
+	struct bpf_func_info *func_info;
+	u32 func_info_cnt;
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 204382f46fd8..11f5df1092d9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -204,7 +204,6 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
 struct bpf_subprog_info {
 	u32 start; /* insn idx of function entry point */
 	u16 stack_depth; /* max. stack depth used by this function */
-	u32 type_id; /* btf type_id for this subprog */
 };
 
 /* single container for all structs
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 23e2031a43d4..597afdbc1ab9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2268,6 +2268,19 @@ union bpf_attr {
  *
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
+ *	 Description
+ *		Will remove *pop* bytes from a *msg* starting at byte *start*.
+ *		This may result in **ENOMEM** errors under certain situations if
+ *		an allocation and copy are required due to a full ring buffer.
+ *		However, the helper will try to avoid doing the allocation
+ *		if possible. Other errors can occur if input parameters are
+ *		invalid either due to *start* byte not being valid part of msg
+ *		payload and/or *pop* value being to large.
+ *
+ *	Return
+ *		0 on success, or a negative erro in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2360,7 +2373,8 @@ union bpf_attr {
 	FN(map_push_elem),		\
 	FN(map_pop_elem),		\
 	FN(map_peek_elem),		\
-	FN(msg_push_data),
+	FN(msg_push_data),		\
+	FN(msg_pop_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 539e8575689f..f93ed667546f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -411,7 +411,8 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 
 	/* prog->aux->name will be ignored if full btf name is available */
 	if (prog->aux->btf) {
-		type = btf_type_by_id(prog->aux->btf, prog->aux->type_id);
+		type = btf_type_by_id(prog->aux->btf,
+				      prog->aux->func_info[prog->aux->func_idx].type_id);
 		func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
 		snprintf(sym, (size_t)(end - sym), "_%s", func_name);
 		return;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 998377808102..85cbeec06e50 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1214,6 +1214,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 		bpf_prog_free_id(prog, do_idr_lock);
 		bpf_prog_kallsyms_del_all(prog);
 		btf_put(prog->aux->btf);
+		kvfree(prog->aux->func_info);
 
 		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
 	}
@@ -2219,46 +2220,28 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	}
 
 	if (prog->aux->btf) {
+		u32 krec_size = sizeof(struct bpf_func_info);
 		u32 ucnt, urec_size;
 
 		info.btf_id = btf_id(prog->aux->btf);
 
 		ucnt = info.func_info_cnt;
-		info.func_info_cnt = prog->aux->func_cnt ? : 1;
+		info.func_info_cnt = prog->aux->func_info_cnt;
 		urec_size = info.func_info_rec_size;
-		info.func_info_rec_size = sizeof(struct bpf_func_info);
+		info.func_info_rec_size = krec_size;
 		if (ucnt) {
 			/* expect passed-in urec_size is what the kernel expects */
 			if (urec_size != info.func_info_rec_size)
 				return -EINVAL;
 
 			if (bpf_dump_raw_ok()) {
-				struct bpf_func_info kern_finfo;
 				char __user *user_finfo;
-				u32 i, insn_offset;
 
 				user_finfo = u64_to_user_ptr(info.func_info);
-				if (prog->aux->func_cnt) {
-					ucnt = min_t(u32, info.func_info_cnt, ucnt);
-					insn_offset = 0;
-					for (i = 0; i < ucnt; i++) {
-						kern_finfo.insn_offset = insn_offset;
-						kern_finfo.type_id = prog->aux->func[i]->aux->type_id;
-						if (copy_to_user(user_finfo, &kern_finfo,
-								 sizeof(kern_finfo)))
-							return -EFAULT;
-
-						/* func[i]->len holds the prog len */
-						insn_offset += prog->aux->func[i]->len;
-						user_finfo += urec_size;
-					}
-				} else {
-					kern_finfo.insn_offset = 0;
-					kern_finfo.type_id = prog->aux->type_id;
-					if (copy_to_user(user_finfo, &kern_finfo,
-							 sizeof(kern_finfo)))
-						return -EFAULT;
-				}
+				ucnt = min_t(u32, info.func_info_cnt, ucnt);
+				if (copy_to_user(user_finfo, prog->aux->func_info,
+						 krec_size * ucnt))
+					return -EFAULT;
 			} else {
 				info.func_info_cnt = 0;
 			}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4ce049cd30a3..9584438fa2cc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4650,7 +4650,7 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env,
 {
 	u32 i, nfuncs, urec_size, min_size, prev_offset;
 	u32 krec_size = sizeof(struct bpf_func_info);
-	struct bpf_func_info krecord = {};
+	struct bpf_func_info *krecord = NULL;
 	const struct btf_type *type;
 	void __user *urecord;
 	struct btf *btf;
@@ -4682,6 +4682,12 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env,
 	urecord = u64_to_user_ptr(attr->func_info);
 	min_size = min_t(u32, krec_size, urec_size);
 
+	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
+	if (!krecord) {
+		ret = -ENOMEM;
+		goto free_btf;
+	}
+
 	for (i = 0; i < nfuncs; i++) {
 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
 		if (ret) {
@@ -4696,59 +4702,69 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env,
 			goto free_btf;
 		}
 
-		if (copy_from_user(&krecord, urecord, min_size)) {
+		if (copy_from_user(&krecord[i], urecord, min_size)) {
 			ret = -EFAULT;
 			goto free_btf;
 		}
 
 		/* check insn_offset */
 		if (i == 0) {
-			if (krecord.insn_offset) {
+			if (krecord[i].insn_offset) {
 				verbose(env,
 					"nonzero insn_offset %u for the first func info record",
-					krecord.insn_offset);
+					krecord[i].insn_offset);
 				ret = -EINVAL;
 				goto free_btf;
 			}
-		} else if (krecord.insn_offset <= prev_offset) {
+		} else if (krecord[i].insn_offset <= prev_offset) {
 			verbose(env,
 				"same or smaller insn offset (%u) than previous func info record (%u)",
-				krecord.insn_offset, prev_offset);
+				krecord[i].insn_offset, prev_offset);
 			ret = -EINVAL;
 			goto free_btf;
 		}
 
-		if (env->subprog_info[i].start != krecord.insn_offset) {
+		if (env->subprog_info[i].start != krecord[i].insn_offset) {
 			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
 			ret = -EINVAL;
 			goto free_btf;
 		}
 
 		/* check type_id */
-		type = btf_type_by_id(btf, krecord.type_id);
+		type = btf_type_by_id(btf, krecord[i].type_id);
 		if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
 			verbose(env, "invalid type id %d in func info",
-				krecord.type_id);
+				krecord[i].type_id);
 			ret = -EINVAL;
 			goto free_btf;
 		}
 
-		if (i == 0)
-			prog->aux->type_id = krecord.type_id;
-		env->subprog_info[i].type_id = krecord.type_id;
-
-		prev_offset = krecord.insn_offset;
+		prev_offset = krecord[i].insn_offset;
 		urecord += urec_size;
 	}
 
 	prog->aux->btf = btf;
+	prog->aux->func_info = krecord;
+	prog->aux->func_info_cnt = nfuncs;
 	return 0;
 
 free_btf:
 	btf_put(btf);
+	kvfree(krecord);
 	return ret;
 }
 
+static void adjust_btf_func(struct bpf_verifier_env *env)
+{
+	int i;
+
+	if (!env->prog->aux->func_info)
+		return;
+
+	for (i = 0; i < env->subprog_cnt; i++)
+		env->prog->aux->func_info[i].insn_offset = env->subprog_info[i].start;
+}
+
 /* check %cur's range satisfies %old's */
 static bool range_within(struct bpf_reg_state *old,
 			 struct bpf_reg_state *cur)
@@ -6043,15 +6059,17 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		if (bpf_prog_calc_tag(func[i]))
 			goto out_free;
 		func[i]->is_func = 1;
+		func[i]->aux->func_idx = i;
+		/* the btf and func_info will be freed only at prog->aux */
+		func[i]->aux->btf = prog->aux->btf;
+		func[i]->aux->func_info = prog->aux->func_info;
+
 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
 		 * Long term would need debug info to populate names
 		 */
 		func[i]->aux->name[0] = 'F';
 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
 		func[i]->jit_requested = 1;
-		/* the btf will be freed only at prog->aux */
-		func[i]->aux->btf = prog->aux->btf;
-		func[i]->aux->type_id = env->subprog_info[i].type_id;
 		func[i] = bpf_int_jit_compile(func[i]);
 		if (!func[i]->jited) {
 			err = -ENOTSUPP;
@@ -6572,6 +6590,9 @@ skip_full_check:
 		convert_pseudo_ld_imm64(env);
 	}
 
+	if (ret == 0)
+		adjust_btf_func(env);
+
 err_release_maps:
 	if (!env->prog->aux->used_maps)
 		/* if we didn't copy map pointers into bpf_prog_info, release
diff --git a/net/core/filter.c b/net/core/filter.c
index aa274679965d..bd0df75dc7b6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -463,7 +463,8 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
 		bool ldx_off_ok = offset <= S16_MAX;
 
 		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
-		*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
+		if (offset)
+			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
 		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
 				      size, 2 + endian + (!ldx_off_ok * 2));
 		if (ldx_off_ok) {
@@ -2424,6 +2425,174 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+static void sk_msg_shift_left(struct sk_msg *msg, int i)
+{
+	int prev;
+
+	do {
+		prev = i;
+		sk_msg_iter_var_next(i);
+		msg->sg.data[prev] = msg->sg.data[i];
+	} while (i != msg->sg.end);
+
+	sk_msg_iter_prev(msg, end);
+}
+
+static void sk_msg_shift_right(struct sk_msg *msg, int i)
+{
+	struct scatterlist tmp, sge;
+
+	sk_msg_iter_next(msg, end);
+	sge = sk_msg_elem_cpy(msg, i);
+	sk_msg_iter_var_next(i);
+	tmp = sk_msg_elem_cpy(msg, i);
+
+	while (i != msg->sg.end) {
+		msg->sg.data[i] = sge;
+		sk_msg_iter_var_next(i);
+		sge = tmp;
+		tmp = sk_msg_elem_cpy(msg, i);
+	}
+}
+
+BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
+	   u32, len, u64, flags)
+{
+	u32 i = 0, l, space, offset = 0;
+	u64 last = start + len;
+	int pop;
+
+	if (unlikely(flags))
+		return -EINVAL;
+
+	/* First find the starting scatterlist element */
+	i = msg->sg.start;
+	do {
+		l = sk_msg_elem(msg, i)->length;
+
+		if (start < offset + l)
+			break;
+		offset += l;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+
+	/* Bounds checks: start and pop must be inside message */
+	if (start >= offset + l || last >= msg->sg.size)
+		return -EINVAL;
+
+	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
+
+	pop = len;
+	/* --------------| offset
+	 * -| start      |-------- len -------|
+	 *
+	 *  |----- a ----|-------- pop -------|----- b ----|
+	 *  |______________________________________________| length
+	 *
+	 *
+	 * a:   region at front of scatter element to save
+	 * b:   region at back of scatter element to save when length > A + pop
+	 * pop: region to pop from element, same as input 'pop' here will be
+	 *      decremented below per iteration.
+	 *
+	 * Two top-level cases to handle when start != offset, first B is non
+	 * zero and second B is zero corresponding to when a pop includes more
+	 * than one element.
+	 *
+	 * Then if B is non-zero AND there is no space allocate space and
+	 * compact A, B regions into page. If there is space shift ring to
+	 * the rigth free'ing the next element in ring to place B, leaving
+	 * A untouched except to reduce length.
+	 */
+	if (start != offset) {
+		struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
+		int a = start;
+		int b = sge->length - pop - a;
+
+		sk_msg_iter_var_next(i);
+
+		if (pop < sge->length - a) {
+			if (space) {
+				sge->length = a;
+				sk_msg_shift_right(msg, i);
+				nsge = sk_msg_elem(msg, i);
+				get_page(sg_page(sge));
+				sg_set_page(nsge,
+					    sg_page(sge),
+					    b, sge->offset + pop + a);
+			} else {
+				struct page *page, *orig;
+				u8 *to, *from;
+
+				page = alloc_pages(__GFP_NOWARN |
+						   __GFP_COMP   | GFP_ATOMIC,
+						   get_order(a + b));
+				if (unlikely(!page))
+					return -ENOMEM;
+
+				sge->length = a;
+				orig = sg_page(sge);
+				from = sg_virt(sge);
+				to = page_address(page);
+				memcpy(to, from, a);
+				memcpy(to + a, from + a + pop, b);
+				sg_set_page(sge, page, a + b, 0);
+				put_page(orig);
+			}
+			pop = 0;
+		} else if (pop >= sge->length - a) {
+			sge->length = a;
+			pop -= (sge->length - a);
+		}
+	}
+
+	/* From above the current layout _must_ be as follows,
+	 *
+	 * -| offset
+	 * -| start
+	 *
+	 *  |---- pop ---|---------------- b ------------|
+	 *  |____________________________________________| length
+	 *
+	 * Offset and start of the current msg elem are equal because in the
+	 * previous case we handled offset != start and either consumed the
+	 * entire element and advanced to the next element OR pop == 0.
+	 *
+	 * Two cases to handle here are first pop is less than the length
+	 * leaving some remainder b above. Simply adjust the element's layout
+	 * in this case. Or pop >= length of the element so that b = 0. In this
+	 * case advance to next element decrementing pop.
+	 */
+	while (pop) {
+		struct scatterlist *sge = sk_msg_elem(msg, i);
+
+		if (pop < sge->length) {
+			sge->length -= pop;
+			sge->offset += pop;
+			pop = 0;
+		} else {
+			pop -= sge->length;
+			sk_msg_shift_left(msg, i);
+		}
+		sk_msg_iter_var_next(i);
+	}
+
+	sk_mem_uncharge(msg->sk, len - pop);
+	msg->sg.size -= (len - pop);
+	sk_msg_compute_data_pointers(msg);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pop_data_proto = {
+	.func		= bpf_msg_pop_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -5097,6 +5266,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_xdp_adjust_meta ||
 	    func == bpf_msg_pull_data ||
 	    func == bpf_msg_push_data ||
+	    func == bpf_msg_pop_data ||
 	    func == bpf_xdp_adjust_tail ||
 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 	    func == bpf_lwt_seg6_store_bytes ||
@@ -5393,6 +5563,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_msg_pull_data_proto;
 	case BPF_FUNC_msg_push_data:
 		return &bpf_msg_push_data_proto;
+	case BPF_FUNC_msg_pop_data:
+		return &bpf_msg_pop_data_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 3b45fe530f91..a47c1cdf90fc 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -289,12 +289,23 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
 {
 	bool cork = false, enospc = msg->sg.start == msg->sg.end;
 	struct sock *sk_redir;
-	u32 tosend;
+	u32 tosend, delta = 0;
 	int ret;
 
 more_data:
-	if (psock->eval == __SK_NONE)
+	if (psock->eval == __SK_NONE) {
+		/* Track delta in msg size to add/subtract it on SK_DROP from
+		 * returned to user copied size. This ensures user doesn't
+		 * get a positive return code with msg_cut_data and SK_DROP
+		 * verdict.
+		 */
+		delta = msg->sg.size;
 		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+		if (msg->sg.size < delta)
+			delta -= msg->sg.size;
+		else
+			delta = 0;
+	}
 
 	if (msg->cork_bytes &&
 	    msg->cork_bytes > msg->sg.size && !enospc) {
@@ -350,7 +361,7 @@ more_data:
 	default:
 		sk_msg_free_partial(sk, msg, tosend);
 		sk_msg_apply_bytes(psock, tosend);
-		*copied -= tosend;
+		*copied -= (tosend + delta);
 		return -EACCES;
 	}
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7b1af8b59cd2..d4ecc66464e6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -687,6 +687,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
 	struct sock *sk_redir;
 	struct tls_rec *rec;
 	int err = 0, send;
+	u32 delta = 0;
 	bool enospc;
 
 	psock = sk_psock_get(sk);
@@ -694,8 +695,14 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
 		return tls_push_record(sk, flags, record_type);
 more_data:
 	enospc = sk_msg_full(msg);
-	if (psock->eval == __SK_NONE)
+	if (psock->eval == __SK_NONE) {
+		delta = msg->sg.size;
 		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+		if (delta < msg->sg.size)
+			delta -= msg->sg.size;
+		else
+			delta = 0;
+	}
 	if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
 	    !enospc && !full_record) {
 		err = -ENOSPC;
@@ -743,7 +750,7 @@ more_data:
 			msg->apply_bytes -= send;
 		if (msg->sg.size == 0)
 			tls_free_open_rec(sk);
-		*copied -= send;
+		*copied -= (send + delta);
 		err = -EACCES;
 	}
 
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index cbd3080e72c7..3850f8d65703 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -713,7 +713,7 @@ static int do_dump(int argc, char **argv)
 
 	prev_key = NULL;
 
-	err = btf_get_from_id(info.btf_id, &btf);
+	err = btf__get_from_id(info.btf_id, &btf);
 	if (err) {
 		p_err("failed to get btf");
 		goto exit_free;
@@ -857,7 +857,7 @@ static int do_lookup(int argc, char **argv)
 	}
 
 	/* here means bpf_map_lookup_elem() succeeded */
-	err = btf_get_from_id(info.btf_id, &btf);
+	err = btf__get_from_id(info.btf_id, &btf);
 	if (err) {
 		p_err("failed to get btf");
 		goto exit_free;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index cb18429818ec..4e04699a2350 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -621,7 +621,7 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
-	if (info.btf_id && btf_get_from_id(info.btf_id, &btf)) {
+	if (info.btf_id && btf__get_from_id(info.btf_id, &btf)) {
 		p_err("failed to get btf");
 		goto err_free;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 23e2031a43d4..597afdbc1ab9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2268,6 +2268,19 @@ union bpf_attr {
  *
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
+ *	 Description
+ *		Will remove *pop* bytes from a *msg* starting at byte *start*.
+ *		This may result in **ENOMEM** errors under certain situations if
+ *		an allocation and copy are required due to a full ring buffer.
+ *		However, the helper will try to avoid doing the allocation
+ *		if possible. Other errors can occur if input parameters are
+ *		invalid either due to *start* byte not being valid part of msg
+ *		payload and/or *pop* value being to large.
+ *
+ *	Return
+ *		0 on success, or a negative erro in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2360,7 +2373,8 @@ union bpf_attr {
 	FN(map_push_elem),		\
 	FN(map_pop_elem),		\
 	FN(map_peek_elem),		\
-	FN(msg_push_data),
+	FN(msg_push_data),		\
+	FN(msg_pop_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 1b4a683a00fc..34d9c3619c96 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -145,6 +145,12 @@ include $(srctree)/tools/build/Makefile.include
 
 BPF_IN    := $(OUTPUT)libbpf-in.o
 LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+VERSION_SCRIPT := libbpf.map
+
+GLOBAL_SYM_COUNT = $(shell readelf -s $(BPF_IN) | \
+			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
+VERSIONED_SYM_COUNT = $(shell readelf -s $(OUTPUT)libbpf.so | \
+			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
 CMD_TARGETS = $(LIB_FILE)
 
@@ -158,7 +164,7 @@ TARGETS = $(CMD_TARGETS)
 
 all: fixdep all_cmd
 
-all_cmd: $(CMD_TARGETS)
+all_cmd: $(CMD_TARGETS) check
 
 $(BPF_IN): force elfdep bpfdep
 	@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
@@ -176,7 +182,8 @@ $(BPF_IN): force elfdep bpfdep
 	$(Q)$(MAKE) $(build)=libbpf
 
 $(OUTPUT)libbpf.so: $(BPF_IN)
-	$(QUIET_LINK)$(CC) --shared $^ -o $@
+	$(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \
+		$^ -o $@
 
 $(OUTPUT)libbpf.a: $(BPF_IN)
 	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
@@ -184,6 +191,18 @@ $(OUTPUT)libbpf.a: $(BPF_IN)
 $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
 	$(QUIET_LINK)$(CXX) $^ -lelf -o $@
 
+check: check_abi
+
+check_abi: $(OUTPUT)libbpf.so
+	@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then	 \
+		echo "Warning: Num of global symbols in $(BPF_IN)"	 \
+		     "($(GLOBAL_SYM_COUNT)) does NOT match with num of"	 \
+		     "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
+		     "Please make sure all LIBBPF_API symbols are"	 \
+		     "versioned in $(VERSION_SCRIPT)." >&2;		 \
+		exit 1;							 \
+	fi
+
 define do_install
 	if [ ! -d '$(DESTDIR_SQ)$2' ]; then		\
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2';	\
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
new file mode 100644
index 000000000000..2ced9e061c4b
--- /dev/null
+++ b/tools/lib/bpf/README.rst
@@ -0,0 +1,139 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+libbpf API naming convention
+============================
+
+libbpf API provides access to a few logically separated groups of
+functions and types. Every group has its own naming convention
+described here. It's recommended to follow these conventions whenever a
+new function or type is added to keep libbpf API clean and consistent.
+
+All types and functions provided by libbpf API should have one of the
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``.
+
+System call wrappers
+--------------------
+
+System call wrappers are simple wrappers for commands supported by
+sys_bpf system call. These wrappers should go to ``bpf.h`` header file
+and map one-on-one to corresponding commands.
+
+For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM``
+command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc.
+
+Objects
+-------
+
+Another class of types and functions provided by libbpf API is "objects"
+and functions to work with them. Objects are high-level abstractions
+such as BPF program or BPF map. They're represented by corresponding
+structures such as ``struct bpf_object``, ``struct bpf_program``,
+``struct bpf_map``, etc.
+
+Structures are forward declared and access to their fields should be
+provided via corresponding getters and setters rather than directly.
+
+These objects are associated with corresponding parts of ELF object that
+contains compiled BPF programs.
+
+For example ``struct bpf_object`` represents ELF object itself created
+from an ELF file or from a buffer, ``struct bpf_program`` represents a
+program in ELF object and ``struct bpf_map`` is a map.
+
+Functions that work with an object have names built from object name,
+double underscore and part that describes function purpose.
+
+For example ``bpf_object__open`` consists of the name of corresponding
+object, ``bpf_object``, double underscore and ``open`` that defines the
+purpose of the function to open ELF file and create ``bpf_object`` from
+it.
+
+Another example: ``bpf_program__load`` is named for corresponding
+object, ``bpf_program``, that is separated from other part of the name
+by double underscore.
+
+All objects and corresponding functions other than BTF related should go
+to ``libbpf.h``. BTF types and functions should go to ``btf.h``.
+
+Auxiliary functions
+-------------------
+
+Auxiliary functions and types that don't fit well in any of categories
+described above should have ``libbpf_`` prefix, e.g.
+``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
+
+libbpf ABI
+==========
+
+libbpf can be both linked statically or used as DSO. To avoid possible
+conflicts with other libraries an application is linked with, all
+non-static libbpf symbols should have one of the prefixes mentioned in
+API documentation above. See API naming convention to choose the right
+name for a new symbol.
+
+Symbol visibility
+-----------------
+
+libbpf follow the model when all global symbols have visibility "hidden"
+by default and to make a symbol visible it has to be explicitly
+attributed with ``LIBBPF_API`` macro. For example:
+
+.. code-block:: c
+
+        LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+
+This prevents from accidentally exporting a symbol, that is not supposed
+to be a part of ABI what, in turn, improves both libbpf developer- and
+user-experiences.
+
+ABI versionning
+---------------
+
+To make future ABI extensions possible libbpf ABI is versioned.
+Versioning is implemented by ``libbpf.map`` version script that is
+passed to linker.
+
+Version name is ``LIBBPF_`` prefix + three-component numeric version,
+starting from ``0.0.1``.
+
+Every time ABI is being changed, e.g. because a new symbol is added or
+semantic of existing symbol is changed, ABI version should be bumped.
+
+For example, if current state of ``libbpf.map`` is:
+
+.. code-block::
+        LIBBPF_0.0.1 {
+        	global:
+                        bpf_func_a;
+                        bpf_func_b;
+        	local:
+        		\*;
+        };
+
+, and a new symbol ``bpf_func_c`` is being introduced, then
+``libbpf.map`` should be changed like this:
+
+.. code-block::
+        LIBBPF_0.0.1 {
+        	global:
+                        bpf_func_a;
+                        bpf_func_b;
+        	local:
+        		\*;
+        };
+        LIBBPF_0.0.2 {
+                global:
+                        bpf_func_c;
+        } LIBBPF_0.0.1;
+
+, where new version ``LIBBPF_0.0.2`` depends on the previous
+``LIBBPF_0.0.1``.
+
+Format of version script and ways to handle ABI changes, including
+incompatible ones, described in details in [1].
+
+Links
+=====
+
+[1] https://www.akkadia.org/drepper/dsohowto.pdf
+    (Chapter 3. Maintaining APIs and ABIs).
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 13ddc4bd24ee..c2d641f3e16e 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -415,7 +415,7 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
 		return NULL;
 }
 
-int btf_get_from_id(__u32 id, struct btf **btf)
+int btf__get_from_id(__u32 id, struct btf **btf)
 {
 	struct bpf_btf_info btf_info = { 0 };
 	__u32 len = sizeof(btf_info);
@@ -466,7 +466,7 @@ int btf_get_from_id(__u32 id, struct btf **btf)
 		goto exit_free;
 	}
 
-	*btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
+	*btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL);
 	if (IS_ERR(*btf)) {
 		err = PTR_ERR(*btf);
 		*btf = NULL;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 701ad2b6c41f..5336b2f37293 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -73,7 +73,7 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
 LIBBPF_API int btf__fd(const struct btf *btf);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_API int btf_get_from_id(__u32 id, struct btf **btf);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 
 struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
 void btf_ext__free(struct btf_ext *btf_ext);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ed4212a4c5f9..59b748ebd15f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -9,7 +9,9 @@
  * Copyright (C) 2017 Nicira, Inc.
  */
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdarg.h>
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
new file mode 100644
index 000000000000..4fb29f6d7a80
--- /dev/null
+++ b/tools/lib/bpf/libbpf.map
@@ -0,0 +1,121 @@
+LIBBPF_0.0.1 {
+	global:
+		bpf_btf_get_fd_by_id;
+		bpf_create_map;
+		bpf_create_map_in_map;
+		bpf_create_map_in_map_node;
+		bpf_create_map_name;
+		bpf_create_map_node;
+		bpf_create_map_xattr;
+		bpf_load_btf;
+		bpf_load_program;
+		bpf_load_program_xattr;
+		bpf_map__btf_key_type_id;
+		bpf_map__btf_value_type_id;
+		bpf_map__def;
+		bpf_map__fd;
+		bpf_map__is_offload_neutral;
+		bpf_map__name;
+		bpf_map__next;
+		bpf_map__pin;
+		bpf_map__prev;
+		bpf_map__priv;
+		bpf_map__reuse_fd;
+		bpf_map__set_ifindex;
+		bpf_map__set_inner_map_fd;
+		bpf_map__set_priv;
+		bpf_map__unpin;
+		bpf_map_delete_elem;
+		bpf_map_get_fd_by_id;
+		bpf_map_get_next_id;
+		bpf_map_get_next_key;
+		bpf_map_lookup_and_delete_elem;
+		bpf_map_lookup_elem;
+		bpf_map_update_elem;
+		bpf_obj_get;
+		bpf_obj_get_info_by_fd;
+		bpf_obj_pin;
+		bpf_object__btf_fd;
+		bpf_object__close;
+		bpf_object__find_map_by_name;
+		bpf_object__find_map_by_offset;
+		bpf_object__find_program_by_title;
+		bpf_object__kversion;
+		bpf_object__load;
+		bpf_object__name;
+		bpf_object__next;
+		bpf_object__open;
+		bpf_object__open_buffer;
+		bpf_object__open_xattr;
+		bpf_object__pin;
+		bpf_object__pin_maps;
+		bpf_object__pin_programs;
+		bpf_object__priv;
+		bpf_object__set_priv;
+		bpf_object__unload;
+		bpf_object__unpin_maps;
+		bpf_object__unpin_programs;
+		bpf_perf_event_read_simple;
+		bpf_prog_attach;
+		bpf_prog_detach;
+		bpf_prog_detach2;
+		bpf_prog_get_fd_by_id;
+		bpf_prog_get_next_id;
+		bpf_prog_load;
+		bpf_prog_load_xattr;
+		bpf_prog_query;
+		bpf_prog_test_run;
+		bpf_program__fd;
+		bpf_program__is_kprobe;
+		bpf_program__is_perf_event;
+		bpf_program__is_raw_tracepoint;
+		bpf_program__is_sched_act;
+		bpf_program__is_sched_cls;
+		bpf_program__is_socket_filter;
+		bpf_program__is_tracepoint;
+		bpf_program__is_xdp;
+		bpf_program__load;
+		bpf_program__next;
+		bpf_program__nth_fd;
+		bpf_program__pin;
+		bpf_program__pin_instance;
+		bpf_program__prev;
+		bpf_program__priv;
+		bpf_program__set_expected_attach_type;
+		bpf_program__set_ifindex;
+		bpf_program__set_kprobe;
+		bpf_program__set_perf_event;
+		bpf_program__set_prep;
+		bpf_program__set_priv;
+		bpf_program__set_raw_tracepoint;
+		bpf_program__set_sched_act;
+		bpf_program__set_sched_cls;
+		bpf_program__set_socket_filter;
+		bpf_program__set_tracepoint;
+		bpf_program__set_type;
+		bpf_program__set_xdp;
+		bpf_program__title;
+		bpf_program__unload;
+		bpf_program__unpin;
+		bpf_program__unpin_instance;
+		bpf_raw_tracepoint_open;
+		bpf_set_link_xdp_fd;
+		bpf_task_fd_query;
+		bpf_verify_program;
+		btf__fd;
+		btf__find_by_name;
+		btf__free;
+		btf__get_from_id;
+		btf__name_by_offset;
+		btf__new;
+		btf__resolve_size;
+		btf__resolve_type;
+		btf__type_by_id;
+		libbpf_attach_type_by_name;
+		libbpf_get_error;
+		libbpf_prog_type_by_name;
+		libbpf_set_print;
+		libbpf_strerror;
+	local:
+		*;
+};
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
index d83b17f8435c..4343e40588c6 100644
--- a/tools/lib/bpf/libbpf_errno.c
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -7,6 +7,7 @@
  * Copyright (C) 2017 Nicira, Inc.
  */
 
+#undef _GNU_SOURCE
 #include <stdio.h>
 #include <string.h>
 
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 686e57ce40f4..7b69519a09b1 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -113,6 +113,8 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
 	(void *) BPF_FUNC_msg_pull_data;
 static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
 	(void *) BPF_FUNC_msg_push_data;
+static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) =
+	(void *) BPF_FUNC_msg_pop_data;
 static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
 	(void *) BPF_FUNC_bind;
 static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 7f90d3645af8..37f947ec44ed 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -22,3 +22,4 @@ CONFIG_NET_CLS_FLOWER=m
 CONFIG_LWTUNNEL=y
 CONFIG_BPF_STREAM_PARSER=y
 CONFIG_XDP_SOCKETS=y
+CONFIG_FTRACE_SYSCALLS=y
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index bcbda7037840..bae7308b7ec5 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -29,7 +29,6 @@
 static uint32_t pass_cnt;
 static uint32_t error_cnt;
 static uint32_t skip_cnt;
-static bool jit_enabled;
 
 #define CHECK(condition, format...) ({					\
 	int __ret = !!(condition);					\
@@ -65,24 +64,6 @@ static int __base_pr(const char *format, ...)
 	return err;
 }
 
-static bool is_jit_enabled(void)
-{
-	const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
-	bool enabled = false;
-	int sysctl_fd;
-
-	sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
-	if (sysctl_fd != -1) {
-		char tmpc;
-
-		if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
-			enabled = (tmpc != '0');
-		close(sysctl_fd);
-	}
-
-	return enabled;
-}
-
 #define BTF_INFO_ENC(kind, root, vlen)			\
 	((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 
@@ -2547,8 +2528,8 @@ static int do_test_file(unsigned int test_num)
 		  test->btf_kv_notfound))
 		goto done;
 
-	if (!jit_enabled || !has_btf_ext)
-		goto skip_jit;
+	if (!has_btf_ext)
+		goto skip;
 
 	/* get necessary program info */
 	info_len = sizeof(struct bpf_prog_info);
@@ -2604,7 +2585,7 @@ static int do_test_file(unsigned int test_num)
 		goto done;
 	}
 
-	err = btf_get_from_id(info.btf_id, &btf);
+	err = btf__get_from_id(info.btf_id, &btf);
 	if (CHECK(err, "cannot get btf from kernel, err: %d", err))
 		goto done;
 
@@ -2636,7 +2617,7 @@ static int do_test_file(unsigned int test_num)
 		finfo = (void *)finfo + rec_size;
 	}
 
-skip_jit:
+skip:
 	fprintf(stderr, "OK");
 
 done:
@@ -3270,12 +3251,6 @@ static int do_test_func_type(int test_num)
 		err = -1;
 		goto done;
 	}
-	if (!jit_enabled) {
-		skip_cnt++;
-		fprintf(stderr, "SKIPPED, please enable sysctl bpf_jit_enable\n");
-		err = 0;
-		goto done;
-	}
 
 	/* get necessary lens */
 	info_len = sizeof(struct bpf_prog_info);
@@ -3452,8 +3427,6 @@ int main(int argc, char **argv)
 	if (args.always_log)
 		libbpf_set_print(__base_pr, __base_pr, __base_pr);
 
-	jit_enabled = is_jit_enabled();
-
 	if (args.raw_test)
 		err |= test_raw();
 
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c1e688f61061..1c57abbe0945 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -524,7 +524,7 @@ static void test_bpf_obj_id(void)
 			  load_time < now - 60 || load_time > now + 60 ||
 			  prog_infos[i].created_by_uid != my_uid ||
 			  prog_infos[i].nr_map_ids != 1 ||
-			  *(int *)prog_infos[i].map_ids != map_infos[i].id ||
+			  *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
 			  strcmp((char *)prog_infos[i].name, expected_prog_name),
 			  "get-prog-info(fd)",
 			  "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
@@ -539,7 +539,7 @@ static void test_bpf_obj_id(void)
 			  load_time, now,
 			  prog_infos[i].created_by_uid, my_uid,
 			  prog_infos[i].nr_map_ids, 1,
-			  *(int *)prog_infos[i].map_ids, map_infos[i].id,
+			  *(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
 			  prog_infos[i].name, expected_prog_name))
 			goto done;
 	}
@@ -585,7 +585,7 @@ static void test_bpf_obj_id(void)
 		bzero(&prog_info, sizeof(prog_info));
 		info_len = sizeof(prog_info);
 
-		saved_map_id = *(int *)(prog_infos[i].map_ids);
+		saved_map_id = *(int *)((long)prog_infos[i].map_ids);
 		prog_info.map_ids = prog_infos[i].map_ids;
 		prog_info.nr_map_ids = 2;
 		err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
@@ -593,12 +593,12 @@ static void test_bpf_obj_id(void)
 		prog_infos[i].xlated_prog_insns = 0;
 		CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
 		      memcmp(&prog_info, &prog_infos[i], info_len) ||
-		      *(int *)prog_info.map_ids != saved_map_id,
+		      *(int *)(long)prog_info.map_ids != saved_map_id,
 		      "get-prog-info(next_id->fd)",
 		      "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
 		      err, errno, info_len, sizeof(struct bpf_prog_info),
 		      memcmp(&prog_info, &prog_infos[i], info_len),
-		      *(int *)prog_info.map_ids, saved_map_id);
+		      *(int *)(long)prog_info.map_ids, saved_map_id);
 		close(prog_fd);
 	}
 	CHECK(nr_id_found != nr_iters,
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 622ade0a0957..e85a771f607b 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -79,6 +79,8 @@ int txmsg_start;
 int txmsg_end;
 int txmsg_start_push;
 int txmsg_end_push;
+int txmsg_start_pop;
+int txmsg_pop;
 int txmsg_ingress;
 int txmsg_skb;
 int ktls;
@@ -104,6 +106,8 @@ static const struct option long_options[] = {
 	{"txmsg_end",	required_argument,	NULL, 'e'},
 	{"txmsg_start_push", required_argument,	NULL, 'p'},
 	{"txmsg_end_push",   required_argument,	NULL, 'q'},
+	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
+	{"txmsg_pop",	     required_argument,	NULL, 'x'},
 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
 	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
 	{"ktls", no_argument,			&ktls, 1 },
@@ -473,13 +477,27 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		clock_gettime(CLOCK_MONOTONIC, &s->end);
 	} else {
 		int slct, recvp = 0, recv, max_fd = fd;
+		float total_bytes, txmsg_pop_total;
 		int fd_flags = O_NONBLOCK;
 		struct timeval timeout;
-		float total_bytes;
 		fd_set w;
 
 		fcntl(fd, fd_flags);
+		/* Account for pop bytes noting each iteration of apply will
+		 * call msg_pop_data helper so we need to account for this
+		 * by calculating the number of apply iterations. Note user
+		 * of the tool can create cases where no data is sent by
+		 * manipulating pop/push/pull/etc. For example txmsg_apply 1
+		 * with txmsg_pop 1 will try to apply 1B at a time but each
+		 * iteration will then pop 1B so no data will ever be sent.
+		 * This is really only useful for testing edge cases in code
+		 * paths.
+		 */
 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+		txmsg_pop_total = txmsg_pop;
+		if (txmsg_apply)
+			txmsg_pop_total *= (total_bytes / txmsg_apply);
+		total_bytes -= txmsg_pop_total;
 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
 		if (err < 0)
 			perror("recv start time: ");
@@ -488,7 +506,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 				timeout.tv_sec = 0;
 				timeout.tv_usec = 300000;
 			} else {
-				timeout.tv_sec = 1;
+				timeout.tv_sec = 3;
 				timeout.tv_usec = 0;
 			}
 
@@ -503,7 +521,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 				goto out_errno;
 			} else if (!slct) {
 				if (opt->verbose)
-					fprintf(stderr, "unexpected timeout\n");
+					fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total);
 				errno = -EIO;
 				clock_gettime(CLOCK_MONOTONIC, &s->end);
 				goto out_errno;
@@ -619,7 +637,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 			iov_count = 1;
 		err = msg_loop(rx_fd, iov_count, iov_buf,
 			       cnt, &s, false, opt);
-		if (err && opt->verbose)
+		if (opt->verbose)
 			fprintf(stderr,
 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
 				iov_count, iov_buf, cnt, err);
@@ -931,6 +949,39 @@ run:
 			}
 		}
 
+		if (txmsg_start_pop) {
+			i = 4;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start_pop, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop):  %d (%s)\n",
+					txmsg_start_pop, i, err, strerror(errno));
+				goto out;
+			}
+		} else {
+			i = 4;
+			bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start_pop, BPF_ANY);
+		}
+
+		if (txmsg_pop) {
+			i = 5;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_pop, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem %i@%i (txmsg_pop):  %d (%s)\n",
+					txmsg_pop, i, err, strerror(errno));
+				goto out;
+			}
+		} else {
+			i = 5;
+			bpf_map_update_elem(map_fd[5],
+					    &i, &txmsg_pop, BPF_ANY);
+
+		}
+
 		if (txmsg_ingress) {
 			int in = BPF_F_INGRESS;
 
@@ -1082,6 +1133,11 @@ static void test_options(char *options)
 		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
 		strncat(options, tstr, OPTSTRING);
 	}
+	if (txmsg_start_pop) {
+		snprintf(tstr, OPTSTRING, "pop (%d,%d),",
+			 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
+		strncat(options, tstr, OPTSTRING);
+	}
 	if (txmsg_ingress)
 		strncat(options, "ingress,", OPTSTRING);
 	if (txmsg_skb)
@@ -1264,6 +1320,7 @@ static int test_mixed(int cgrp)
 	txmsg_apply = txmsg_cork = 0;
 	txmsg_start = txmsg_end = 0;
 	txmsg_start_push = txmsg_end_push = 0;
+	txmsg_start_pop = txmsg_pop = 0;
 
 	/* Test small and large iov_count values with pass/redir/apply/cork */
 	txmsg_pass = 1;
@@ -1383,6 +1440,19 @@ static int test_start_end(int cgrp)
 	txmsg_end = 2;
 	txmsg_start_push = 1;
 	txmsg_end_push = 2;
+	txmsg_start_pop = 1;
+	txmsg_pop = 1;
+	err = test_txmsg(cgrp);
+	if (err)
+		goto out;
+
+	/* Cut a byte of pushed data but leave reamining in place */
+	txmsg_start = 1;
+	txmsg_end = 2;
+	txmsg_start_push = 1;
+	txmsg_end_push = 3;
+	txmsg_start_pop = 1;
+	txmsg_pop = 1;
 	err = test_txmsg(cgrp);
 	if (err)
 		goto out;
@@ -1393,6 +1463,9 @@ static int test_start_end(int cgrp)
 	opt.iov_length = 100;
 	txmsg_cork = 1600;
 
+	txmsg_start_pop = 0;
+	txmsg_pop = 0;
+
 	for (i = 99; i <= 1600; i += 500) {
 		txmsg_start = 0;
 		txmsg_end = i;
@@ -1403,6 +1476,17 @@ static int test_start_end(int cgrp)
 			goto out;
 	}
 
+	/* Test pop data in middle of cork */
+	for (i = 99; i <= 1600; i += 500) {
+		txmsg_start_pop = 10;
+		txmsg_pop = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+	txmsg_start_pop = 0;
+	txmsg_pop = 0;
+
 	/* Test start/end with cork but pull data in middle */
 	for (i = 199; i <= 1600; i += 500) {
 		txmsg_start = 100;
@@ -1423,6 +1507,15 @@ static int test_start_end(int cgrp)
 	if (err)
 		goto out;
 
+	/* Test pop with cork pulling last sg entry */
+	txmsg_start_pop = 1500;
+	txmsg_pop = 1600;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+	txmsg_start_pop = 0;
+	txmsg_pop = 0;
+
 	/* Test start/end pull of single byte in last page */
 	txmsg_start = 1111;
 	txmsg_end = 1112;
@@ -1432,6 +1525,13 @@ static int test_start_end(int cgrp)
 	if (err)
 		goto out;
 
+	/* Test pop of single byte in last page */
+	txmsg_start_pop = 1111;
+	txmsg_pop = 1112;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
 	/* Test start/end with end < start */
 	txmsg_start = 1111;
 	txmsg_end = 0;
@@ -1456,7 +1556,20 @@ static int test_start_end(int cgrp)
 	txmsg_start_push = 1601;
 	txmsg_end_push = 1600;
 	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test pop with start > data */
+	txmsg_start_pop = 1601;
+	txmsg_pop = 1;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
 
+	/* Test pop with pop range > data */
+	txmsg_start_pop = 1599;
+	txmsg_pop = 10;
+	err = test_exec(cgrp, &opt);
 out:
 	txmsg_start = 0;
 	txmsg_end = 0;
@@ -1641,6 +1754,12 @@ int main(int argc, char **argv)
 		case 'q':
 			txmsg_end_push = atoi(optarg);
 			break;
+		case 'w':
+			txmsg_start_pop = atoi(optarg);
+			break;
+		case 'x':
+			txmsg_pop = atoi(optarg);
+			break;
 		case 'a':
 			txmsg_apply = atoi(optarg);
 			break;
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index 14b8bbac004f..e7639f66a941 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -74,7 +74,7 @@ struct bpf_map_def SEC("maps") sock_bytes = {
 	.type = BPF_MAP_TYPE_ARRAY,
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.max_entries = 4
+	.max_entries = 6
 };
 
 struct bpf_map_def SEC("maps") sock_redir_flags = {
@@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 SEC("sk_msg1")
 int bpf_prog4(struct sk_msg_md *msg)
 {
-	int *bytes, zero = 0, one = 1, two = 2, three = 3;
-	int *start, *end, *start_push, *end_push;
+	int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+	int *start, *end, *start_push, *end_push, *start_pop, *pop;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -198,15 +198,19 @@ int bpf_prog4(struct sk_msg_md *msg)
 	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
 	if (start_push && end_push)
 		bpf_msg_push_data(msg, *start_push, *end_push, 0);
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop)
+		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
 	return SK_PASS;
 }
 
 SEC("sk_msg2")
 int bpf_prog5(struct sk_msg_md *msg)
 {
-	int zero = 0, one = 1, two = 2, three = 3;
-	int *start, *end, *start_push, *end_push;
-	int *bytes, len1, len2 = 0, len3;
+	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+	int *start, *end, *start_push, *end_push, *start_pop, *pop;
+	int *bytes, len1, len2 = 0, len3, len4;
 	int err1 = -1, err2 = -1;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
@@ -247,6 +251,20 @@ int bpf_prog5(struct sk_msg_md *msg)
 		bpf_printk("sk_msg2: length push_update %i->%i\n",
 			   len2 ? len2 : len1, len3);
 	}
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop) {
+		int err;
+
+		bpf_printk("sk_msg2: pop(%i@%i)\n",
+			   start_pop, pop);
+		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+		if (err)
+			bpf_printk("sk_msg2: pop_data err %i\n", err);
+		len4 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length pop_data %i->%i\n",
+			   len1 ? len1 : 0,  len4);
+	}
 
 	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
 		   len1, err1, err2);
@@ -256,8 +274,8 @@ int bpf_prog5(struct sk_msg_md *msg)
 SEC("sk_msg3")
 int bpf_prog6(struct sk_msg_md *msg)
 {
-	int *bytes, *start, *end, *start_push, *end_push, *f;
-	int zero = 0, one = 1, two = 2, three = 3, key = 0;
+	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
+	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
 	__u64 flags = 0;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
@@ -277,6 +295,11 @@ int bpf_prog6(struct sk_msg_md *msg)
 	if (start_push && end_push)
 		bpf_msg_push_data(msg, *start_push, *end_push, 0);
 
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop)
+		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+
 	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
 	if (f && *f) {
 		key = 2;
@@ -292,8 +315,9 @@ int bpf_prog6(struct sk_msg_md *msg)
 SEC("sk_msg4")
 int bpf_prog7(struct sk_msg_md *msg)
 {
-	int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3;
-	int *bytes, *start, *end, *start_push, *end_push, *f;
+	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
+	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+	int len1, len2 = 0, len3, len4;
 	int err1 = 0, err2 = 0, key = 0;
 	__u64 flags = 0;
 
@@ -335,6 +359,22 @@ int bpf_prog7(struct sk_msg_md *msg)
 			   len2 ? len2 : len1, len3);
 	}
 
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop) {
+		int err;
+
+		bpf_printk("sk_msg4: pop(%i@%i)\n",
+			   start_pop, pop);
+		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+		if (err)
+			bpf_printk("sk_msg4: pop_data err %i\n", err);
+		len4 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg4: length pop_data %i->%i\n",
+			   len1 ? len1 : 0,  len4);
+	}
+
+
 	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
 	if (f && *f) {
 		key = 2;
@@ -389,8 +429,8 @@ int bpf_prog9(struct sk_msg_md *msg)
 SEC("sk_msg7")
 int bpf_prog10(struct sk_msg_md *msg)
 {
-	int *bytes, *start, *end, *start_push, *end_push;
-	int zero = 0, one = 1, two = 2, three = 3;
+	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
+	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -406,7 +446,11 @@ int bpf_prog10(struct sk_msg_md *msg)
 	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
 	if (start_push && end_push)
 		bpf_msg_push_data(msg, *start_push, *end_push, 0);
-
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop)
+		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+	bpf_printk("return sk drop\n");
 	return SK_DROP;
 }