aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/filter.c')
-rw-r--r--net/core/filter.c142
1 files changed, 117 insertions, 25 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 05a04ea87172..672eefbfbe99 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -49,16 +49,17 @@
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
+#include <net/dst.h>
/**
* sk_filter - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
*
- * Run the filter code and then cut skb->data to correct size returned by
- * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * Run the eBPF program and then cut skb->data to correct size returned by
+ * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
+ * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -82,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
+ unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -148,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return raw_smp_processor_id();
}
-/* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
-{
- return prandom_u32();
-}
-
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
struct bpf_insn *insn_buf)
{
@@ -312,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
break;
case SKF_AD_OFF + SKF_AD_RANDOM:
- *insn = BPF_EMIT_CALL(__get_random_u32);
+ *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
+ bpf_user_rnd_init_once();
break;
}
break;
@@ -1001,7 +997,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
int err;
fp->bpf_func = NULL;
- fp->jited = false;
+ fp->jited = 0;
err = bpf_check_classic(fp->insns, fp->len);
if (err) {
@@ -1083,16 +1079,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create);
* @pfp: the unattached filter that is created
* @fprog: the filter program
* @trans: post-classic verifier transformation handler
+ * @save_orig: save classic BPF program
*
* This function effectively does the same as bpf_prog_create(), only
* that it builds up its insns buffer from user space provided buffer.
* It also allows for passing a bpf_aux_classic_check_t handler.
*/
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
- bpf_aux_classic_check_t trans)
+ bpf_aux_classic_check_t trans, bool save_orig)
{
unsigned int fsize = bpf_classic_proglen(fprog);
struct bpf_prog *fp;
+ int err;
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
@@ -1108,12 +1106,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
}
fp->len = fprog->len;
- /* Since unattached filters are not copied back to user
- * space through sk_get_filter(), we do not need to hold
- * a copy here, and can spare us the work.
- */
fp->orig_prog = NULL;
+ if (save_orig) {
+ err = bpf_prog_store_orig_filter(fp, fprog);
+ if (err) {
+ __bpf_prog_free(fp);
+ return -ENOMEM;
+ }
+ }
+
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
@@ -1404,9 +1406,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
if (unlikely(!dev))
return -EINVAL;
- if (unlikely(!(dev->flags & IFF_UP)))
- return -EINVAL;
-
skb2 = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!skb2))
return -ENOMEM;
@@ -1415,6 +1414,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
return dev_forward_skb(dev, skb2);
skb2->dev = dev;
+ skb_sender_cpu_clear(skb2);
return dev_queue_xmit(skb2);
}
@@ -1427,6 +1427,49 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
+struct redirect_info {
+ u32 ifindex;
+ u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+ return TC_ACT_REDIRECT;
+}
+
+int skb_do_redirect(struct sk_buff *skb)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+ ri->ifindex = 0;
+ if (unlikely(!dev)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+ return dev_forward_skb(dev, skb);
+
+ skb->dev = dev;
+ skb_sender_cpu_clear(skb);
+ return dev_queue_xmit(skb);
+}
+
+const struct bpf_func_proto bpf_redirect_proto = {
+ .func = bpf_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
return task_get_classid((struct sk_buff *) (unsigned long) r1);
@@ -1439,6 +1482,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ const struct dst_entry *dst;
+
+ dst = skb_dst((struct sk_buff *) (unsigned long) r1);
+ if (dst)
+ return dst->tclassid;
+#endif
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_get_route_realm_proto = {
+ .func = bpf_get_route_realm,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
@@ -1579,7 +1641,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_trace_printk:
- return bpf_get_trace_printk_proto();
+ if (capable(CAP_SYS_ADMIN))
+ return bpf_get_trace_printk_proto();
default:
return NULL;
}
@@ -1607,6 +1670,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
return bpf_get_skb_set_tunnel_key_proto();
+ case BPF_FUNC_redirect:
+ return &bpf_redirect_proto;
+ case BPF_FUNC_get_route_realm:
+ return &bpf_get_route_realm_proto;
default:
return sk_filter_func_proto(func_id);
}
@@ -1632,6 +1699,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type)
{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return false;
+
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, cb[0]) ...
@@ -1648,10 +1718,14 @@ static bool sk_filter_is_valid_access(int off, int size,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type)
{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return type == BPF_WRITE ? true : false;
+
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, mark):
case offsetof(struct __sk_buff, tc_index):
+ case offsetof(struct __sk_buff, priority):
case offsetof(struct __sk_buff, cb[0]) ...
offsetof(struct __sk_buff, cb[4]):
break;
@@ -1664,7 +1738,8 @@ static bool tc_cls_act_is_valid_access(int off, int size,
static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
- struct bpf_insn *insn_buf)
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
@@ -1693,8 +1768,12 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
case offsetof(struct __sk_buff, priority):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
- offsetof(struct sk_buff, priority));
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
break;
case offsetof(struct __sk_buff, ingress_ifindex):
@@ -1751,6 +1830,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
offsetof(struct __sk_buff, cb[4]):
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+ prog->cb_access = 1;
ctx_off -= offsetof(struct __sk_buff, cb[0]);
ctx_off += offsetof(struct sk_buff, cb);
ctx_off += offsetof(struct qdisc_skb_cb, data);
@@ -1760,6 +1840,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
break;
+ case offsetof(struct __sk_buff, tc_classid):
+ ctx_off -= offsetof(struct __sk_buff, tc_classid);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+ WARN_ON(type != BPF_WRITE);
+ *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ break;
+
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
@@ -1854,9 +1942,13 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
goto out;
/* We're copying the filter that has been originally attached,
- * so no conversion/decode needed anymore.
+ * so no conversion/decode needed anymore. eBPF programs that
+ * have no original program cannot be dumped through this.
*/
+ ret = -EACCES;
fprog = filter->prog->orig_prog;
+ if (!fprog)
+ goto out;
ret = fprog->len;
if (!len)