aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-11-02 22:48:39 -0500
committerDavid S. Miller <davem@davemloft.net>2015-11-02 22:48:39 -0500
commit12d4309636d30770b54985be05ac512131f328b8 (patch)
tree011745b348dfea74709648487aed31bb6f670329
parentnet: fix percpu memory leaks (diff)
parentbpf: add sample usages for persistent maps/progs (diff)
downloadlinux-dev-12d4309636d30770b54985be05ac512131f328b8.tar.xz
linux-dev-12d4309636d30770b54985be05ac512131f328b8.zip
Merge branch 'bpf-persistent'
Daniel Borkmann says: ==================== BPF updates This set adds support for persistent maps/progs. Please see individual patches for further details. A man-page update to bpf(2) will be sent later on, also a iproute2 patch for support in tc. v1 -> v2: - Reworked most of patch 4 and 5 - Rebased to latest net-next ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/bpf.h9
-rw-r--r--include/uapi/linux/bpf.h45
-rw-r--r--include/uapi/linux/magic.h1
-rw-r--r--kernel/bpf/Makefile4
-rw-r--r--kernel/bpf/core.c3
-rw-r--r--kernel/bpf/inode.c387
-rw-r--r--kernel/bpf/syscall.c95
-rw-r--r--kernel/bpf/verifier.c3
-rw-r--r--samples/bpf/Makefile3
-rw-r--r--samples/bpf/fds_example.c183
-rw-r--r--samples/bpf/libbpf.c19
-rw-r--r--samples/bpf/libbpf.h3
12 files changed, 683 insertions, 72 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 75718fa28260..de464e6683b6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -167,11 +167,18 @@ struct bpf_prog *bpf_prog_get(u32 ufd);
void bpf_prog_put(struct bpf_prog *prog);
void bpf_prog_put_rcu(struct bpf_prog *prog);
-struct bpf_map *bpf_map_get(struct fd f);
+struct bpf_map *bpf_map_get(u32 ufd);
+struct bpf_map *__bpf_map_get(struct fd f);
void bpf_map_put(struct bpf_map *map);
extern int sysctl_unprivileged_bpf_disabled;
+int bpf_map_new_fd(struct bpf_map *map);
+int bpf_prog_new_fd(struct bpf_prog *prog);
+
+int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
+int bpf_obj_get_user(const char __user *pathname);
+
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
#else
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2e032426cfb7..9ea2d22fa2cb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -63,50 +63,16 @@ struct bpf_insn {
__s32 imm; /* signed immediate constant */
};
-/* BPF syscall commands */
+/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
- /* create a map with given type and attributes
- * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
- * returns fd or negative error
- * map is deleted when fd is closed
- */
BPF_MAP_CREATE,
-
- /* lookup key in a given map
- * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
- * Using attr->map_fd, attr->key, attr->value
- * returns zero and stores found elem into value
- * or negative error
- */
BPF_MAP_LOOKUP_ELEM,
-
- /* create or update key/value pair in a given map
- * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
- * Using attr->map_fd, attr->key, attr->value, attr->flags
- * returns zero or negative error
- */
BPF_MAP_UPDATE_ELEM,
-
- /* find and delete elem by key in a given map
- * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
- * Using attr->map_fd, attr->key
- * returns zero or negative error
- */
BPF_MAP_DELETE_ELEM,
-
- /* lookup key in a given map and return next key
- * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
- * Using attr->map_fd, attr->key, attr->next_key
- * returns zero and stores next key or negative error
- */
BPF_MAP_GET_NEXT_KEY,
-
- /* verify and load eBPF program
- * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
- * Using attr->prog_type, attr->insns, attr->license
- * returns fd or negative error
- */
BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
};
enum bpf_map_type {
@@ -160,6 +126,11 @@ union bpf_attr {
__aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe */
};
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ };
} __attribute__((aligned(8)));
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 7b1425a6b370..accb036bbc9c 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -75,5 +75,6 @@
#define ANON_INODE_FS_MAGIC 0x09041934
#define BTRFS_TEST_MAGIC 0x73727279
#define NSFS_MAGIC 0x6e736673
+#define BPF_FS_MAGIC 0xcafe4a11
#endif /* __LINUX_MAGIC_H__ */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e6983be12bd3..13272582eee0 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,2 +1,4 @@
obj-y := core.o
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o
+
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 80864712d2c4..334b1bdd572c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -92,6 +92,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
fp->pages = size / PAGE_SIZE;
fp->aux = aux;
+ fp->aux->prog = fp;
return fp;
}
@@ -116,6 +117,7 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
fp->pages = size / PAGE_SIZE;
+ fp->aux->prog = fp;
/* We keep fp->aux from fp_old around in the new
* reallocated structure.
@@ -726,7 +728,6 @@ void bpf_prog_free(struct bpf_prog *fp)
struct bpf_prog_aux *aux = fp->aux;
INIT_WORK(&aux->work, bpf_prog_free_deferred);
- aux->prog = fp;
schedule_work(&aux->work);
}
EXPORT_SYMBOL_GPL(bpf_prog_free);
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
new file mode 100644
index 000000000000..be6d726e31c9
--- /dev/null
+++ b/kernel/bpf/inode.c
@@ -0,0 +1,387 @@
+/*
+ * Minimal file system backend for holding eBPF maps and programs,
+ * used by bpf(2) object pinning.
+ *
+ * Authors:
+ *
+ * Daniel Borkmann <daniel@iogearbox.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/magic.h>
+#include <linux/major.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+enum bpf_type {
+ BPF_TYPE_UNSPEC = 0,
+ BPF_TYPE_PROG,
+ BPF_TYPE_MAP,
+};
+
+static void *bpf_any_get(void *raw, enum bpf_type type)
+{
+ switch (type) {
+ case BPF_TYPE_PROG:
+ atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
+ break;
+ case BPF_TYPE_MAP:
+ atomic_inc(&((struct bpf_map *)raw)->refcnt);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return raw;
+}
+
+static void bpf_any_put(void *raw, enum bpf_type type)
+{
+ switch (type) {
+ case BPF_TYPE_PROG:
+ bpf_prog_put(raw);
+ break;
+ case BPF_TYPE_MAP:
+ bpf_map_put(raw);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
+static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
+{
+ void *raw;
+
+ *type = BPF_TYPE_MAP;
+ raw = bpf_map_get(ufd);
+ if (IS_ERR(raw)) {
+ *type = BPF_TYPE_PROG;
+ raw = bpf_prog_get(ufd);
+ }
+
+ return raw;
+}
+
+static const struct inode_operations bpf_dir_iops;
+
+static const struct inode_operations bpf_prog_iops = { };
+static const struct inode_operations bpf_map_iops = { };
+
+static struct inode *bpf_get_inode(struct super_block *sb,
+ const struct inode *dir,
+ umode_t mode)
+{
+ struct inode *inode;
+
+ switch (mode & S_IFMT) {
+ case S_IFDIR:
+ case S_IFREG:
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ inode = new_inode(sb);
+ if (!inode)
+ return ERR_PTR(-ENOSPC);
+
+ inode->i_ino = get_next_ino();
+ inode->i_atime = CURRENT_TIME;
+ inode->i_mtime = inode->i_atime;
+ inode->i_ctime = inode->i_atime;
+
+ inode_init_owner(inode, dir, mode);
+
+ return inode;
+}
+
+static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
+{
+ *type = BPF_TYPE_UNSPEC;
+ if (inode->i_op == &bpf_prog_iops)
+ *type = BPF_TYPE_PROG;
+ else if (inode->i_op == &bpf_map_iops)
+ *type = BPF_TYPE_MAP;
+ else
+ return -EACCES;
+
+ return 0;
+}
+
+static bool bpf_dname_reserved(const struct dentry *dentry)
+{
+ return strchr(dentry->d_name.name, '.');
+}
+
+static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode;
+
+ if (bpf_dname_reserved(dentry))
+ return -EPERM;
+
+ inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_op = &bpf_dir_iops;
+ inode->i_fop = &simple_dir_operations;
+
+ inc_nlink(inode);
+ inc_nlink(dir);
+
+ d_instantiate(dentry, inode);
+ dget(dentry);
+
+ return 0;
+}
+
+static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry,
+ umode_t mode, const struct inode_operations *iops)
+{
+ struct inode *inode;
+
+ if (bpf_dname_reserved(dentry))
+ return -EPERM;
+
+ inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_op = iops;
+ inode->i_private = dentry->d_fsdata;
+
+ d_instantiate(dentry, inode);
+ dget(dentry);
+
+ return 0;
+}
+
+static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode,
+ dev_t devt)
+{
+ enum bpf_type type = MINOR(devt);
+
+ if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) ||
+ dentry->d_fsdata == NULL)
+ return -EPERM;
+
+ switch (type) {
+ case BPF_TYPE_PROG:
+ return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops);
+ case BPF_TYPE_MAP:
+ return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops);
+ default:
+ return -EPERM;
+ }
+}
+
+static const struct inode_operations bpf_dir_iops = {
+ .lookup = simple_lookup,
+ .mknod = bpf_mkobj,
+ .mkdir = bpf_mkdir,
+ .rmdir = simple_rmdir,
+ .unlink = simple_unlink,
+};
+
+static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
+ enum bpf_type type)
+{
+ struct dentry *dentry;
+ struct inode *dir;
+ struct path path;
+ umode_t mode;
+ dev_t devt;
+ int ret;
+
+ dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
+ devt = MKDEV(UNNAMED_MAJOR, type);
+
+ ret = security_path_mknod(&path, dentry, mode, devt);
+ if (ret)
+ goto out;
+
+ dir = d_inode(path.dentry);
+ if (dir->i_op != &bpf_dir_iops) {
+ ret = -EPERM;
+ goto out;
+ }
+
+ dentry->d_fsdata = raw;
+ ret = vfs_mknod(dir, dentry, mode, devt);
+ dentry->d_fsdata = NULL;
+out:
+ done_path_create(&path, dentry);
+ return ret;
+}
+
+int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
+{
+ struct filename *pname;
+ enum bpf_type type;
+ void *raw;
+ int ret;
+
+ pname = getname(pathname);
+ if (IS_ERR(pname))
+ return PTR_ERR(pname);
+
+ raw = bpf_fd_probe_obj(ufd, &type);
+ if (IS_ERR(raw)) {
+ ret = PTR_ERR(raw);
+ goto out;
+ }
+
+ ret = bpf_obj_do_pin(pname, raw, type);
+ if (ret != 0)
+ bpf_any_put(raw, type);
+out:
+ putname(pname);
+ return ret;
+}
+
+static void *bpf_obj_do_get(const struct filename *pathname,
+ enum bpf_type *type)
+{
+ struct inode *inode;
+ struct path path;
+ void *raw;
+ int ret;
+
+ ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path);
+ if (ret)
+ return ERR_PTR(ret);
+
+ inode = d_backing_inode(path.dentry);
+ ret = inode_permission(inode, MAY_WRITE);
+ if (ret)
+ goto out;
+
+ ret = bpf_inode_type(inode, type);
+ if (ret)
+ goto out;
+
+ raw = bpf_any_get(inode->i_private, *type);
+ touch_atime(&path);
+
+ path_put(&path);
+ return raw;
+out:
+ path_put(&path);
+ return ERR_PTR(ret);
+}
+
+int bpf_obj_get_user(const char __user *pathname)
+{
+ enum bpf_type type = BPF_TYPE_UNSPEC;
+ struct filename *pname;
+ int ret = -ENOENT;
+ void *raw;
+
+ pname = getname(pathname);
+ if (IS_ERR(pname))
+ return PTR_ERR(pname);
+
+ raw = bpf_obj_do_get(pname, &type);
+ if (IS_ERR(raw)) {
+ ret = PTR_ERR(raw);
+ goto out;
+ }
+
+ if (type == BPF_TYPE_PROG)
+ ret = bpf_prog_new_fd(raw);
+ else if (type == BPF_TYPE_MAP)
+ ret = bpf_map_new_fd(raw);
+ else
+ goto out;
+
+ if (ret < 0)
+ bpf_any_put(raw, type);
+out:
+ putname(pname);
+ return ret;
+}
+
+static void bpf_evict_inode(struct inode *inode)
+{
+ enum bpf_type type;
+
+ truncate_inode_pages_final(&inode->i_data);
+ clear_inode(inode);
+
+ if (!bpf_inode_type(inode, &type))
+ bpf_any_put(inode->i_private, type);
+}
+
+static const struct super_operations bpf_super_ops = {
+ .statfs = simple_statfs,
+ .drop_inode = generic_delete_inode,
+ .evict_inode = bpf_evict_inode,
+};
+
+static int bpf_fill_super(struct super_block *sb, void *data, int silent)
+{
+ static struct tree_descr bpf_rfiles[] = { { "" } };
+ struct inode *inode;
+ int ret;
+
+ ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
+ if (ret)
+ return ret;
+
+ sb->s_op = &bpf_super_ops;
+
+ inode = sb->s_root->d_inode;
+ inode->i_op = &bpf_dir_iops;
+ inode->i_mode &= ~S_IALLUGO;
+ inode->i_mode |= S_ISVTX | S_IRWXUGO;
+
+ return 0;
+}
+
+static struct dentry *bpf_mount(struct file_system_type *type, int flags,
+ const char *dev_name, void *data)
+{
+ return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super);
+}
+
+static struct file_system_type bpf_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "bpf",
+ .mount = bpf_mount,
+ .kill_sb = kill_litter_super,
+ .fs_flags = FS_USERNS_MOUNT,
+};
+
+MODULE_ALIAS_FS("bpf");
+
+static int __init bpf_init(void)
+{
+ int ret;
+
+ ret = sysfs_create_mount_point(fs_kobj, "bpf");
+ if (ret)
+ return ret;
+
+ ret = register_filesystem(&bpf_fs_type);
+ if (ret)
+ sysfs_remove_mount_point(fs_kobj, "bpf");
+
+ return ret;
+}
+fs_initcall(bpf_init);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 687dd6ca574d..0d3313d02a7e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -111,6 +111,12 @@ static const struct file_operations bpf_map_fops = {
.release = bpf_map_release,
};
+int bpf_map_new_fd(struct bpf_map *map)
+{
+ return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
+ O_RDWR | O_CLOEXEC);
+}
+
/* helper macro to check that unused fields 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
@@ -141,8 +147,7 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map;
- err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
-
+ err = bpf_map_new_fd(map);
if (err < 0)
/* failed to allocate fd */
goto free_map;
@@ -157,19 +162,29 @@ free_map:
/* if error is returned, fd is released.
* On success caller should complete fd access with matching fdput()
*/
-struct bpf_map *bpf_map_get(struct fd f)
+struct bpf_map *__bpf_map_get(struct fd f)
{
- struct bpf_map *map;
-
if (!f.file)
return ERR_PTR(-EBADF);
-
if (f.file->f_op != &bpf_map_fops) {
fdput(f);
return ERR_PTR(-EINVAL);
}
- map = f.file->private_data;
+ return f.file->private_data;
+}
+
+struct bpf_map *bpf_map_get(u32 ufd)
+{
+ struct fd f = fdget(ufd);
+ struct bpf_map *map;
+
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return map;
+
+ atomic_inc(&map->refcnt);
+ fdput(f);
return map;
}
@@ -197,7 +212,7 @@ static int map_lookup_elem(union bpf_attr *attr)
return -EINVAL;
f = fdget(ufd);
- map = bpf_map_get(f);
+ map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
@@ -256,7 +271,7 @@ static int map_update_elem(union bpf_attr *attr)
return -EINVAL;
f = fdget(ufd);
- map = bpf_map_get(f);
+ map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
@@ -309,7 +324,7 @@ static int map_delete_elem(union bpf_attr *attr)
return -EINVAL;
f = fdget(ufd);
- map = bpf_map_get(f);
+ map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
@@ -350,7 +365,7 @@ static int map_get_next_key(union bpf_attr *attr)
return -EINVAL;
f = fdget(ufd);
- map = bpf_map_get(f);
+ map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
@@ -498,7 +513,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
free_uid(user);
}
-static void __prog_put_rcu(struct rcu_head *rcu)
+static void __prog_put_common(struct rcu_head *rcu)
{
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
@@ -510,19 +525,14 @@ static void __prog_put_rcu(struct rcu_head *rcu)
/* version of bpf_prog_put() that is called after a grace period */
void bpf_prog_put_rcu(struct bpf_prog *prog)
{
- if (atomic_dec_and_test(&prog->aux->refcnt)) {
- prog->aux->prog = prog;
- call_rcu(&prog->aux->rcu, __prog_put_rcu);
- }
+ if (atomic_dec_and_test(&prog->aux->refcnt))
+ call_rcu(&prog->aux->rcu, __prog_put_common);
}
void bpf_prog_put(struct bpf_prog *prog)
{
- if (atomic_dec_and_test(&prog->aux->refcnt)) {
- free_used_maps(prog->aux);
- bpf_prog_uncharge_memlock(prog);
- bpf_prog_free(prog);
- }
+ if (atomic_dec_and_test(&prog->aux->refcnt))
+ __prog_put_common(&prog->aux->rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);
@@ -538,21 +548,22 @@ static const struct file_operations bpf_prog_fops = {
.release = bpf_prog_release,
};
-static struct bpf_prog *get_prog(struct fd f)
+int bpf_prog_new_fd(struct bpf_prog *prog)
{
- struct bpf_prog *prog;
+ return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
+ O_RDWR | O_CLOEXEC);
+}
+static struct bpf_prog *__bpf_prog_get(struct fd f)
+{
if (!f.file)
return ERR_PTR(-EBADF);
-
if (f.file->f_op != &bpf_prog_fops) {
fdput(f);
return ERR_PTR(-EINVAL);
}
- prog = f.file->private_data;
-
- return prog;
+ return f.file->private_data;
}
/* called by sockets/tracing/seccomp before attaching program to an event
@@ -563,13 +574,13 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
struct fd f = fdget(ufd);
struct bpf_prog *prog;
- prog = get_prog(f);
-
+ prog = __bpf_prog_get(f);
if (IS_ERR(prog))
return prog;
atomic_inc(&prog->aux->refcnt);
fdput(f);
+
return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get);
@@ -647,7 +658,7 @@ static int bpf_prog_load(union bpf_attr *attr)
if (err < 0)
goto free_used_maps;
- err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
+ err = bpf_prog_new_fd(prog);
if (err < 0)
/* failed to allocate fd */
goto free_used_maps;
@@ -663,6 +674,24 @@ free_prog_nouncharge:
return err;
}
+#define BPF_OBJ_LAST_FIELD bpf_fd
+
+static int bpf_obj_pin(const union bpf_attr *attr)
+{
+ if (CHECK_ATTR(BPF_OBJ))
+ return -EINVAL;
+
+ return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
+}
+
+static int bpf_obj_get(const union bpf_attr *attr)
+{
+ if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
+ return -EINVAL;
+
+ return bpf_obj_get_user(u64_to_ptr(attr->pathname));
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -723,6 +752,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_PROG_LOAD:
err = bpf_prog_load(&attr);
break;
+ case BPF_OBJ_PIN:
+ err = bpf_obj_pin(&attr);
+ break;
+ case BPF_OBJ_GET:
+ err = bpf_obj_get(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b56cf51f8d42..fdc88c5a60e3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1989,8 +1989,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
}
f = fdget(insn->imm);
-
- map = bpf_map_get(f);
+ map = __bpf_map_get(f);
if (IS_ERR(map)) {
verbose("fd %d is not pointing to valid bpf_map\n",
insn->imm);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index b30514514e37..79b4596b5f9a 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -4,6 +4,7 @@ obj- := dummy.o
# List of programs to build
hostprogs-y := test_verifier test_maps
hostprogs-y += sock_example
+hostprogs-y += fds_example
hostprogs-y += sockex1
hostprogs-y += sockex2
hostprogs-y += sockex3
@@ -19,6 +20,7 @@ hostprogs-y += lathist
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
sock_example-objs := sock_example.o libbpf.o
+fds_example-objs := bpf_load.o libbpf.o fds_example.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
sockex3-objs := bpf_load.o libbpf.o sockex3_user.o
@@ -49,6 +51,7 @@ always += lathist_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+HOSTLOADLIBES_fds_example += -lelf
HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf
HOSTLOADLIBES_sockex3 += -lelf
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
new file mode 100644
index 000000000000..e2fd16c3d0f0
--- /dev/null
+++ b/samples/bpf/fds_example.c
@@ -0,0 +1,183 @@
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "bpf_load.h"
+#include "libbpf.h"
+
+#define BPF_F_PIN (1 << 0)
+#define BPF_F_GET (1 << 1)
+#define BPF_F_PIN_GET (BPF_F_PIN | BPF_F_GET)
+
+#define BPF_F_KEY (1 << 2)
+#define BPF_F_VAL (1 << 3)
+#define BPF_F_KEY_VAL (BPF_F_KEY | BPF_F_VAL)
+
+#define BPF_M_UNSPEC 0
+#define BPF_M_MAP 1
+#define BPF_M_PROG 2
+
+static void usage(void)
+{
+ printf("Usage: fds_example [...]\n");
+ printf(" -F <file> File to pin/get object\n");
+ printf(" -P |- pin object\n");
+ printf(" -G `- get object\n");
+ printf(" -m eBPF map mode\n");
+ printf(" -k <key> |- map key\n");
+ printf(" -v <value> `- map value\n");
+ printf(" -p eBPF prog mode\n");
+ printf(" -o <object> `- object file\n");
+ printf(" -h Display this help.\n");
+}
+
+static int bpf_map_create(void)
+{
+ return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
+ sizeof(uint32_t), 1024);
+}
+
+static int bpf_prog_create(const char *object)
+{
+ static const struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ if (object) {
+ assert(!load_bpf_file((char *)object));
+ return prog_fd[0];
+ } else {
+ return bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
+ insns, sizeof(insns), "GPL", 0);
+ }
+}
+
+static int bpf_do_map(const char *file, uint32_t flags, uint32_t key,
+ uint32_t value)
+{
+ int fd, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_map_create();
+ printf("bpf: map fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ if ((flags & BPF_F_KEY_VAL) == BPF_F_KEY_VAL) {
+ ret = bpf_update_elem(fd, &key, &value, 0);
+ printf("bpf: fd:%d u->(%u:%u) ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ } else if (flags & BPF_F_KEY) {
+ ret = bpf_lookup_elem(fd, &key, &value);
+ printf("bpf: fd:%d l->(%u):%u ret:(%d,%s)\n", fd, key, value,
+ ret, strerror(errno));
+ assert(ret == 0);
+ }
+
+ return 0;
+}
+
+static int bpf_do_prog(const char *file, uint32_t flags, const char *object)
+{
+ int fd, sock, ret;
+
+ if (flags & BPF_F_PIN) {
+ fd = bpf_prog_create(object);
+ printf("bpf: prog fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+
+ ret = bpf_obj_pin(fd, file);
+ printf("bpf: pin ret:(%d,%s)\n", ret, strerror(errno));
+ assert(ret == 0);
+ } else {
+ fd = bpf_obj_get(file);
+ printf("bpf: get fd:%d (%s)\n", fd, strerror(errno));
+ assert(fd > 0);
+ }
+
+ sock = open_raw_sock("lo");
+ assert(sock > 0);
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &fd, sizeof(fd));
+ printf("bpf: sock:%d <- fd:%d attached ret:(%d,%s)\n", sock, fd,
+ ret, strerror(errno));
+ assert(ret == 0);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const char *file = NULL, *object = NULL;
+ uint32_t key = 0, value = 0, flags = 0;
+ int opt, mode = BPF_M_UNSPEC;
+
+ while ((opt = getopt(argc, argv, "F:PGmk:v:po:")) != -1) {
+ switch (opt) {
+ /* General args */
+ case 'F':
+ file = optarg;
+ break;
+ case 'P':
+ flags |= BPF_F_PIN;
+ break;
+ case 'G':
+ flags |= BPF_F_GET;
+ break;
+ /* Map-related args */
+ case 'm':
+ mode = BPF_M_MAP;
+ break;
+ case 'k':
+ key = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_KEY;
+ break;
+ case 'v':
+ value = strtoul(optarg, NULL, 0);
+ flags |= BPF_F_VAL;
+ break;
+ /* Prog-related args */
+ case 'p':
+ mode = BPF_M_PROG;
+ break;
+ case 'o':
+ object = optarg;
+ break;
+ default:
+ goto out;
+ }
+ }
+
+ if (!(flags & BPF_F_PIN_GET) || !file)
+ goto out;
+
+ switch (mode) {
+ case BPF_M_MAP:
+ return bpf_do_map(file, flags, key, value);
+ case BPF_M_PROG:
+ return bpf_do_prog(file, flags, object);
+ }
+out:
+ usage();
+ return -1;
+}
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 7e1efa7e2ed7..65a8d48d2799 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -103,6 +103,25 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ .bpf_fd = fd,
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
int open_raw_sock(const char *name)
{
struct sockaddr_ll sll;
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index b7f63c70b4a2..014aacf916e4 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -15,6 +15,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version);
+int bpf_obj_pin(int fd, const char *pathname);
+int bpf_obj_get(const char *pathname);
+
#define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE];