aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2019-01-09 09:16:05 -0700
committerJens Axboe <axboe@kernel.dk>2019-02-28 08:24:23 -0700
commitedafccee56ff31678a091ddb7219aba9b28bc3cb (patch)
tree7cbbdbd1fe256674ac278111bd556b6f25f23264 /include
parentblock: implement bio helper to add iter bvec pages to bio (diff)
downloadlinux-dev-edafccee56ff31678a091ddb7219aba9b28bc3cb.tar.xz
linux-dev-edafccee56ff31678a091ddb7219aba9b28bc3cb.zip
io_uring: add support for pre-mapped user IO buffers
If we have fixed user buffers, we can map them into the kernel when we setup the io_uring. That avoids the need to do get_user_pages() for each and every IO. To utilize this feature, the application must call io_uring_register() after having setup an io_uring instance, passing in IORING_REGISTER_BUFFERS as the opcode. The argument must be a pointer to an iovec array, and the nr_args should contain how many iovecs the application wishes to map. If successful, these buffers are now mapped into the kernel, eligible for IO. To use these fixed buffers, the application must use the IORING_OP_READ_FIXED and IORING_OP_WRITE_FIXED opcodes, and then set sqe->index to the desired buffer index. sqe->addr..sqe->addr+seq->len must point to somewhere inside the indexed buffer. The application may register buffers throughout the lifetime of the io_uring instance. It can call io_uring_register() with IORING_UNREGISTER_BUFFERS as the opcode to unregister the current set of buffers, and then register a new set. The application need not unregister buffers explicitly before shutting down the io_uring instance. It's perfectly valid to setup a larger buffer, and then sometimes only use parts of it for an IO. As long as the range is within the originally mapped region, it will work just fine. For now, buffers must not be file backed. If file backed buffers are passed in, the registration will fail with -1/EOPNOTSUPP. This restriction may be relaxed in the future. RLIMIT_MEMLOCK is used to check how much memory we can pin. A somewhat arbitrary 1G per buffer size is also imposed. Reviewed-by: Hannes Reinecke <hare@suse.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include')
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/linux/io_uring.h13
3 files changed, 17 insertions, 2 deletions
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3072dbaa7869..3681c05ac538 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -315,6 +315,8 @@ asmlinkage long sys_io_uring_setup(u32 entries,
asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
u32 min_complete, u32 flags,
const sigset_t __user *sig, size_t sigsz);
+asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
+ void __user *arg, unsigned int nr_args);
/* fs/xattr.c */
asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 87871e7b7ea7..d346229a1eb0 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -744,9 +744,11 @@ __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
#define __NR_io_uring_enter 426
__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
#undef __NR_syscalls
-#define __NR_syscalls 427
+#define __NR_syscalls 428
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5c457ea396e6..cf28f7a11f12 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -27,7 +27,10 @@ struct io_uring_sqe {
__u32 fsync_flags;
};
__u64 user_data; /* data to be passed back at completion time */
- __u64 __pad2[3];
+ union {
+ __u16 buf_index; /* index into fixed buffers, if used */
+ __u64 __pad2[3];
+ };
};
/*
@@ -39,6 +42,8 @@ struct io_uring_sqe {
#define IORING_OP_READV 1
#define IORING_OP_WRITEV 2
#define IORING_OP_FSYNC 3
+#define IORING_OP_READ_FIXED 4
+#define IORING_OP_WRITE_FIXED 5
/*
* sqe->fsync_flags
@@ -103,4 +108,10 @@ struct io_uring_params {
struct io_cqring_offsets cq_off;
};
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+#define IORING_REGISTER_BUFFERS 0
+#define IORING_UNREGISTER_BUFFERS 1
+
#endif