diff options
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/bitops.h | 11 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 4 | ||||
-rw-r--r-- | include/linux/entry-common.h | 1 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 8 | ||||
-rw-r--r-- | include/linux/init.h | 2 | ||||
-rw-r--r-- | include/linux/iova_bitmap.h | 26 | ||||
-rw-r--r-- | include/linux/ipc_namespace.h | 5 | ||||
-rw-r--r-- | include/linux/irqchip.h | 4 | ||||
-rw-r--r-- | include/linux/irqdesc.h | 1 | ||||
-rw-r--r-- | include/linux/iversion.h | 72 | ||||
-rw-r--r-- | include/linux/kexec.h | 2 | ||||
-rw-r--r-- | include/linux/mdev.h | 77 | ||||
-rw-r--r-- | include/linux/mmc/card.h | 1 | ||||
-rw-r--r-- | include/linux/of_irq.h | 6 | ||||
-rw-r--r-- | include/linux/percpu_counter.h | 32 | ||||
-rw-r--r-- | include/linux/sched/task.h | 3 | ||||
-rw-r--r-- | include/linux/sunrpc/clnt.h | 1 | ||||
-rw-r--r-- | include/linux/sunrpc/sched.h | 6 | ||||
-rw-r--r-- | include/linux/vfio.h | 57 | ||||
-rw-r--r-- | include/linux/vfio_pci_core.h | 149 | ||||
-rw-r--r-- | include/linux/wireless.h | 10 |
21 files changed, 209 insertions, 269 deletions
diff --git a/include/linux/bitops.h b/include/linux/bitops.h index d7dd83fafeba..2ba557e067fe 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -347,10 +347,10 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, const typeof(*(ptr)) mask__ = (mask), bits__ = (bits); \ typeof(*(ptr)) old__, new__; \ \ + old__ = READ_ONCE(*(ptr)); \ do { \ - old__ = READ_ONCE(*(ptr)); \ new__ = (old__ & ~mask__) | bits__; \ - } while (cmpxchg(ptr, old__, new__) != old__); \ + } while (!try_cmpxchg(ptr, &old__, new__)); \ \ old__; \ }) @@ -362,11 +362,12 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, const typeof(*(ptr)) clear__ = (clear), test__ = (test);\ typeof(*(ptr)) old__, new__; \ \ + old__ = READ_ONCE(*(ptr)); \ do { \ - old__ = READ_ONCE(*(ptr)); \ + if (old__ & test__) \ + break; \ new__ = old__ & ~clear__; \ - } while (!(old__ & test__) && \ - cmpxchg(ptr, old__, new__) != old__); \ + } while (!try_cmpxchg(ptr, &old__, new__)); \ \ !(old__ & test__); \ }) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e7f2fb2fc207..99c1726be6ee 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -207,7 +207,6 @@ struct ceph_msg_data_cursor { struct ceph_msg_data *data; /* current data item */ size_t resid; /* bytes not yet consumed */ - bool last_piece; /* current is last piece */ bool need_crc; /* crc update needed */ union { #ifdef CONFIG_BLOCK @@ -498,8 +497,7 @@ void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq); void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor, struct ceph_msg *msg, size_t length); struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor, - size_t *page_offset, size_t *length, - bool *last_piece); + size_t *page_offset, size_t *length); void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes); u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset, diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 84a466b176cf..d95ab85f96ba 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -253,7 +253,6 @@ static __always_inline void arch_exit_to_user_mode(void) { } /** * arch_do_signal_or_restart - Architecture specific signal delivery function * @regs: Pointer to currents pt_regs - * @has_signal: actual signal to handle * * Invoked from exit_to_user_mode_loop(). */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 95fda85aa195..8b4f93e84868 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -214,8 +214,8 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pd(struct vm_area_struct *vma, unsigned long address, hugepd_t hpd, int flags, int pdshift); -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags); +struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, + int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, @@ -327,8 +327,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma, return NULL; } -static inline struct page *follow_huge_pmd(struct mm_struct *mm, - unsigned long address, pmd_t *pmd, int flags) +static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, + unsigned long address, int flags) { return NULL; } diff --git a/include/linux/init.h b/include/linux/init.h index a0a90cd73ebe..077d7f93b402 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -134,7 +134,7 @@ static inline initcall_t initcall_from_entry(initcall_entry_t *entry) extern initcall_entry_t __con_initcall_start[], __con_initcall_end[]; -/* Used for contructor calls. */ +/* Used for constructor calls. */ typedef void (*ctor_fn_t)(void); struct file_system_type; diff --git a/include/linux/iova_bitmap.h b/include/linux/iova_bitmap.h new file mode 100644 index 000000000000..c006cf0a25f3 --- /dev/null +++ b/include/linux/iova_bitmap.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ +#ifndef _IOVA_BITMAP_H_ +#define _IOVA_BITMAP_H_ + +#include <linux/types.h> + +struct iova_bitmap; + +typedef int (*iova_bitmap_fn_t)(struct iova_bitmap *bitmap, + unsigned long iova, size_t length, + void *opaque); + +struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, + unsigned long page_size, + u64 __user *data); +void iova_bitmap_free(struct iova_bitmap *bitmap); +int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, + iova_bitmap_fn_t fn); +void iova_bitmap_set(struct iova_bitmap *bitmap, + unsigned long iova, size_t length); + +#endif diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e3e8c8662b49..e8240cf2611a 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -11,6 +11,7 @@ #include <linux/refcount.h> #include <linux/rhashtable-types.h> #include <linux/sysctl.h> +#include <linux/percpu_counter.h> struct user_namespace; @@ -36,8 +37,8 @@ struct ipc_namespace { unsigned int msg_ctlmax; unsigned int msg_ctlmnb; unsigned int msg_ctlmni; - atomic_t msg_bytes; - atomic_t msg_hdrs; + struct percpu_counter percpu_msg_bytes; + struct percpu_counter percpu_msg_hdrs; size_t shm_ctlmax; size_t shm_ctlall; diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 3a091d0710ae..d5e6024cb2a8 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -44,7 +44,8 @@ static const struct of_device_id drv_name##_irqchip_match_table[] = { #define IRQCHIP_MATCH(compat, fn) { .compatible = compat, \ .data = typecheck_irq_init_cb(fn), }, -#define IRQCHIP_PLATFORM_DRIVER_END(drv_name) \ + +#define IRQCHIP_PLATFORM_DRIVER_END(drv_name, ...) \ {}, \ }; \ MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table); \ @@ -56,6 +57,7 @@ static struct platform_driver drv_name##_driver = { \ .owner = THIS_MODULE, \ .of_match_table = drv_name##_irqchip_match_table, \ .suppress_bind_attrs = true, \ + __VA_ARGS__ \ }, \ }; \ builtin_platform_driver(drv_name##_driver) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 1cd4e36890fb..844a8e30e6de 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -169,6 +169,7 @@ int generic_handle_irq_safe(unsigned int irq); * conversion failed. */ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq); +int generic_handle_domain_irq_safe(struct irq_domain *domain, unsigned int hwirq); int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq); #endif diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 3bfebde5a1a6..e27bd4f55d84 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -123,17 +123,12 @@ inode_peek_iversion_raw(const struct inode *inode) static inline void inode_set_max_iversion_raw(struct inode *inode, u64 val) { - u64 cur, old; + u64 cur = inode_peek_iversion_raw(inode); - cur = inode_peek_iversion_raw(inode); - for (;;) { + do { if (cur > val) break; - old = atomic64_cmpxchg(&inode->i_version, cur, val); - if (likely(old == cur)) - break; - cur = old; - } + } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, val)); } /** @@ -177,56 +172,7 @@ inode_set_iversion_queried(struct inode *inode, u64 val) I_VERSION_QUERIED); } -/** - * inode_maybe_inc_iversion - increments i_version - * @inode: inode with the i_version that should be updated - * @force: increment the counter even if it's not necessary? - * - * Every time the inode is modified, the i_version field must be seen to have - * changed by any observer. - * - * If "force" is set or the QUERIED flag is set, then ensure that we increment - * the value, and clear the queried flag. - * - * In the common case where neither is set, then we can return "false" without - * updating i_version. - * - * If this function returns false, and no other metadata has changed, then we - * can avoid logging the metadata. - */ -static inline bool -inode_maybe_inc_iversion(struct inode *inode, bool force) -{ - u64 cur, old, new; - - /* - * The i_version field is not strictly ordered with any other inode - * information, but the legacy inode_inc_iversion code used a spinlock - * to serialize increments. - * - * Here, we add full memory barriers to ensure that any de-facto - * ordering with other info is preserved. - * - * This barrier pairs with the barrier in inode_query_iversion() - */ - smp_mb(); - cur = inode_peek_iversion_raw(inode); - for (;;) { - /* If flag is clear then we needn't do anything */ - if (!force && !(cur & I_VERSION_QUERIED)) - return false; - - /* Since lowest bit is flag, add 2 to avoid it */ - new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; - - old = atomic64_cmpxchg(&inode->i_version, cur, new); - if (likely(old == cur)) - break; - cur = old; - } - return true; -} - +bool inode_maybe_inc_iversion(struct inode *inode, bool force); /** * inode_inc_iversion - forcibly increment i_version @@ -304,10 +250,10 @@ inode_peek_iversion(const struct inode *inode) static inline u64 inode_query_iversion(struct inode *inode) { - u64 cur, old, new; + u64 cur, new; cur = inode_peek_iversion_raw(inode); - for (;;) { + do { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { /* @@ -320,11 +266,7 @@ inode_query_iversion(struct inode *inode) } new = cur | I_VERSION_QUERIED; - old = atomic64_cmpxchg(&inode->i_version, cur, new); - if (likely(old == cur)) - break; - cur = old; - } + } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); return cur >> I_VERSION_QUERIED_SHIFT; } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 13e6c4b58f07..41a686996aaa 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -427,7 +427,7 @@ extern int kexec_load_disabled; extern bool kexec_in_progress; int crash_shrink_memory(unsigned long new_size); -size_t crash_get_memory_size(void); +ssize_t crash_get_memory_size(void); #ifndef arch_kexec_protect_crashkres /* diff --git a/include/linux/mdev.h b/include/linux/mdev.h index 47ad3b104d9e..139d05b26f82 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -10,6 +10,9 @@ #ifndef MDEV_H #define MDEV_H +#include <linux/device.h> +#include <linux/uuid.h> + struct mdev_type; struct mdev_device { @@ -20,67 +23,67 @@ struct mdev_device { bool active; }; -static inline struct mdev_device *to_mdev_device(struct device *dev) -{ - return container_of(dev, struct mdev_device, dev); -} +struct mdev_type { + /* set by the driver before calling mdev_register parent: */ + const char *sysfs_name; + const char *pretty_name; -unsigned int mdev_get_type_group_id(struct mdev_device *mdev); -unsigned int mtype_get_type_group_id(struct mdev_type *mtype); -struct device *mtype_get_parent_dev(struct mdev_type *mtype); + /* set by the core, can be used drivers */ + struct mdev_parent *parent; -/* interface for exporting mdev supported type attributes */ -struct mdev_type_attribute { - struct attribute attr; - ssize_t (*show)(struct mdev_type *mtype, - struct mdev_type_attribute *attr, char *buf); - ssize_t (*store)(struct mdev_type *mtype, - struct mdev_type_attribute *attr, const char *buf, - size_t count); + /* internal only */ + struct kobject kobj; + struct kobject *devices_kobj; }; -#define MDEV_TYPE_ATTR(_name, _mode, _show, _store) \ -struct mdev_type_attribute mdev_type_attr_##_name = \ - __ATTR(_name, _mode, _show, _store) -#define MDEV_TYPE_ATTR_RW(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RW(_name) -#define MDEV_TYPE_ATTR_RO(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name) -#define MDEV_TYPE_ATTR_WO(_name) \ - struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name) +/* embedded into the struct device that the mdev devices hang off */ +struct mdev_parent { + struct device *dev; + struct mdev_driver *mdev_driver; + struct kset *mdev_types_kset; + /* Synchronize device creation/removal with parent unregistration */ + struct rw_semaphore unreg_sem; + struct mdev_type **types; + unsigned int nr_types; + atomic_t available_instances; +}; + +static inline struct mdev_device *to_mdev_device(struct device *dev) +{ + return container_of(dev, struct mdev_device, dev); +} /** * struct mdev_driver - Mediated device driver + * @device_api: string to return for the device_api sysfs + * @max_instances: maximum number of instances supported (optional) * @probe: called when new device created * @remove: called when device removed - * @supported_type_groups: Attributes to define supported types. It is mandatory - * to provide supported types. + * @get_available: Return the max number of instances that can be created + * @show_description: Print a description of the mtype * @driver: device driver structure - * **/ struct mdev_driver { + const char *device_api; + unsigned int max_instances; int (*probe)(struct mdev_device *dev); void (*remove)(struct mdev_device *dev); - struct attribute_group **supported_type_groups; + unsigned int (*get_available)(struct mdev_type *mtype); + ssize_t (*show_description)(struct mdev_type *mtype, char *buf); struct device_driver driver; }; -extern struct bus_type mdev_bus_type; - -int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver); -void mdev_unregister_device(struct device *dev); +int mdev_register_parent(struct mdev_parent *parent, struct device *dev, + struct mdev_driver *mdev_driver, struct mdev_type **types, + unsigned int nr_types); +void mdev_unregister_parent(struct mdev_parent *parent); int mdev_register_driver(struct mdev_driver *drv); void mdev_unregister_driver(struct mdev_driver *drv); -struct device *mdev_parent_dev(struct mdev_device *mdev); static inline struct device *mdev_dev(struct mdev_device *mdev) { return &mdev->dev; } -static inline struct mdev_device *mdev_from_dev(struct device *dev) -{ - return dev->bus == &mdev_bus_type ? to_mdev_device(dev) : NULL; -} #endif /* MDEV_H */ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 8a30de08e913..c726ea781255 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -293,6 +293,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_IRQ_POLLING (1<<11) /* Polling SDIO_CCCR_INTx could create a fake interrupt */ #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ +#define MMC_QUIRK_BROKEN_SD_DISCARD (1<<14) /* Disable broken SD discard support */ bool reenable_cmdq; /* Re-enable Command Queue */ diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 83fccd0c9bba..d6d3eae2f145 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -37,9 +37,8 @@ extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data); extern int of_irq_to_resource(struct device_node *dev, int index, struct resource *r); -extern void of_irq_init(const struct of_device_id *matches); - #ifdef CONFIG_OF_IRQ +extern void of_irq_init(const struct of_device_id *matches); extern int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq); extern int of_irq_count(struct device_node *dev); @@ -57,6 +56,9 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, extern void of_msi_configure(struct device *dev, struct device_node *np); u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in); #else +static inline void of_irq_init(const struct of_device_id *matches) +{ +} static inline int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_args *out_irq) { diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 01861eebed79..8ed5fba6d156 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -15,6 +15,9 @@ #include <linux/types.h> #include <linux/gfp.h> +/* percpu_counter batch for local add or sub */ +#define PERCPU_COUNTER_LOCAL_BATCH INT_MAX + #ifdef CONFIG_SMP struct percpu_counter { @@ -56,6 +59,22 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } +/* + * With percpu_counter_add_local() and percpu_counter_sub_local(), counts + * are accumulated in local per cpu counter and not in fbc->count until + * local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter + * write efficient. + * But percpu_counter_sum(), instead of percpu_counter_read(), needs to be + * used to add up the counts from each CPU to account for all the local + * counts. So percpu_counter_add_local() and percpu_counter_sub_local() + * should be used when a counter is updated frequently and read rarely. + */ +static inline void +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH); +} + static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { s64 ret = __percpu_counter_sum(fbc); @@ -138,6 +157,13 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount) preempt_enable(); } +/* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ +static inline void +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add(fbc, amount); +} + static inline void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { @@ -193,4 +219,10 @@ static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount) percpu_counter_add(fbc, -amount); } +static inline void +percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount) +{ + percpu_counter_add_local(fbc, -amount); +} + #endif /* _LINUX_PERCPU_COUNTER_H */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 81cab4b01edc..d6c48163c6de 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -127,6 +127,9 @@ static inline void put_task_struct_many(struct task_struct *t, int nr) void put_task_struct_rcu_user(struct task_struct *task); +/* Free all architecture-specific resources held by a thread. */ +void release_thread(struct task_struct *dead_task); + #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT extern int arch_task_struct_size __read_mostly; #else diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 75eea5ebb179..770ef2cb5775 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -246,6 +246,7 @@ void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, const struct sockaddr *sap); void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt); +void rpc_clnt_disconnect(struct rpc_clnt *clnt); void rpc_cleanup_clids(void); static inline int rpc_reply_expected(struct rpc_task *task) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index baeca2f564dc..b8ca3ecaf8d7 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -209,11 +209,17 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); +void rpc_task_try_cancel(struct rpc_task *task, int error); void rpc_signal_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); void rpc_release_calldata(const struct rpc_call_ops *, void *); void rpc_killall_tasks(struct rpc_clnt *); +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e05ddc6fe6a5..e7cebeb875dd 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -14,6 +14,7 @@ #include <linux/workqueue.h> #include <linux/poll.h> #include <uapi/linux/vfio.h> +#include <linux/iova_bitmap.h> struct kvm; @@ -33,10 +34,11 @@ struct vfio_device { struct device *dev; const struct vfio_device_ops *ops; /* - * mig_ops is a static property of the vfio_device which must be set - * prior to registering the vfio_device. + * mig_ops/log_ops is a static property of the vfio_device which must + * be set prior to registering the vfio_device. */ const struct vfio_migration_ops *mig_ops; + const struct vfio_log_ops *log_ops; struct vfio_group *group; struct vfio_device_set *dev_set; struct list_head dev_set_list; @@ -45,7 +47,9 @@ struct vfio_device { struct kvm *kvm; /* Members below here are private, not for driver use */ - refcount_t refcount; + unsigned int index; + struct device device; /* device.kref covers object life circle */ + refcount_t refcount; /* user count on registered device*/ unsigned int open_count; struct completion comp; struct list_head group_next; @@ -55,6 +59,8 @@ struct vfio_device { /** * struct vfio_device_ops - VFIO bus driver device callbacks * + * @init: initialize private fields in device structure + * @release: Reclaim private fields in device structure * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -72,6 +78,8 @@ struct vfio_device { */ struct vfio_device_ops { char *name; + int (*init)(struct vfio_device *vdev); + void (*release)(struct vfio_device *vdev); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -109,6 +117,28 @@ struct vfio_migration_ops { }; /** + * @log_start: Optional callback to ask the device start DMA logging. + * @log_stop: Optional callback to ask the device stop DMA logging. + * @log_read_and_clear: Optional callback to ask the device read + * and clear the dirty DMAs in some given range. + * + * The vfio core implementation of the DEVICE_FEATURE_DMA_LOGGING_ set + * of features does not track logging state relative to the device, + * therefore the device implementation of vfio_log_ops must handle + * arbitrary user requests. This includes rejecting subsequent calls + * to log_start without an intervening log_stop, as well as graceful + * handling of log_stop and log_read_and_clear from invalid states. + */ +struct vfio_log_ops { + int (*log_start)(struct vfio_device *device, + struct rb_root_cached *ranges, u32 nnodes, u64 *page_size); + int (*log_stop)(struct vfio_device *device); + int (*log_read_and_clear)(struct vfio_device *device, + unsigned long iova, unsigned long length, + struct iova_bitmap *dirty); +}; + +/** * vfio_check_feature - Validate user input for the VFIO_DEVICE_FEATURE ioctl * @flags: Arg from the device_feature op * @argsz: Arg from the device_feature op @@ -137,9 +167,23 @@ static inline int vfio_check_feature(u32 flags, size_t argsz, u32 supported_ops, return 1; } -void vfio_init_group_dev(struct vfio_device *device, struct device *dev, - const struct vfio_device_ops *ops); -void vfio_uninit_group_dev(struct vfio_device *device); +struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev, + const struct vfio_device_ops *ops); +#define vfio_alloc_device(dev_struct, member, dev, ops) \ + container_of(_vfio_alloc_device(sizeof(struct dev_struct) + \ + BUILD_BUG_ON_ZERO(offsetof( \ + struct dev_struct, member)), \ + dev, ops), \ + struct dev_struct, member) + +int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops); +void vfio_free_device(struct vfio_device *device); +static inline void vfio_put_device(struct vfio_device *device) +{ + put_device(&device->device); +} + int vfio_register_group_dev(struct vfio_device *device); int vfio_register_emulated_iommu_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); @@ -155,6 +199,7 @@ int vfio_mig_get_next_state(struct vfio_device *device, * External user API */ struct iommu_group *vfio_file_iommu_group(struct file *file); +bool vfio_file_is_group(struct file *file); bool vfio_file_enforced_coherent(struct file *file); void vfio_file_set_kvm(struct file *file, struct kvm *kvm); bool vfio_file_has_dev(struct file *file, struct vfio_device *device); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 5579ece4347b..367fd79226a3 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -20,39 +20,10 @@ #define VFIO_PCI_CORE_H #define VFIO_PCI_OFFSET_SHIFT 40 - #define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) -/* Special capability IDs predefined access */ -#define PCI_CAP_ID_INVALID 0xFF /* default raw access */ -#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */ - -/* Cap maximum number of ioeventfds per device (arbitrary) */ -#define VFIO_PCI_IOEVENTFD_MAX 1000 - -struct vfio_pci_ioeventfd { - struct list_head next; - struct vfio_pci_core_device *vdev; - struct virqfd *virqfd; - void __iomem *addr; - uint64_t data; - loff_t pos; - int bar; - int count; - bool test_mem; -}; - -struct vfio_pci_irq_ctx { - struct eventfd_ctx *trigger; - struct virqfd *unmask; - struct virqfd *mask; - char *name; - bool masked; - struct irq_bypass_producer producer; -}; - struct vfio_pci_core_device; struct vfio_pci_region; @@ -78,23 +49,6 @@ struct vfio_pci_region { u32 flags; }; -struct vfio_pci_dummy_resource { - struct resource resource; - int index; - struct list_head res_next; -}; - -struct vfio_pci_vf_token { - struct mutex lock; - uuid_t uuid; - int users; -}; - -struct vfio_pci_mmap_vma { - struct vm_area_struct *vma; - struct list_head vma_next; -}; - struct vfio_pci_core_device { struct vfio_device vdev; struct pci_dev *pdev; @@ -124,11 +78,14 @@ struct vfio_pci_core_device { bool needs_reset; bool nointx; bool needs_pm_restore; + bool pm_intx_masked; + bool pm_runtime_engaged; struct pci_saved_state *pci_saved_state; struct pci_saved_state *pm_save; int ioeventfds_nr; struct eventfd_ctx *err_trigger; struct eventfd_ctx *req_trigger; + struct eventfd_ctx *pm_wake_eventfd_ctx; struct list_head dummy_resources_list; struct mutex ioeventfds_lock; struct list_head ioeventfds_list; @@ -141,100 +98,17 @@ struct vfio_pci_core_device { struct rw_semaphore memory_lock; }; -#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) -#define is_msi(vdev) (vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX) -#define is_msix(vdev) (vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX) -#define is_irq_none(vdev) (!(is_intx(vdev) || is_msi(vdev) || is_msix(vdev))) -#define irq_is(vdev, type) (vdev->irq_type == type) - -void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev); -void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev); - -int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, - uint32_t flags, unsigned index, - unsigned start, unsigned count, void *data); - -ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite); - -ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); - -#ifdef CONFIG_VFIO_PCI_VGA -ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, - size_t count, loff_t *ppos, bool iswrite); -#else -static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, - char __user *buf, size_t count, - loff_t *ppos, bool iswrite) -{ - return -EINVAL; -} -#endif - -long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset, - uint64_t data, int count, int fd); - -int vfio_pci_init_perm_bits(void); -void vfio_pci_uninit_perm_bits(void); - -int vfio_config_init(struct vfio_pci_core_device *vdev); -void vfio_config_free(struct vfio_pci_core_device *vdev); - -int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev, - unsigned int type, unsigned int subtype, - const struct vfio_pci_regops *ops, - size_t size, u32 flags, void *data); - -int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, - pci_power_t state); - -bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev); -void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev); -u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev); -void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, - u16 cmd); - -#ifdef CONFIG_VFIO_PCI_IGD -int vfio_pci_igd_init(struct vfio_pci_core_device *vdev); -#else -static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev) -{ - return -ENODEV; -} -#endif - -#ifdef CONFIG_VFIO_PCI_ZDEV_KVM -int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps); -int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev); -void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev); -#else -static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev, - struct vfio_info_cap *caps) -{ - return -ENODEV; -} - -static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev) -{ - return 0; -} - -static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev) -{} -#endif - /* Will be exported for vfio pci drivers usage */ +int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev, + unsigned int type, unsigned int subtype, + const struct vfio_pci_regops *ops, + size_t size, u32 flags, void *data); void vfio_pci_core_set_params(bool nointxmask, bool is_disable_vga, bool is_disable_idle_d3); void vfio_pci_core_close_device(struct vfio_device *core_vdev); -void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev, - struct pci_dev *pdev, - const struct vfio_device_ops *vfio_pci_ops); +int vfio_pci_core_init_dev(struct vfio_device *core_vdev); +void vfio_pci_core_release_dev(struct vfio_device *core_vdev); int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev); -void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev); void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev); extern const struct pci_error_handlers vfio_pci_core_err_handlers; int vfio_pci_core_sriov_configure(struct vfio_pci_core_device *vdev, @@ -256,9 +130,4 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, pci_channel_state_t state); -static inline bool vfio_pci_is_vga(struct pci_dev *pdev) -{ - return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; -} - #endif /* VFIO_PCI_CORE_H */ diff --git a/include/linux/wireless.h b/include/linux/wireless.h index 2d1b54556eff..e6e34d74dda0 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -26,7 +26,15 @@ struct compat_iw_point { struct __compat_iw_event { __u16 len; /* Real length of this stuff */ __u16 cmd; /* Wireless IOCTL */ - compat_caddr_t pointer; + + union { + compat_caddr_t pointer; + + /* we need ptr_bytes to make memcpy() run-time destination + * buffer bounds checking happy, nothing special + */ + DECLARE_FLEX_ARRAY(__u8, ptr_bytes); + }; }; #define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer) #define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length) |