diff options
Diffstat (limited to 'drivers/dma-buf/dma-fence.c')
-rw-r--r-- | drivers/dma-buf/dma-fence.c | 361 |
1 files changed, 335 insertions, 26 deletions
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 052a41e2451c..406b4e26f538 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -15,6 +15,7 @@ #include <linux/atomic.h> #include <linux/dma-fence.h> #include <linux/sched/signal.h> +#include <linux/seq_file.h> #define CREATE_TRACE_POINTS #include <trace/events/dma_fence.h> @@ -64,6 +65,52 @@ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1); * &dma_buf.resv pointer. */ +/** + * DOC: fence cross-driver contract + * + * Since &dma_fence provide a cross driver contract, all drivers must follow the + * same rules: + * + * * Fences must complete in a reasonable time. Fences which represent kernels + * and shaders submitted by userspace, which could run forever, must be backed + * up by timeout and gpu hang recovery code. Minimally that code must prevent + * further command submission and force complete all in-flight fences, e.g. + * when the driver or hardware do not support gpu reset, or if the gpu reset + * failed for some reason. Ideally the driver supports gpu recovery which only + * affects the offending userspace context, and no other userspace + * submissions. + * + * * Drivers may have different ideas of what completion within a reasonable + * time means. Some hang recovery code uses a fixed timeout, others a mix + * between observing forward progress and increasingly strict timeouts. + * Drivers should not try to second guess timeout handling of fences from + * other drivers. + * + * * To ensure there's no deadlocks of dma_fence_wait() against other locks + * drivers should annotate all code required to reach dma_fence_signal(), + * which completes the fences, with dma_fence_begin_signalling() and + * dma_fence_end_signalling(). + * + * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock(). + * This means any code required for fence completion cannot acquire a + * &dma_resv lock. Note that this also pulls in the entire established + * locking hierarchy around dma_resv_lock() and dma_resv_unlock(). + * + * * Drivers are allowed to call dma_fence_wait() from their &shrinker + * callbacks. This means any code required for fence completion cannot + * allocate memory with GFP_KERNEL. + * + * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier + * respectively &mmu_interval_notifier callbacks. This means any code required + * for fence completeion cannot allocate memory with GFP_NOFS or GFP_NOIO. + * Only GFP_ATOMIC is permissible, which might fail. + * + * Note that only GPU drivers have a reasonable excuse for both requiring + * &mmu_interval_notifier and &shrinker callbacks at the same time as having to + * track asynchronous compute work using &dma_fence. No driver outside of + * drivers/gpu should ever call dma_fence_wait() in such contexts. + */ + static const char *dma_fence_stub_get_name(struct dma_fence *fence) { return "stub"; @@ -77,7 +124,9 @@ static const struct dma_fence_ops dma_fence_stub_ops = { /** * dma_fence_get_stub - return a signaled fence * - * Return a stub fence which is already signaled. + * Return a stub fence which is already signaled. The fence's + * timestamp corresponds to the first time after boot this + * function is called. */ struct dma_fence *dma_fence_get_stub(void) { @@ -87,6 +136,10 @@ struct dma_fence *dma_fence_get_stub(void) &dma_fence_stub_ops, &dma_fence_stub_lock, 0, 0); + + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &dma_fence_stub.flags); + dma_fence_signal_locked(&dma_fence_stub); } spin_unlock(&dma_fence_stub_lock); @@ -96,6 +149,33 @@ struct dma_fence *dma_fence_get_stub(void) EXPORT_SYMBOL(dma_fence_get_stub); /** + * dma_fence_allocate_private_stub - return a private, signaled fence + * + * Return a newly allocated and signaled stub fence. + */ +struct dma_fence *dma_fence_allocate_private_stub(void) +{ + struct dma_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return ERR_PTR(-ENOMEM); + + dma_fence_init(fence, + &dma_fence_stub_ops, + &dma_fence_stub_lock, + 0, 0); + + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &dma_fence_stub.flags); + + dma_fence_signal(fence); + + return fence; +} +EXPORT_SYMBOL(dma_fence_allocate_private_stub); + +/** * dma_fence_context_alloc - allocate an array of fence contexts * @num: amount of contexts to allocate * @@ -106,27 +186,185 @@ EXPORT_SYMBOL(dma_fence_get_stub); u64 dma_fence_context_alloc(unsigned num) { WARN_ON(!num); - return atomic64_add_return(num, &dma_fence_context_counter) - num; + return atomic64_fetch_add(num, &dma_fence_context_counter); } EXPORT_SYMBOL(dma_fence_context_alloc); /** - * dma_fence_signal_locked - signal completion of a fence + * DOC: fence signalling annotation + * + * Proving correctness of all the kernel code around &dma_fence through code + * review and testing is tricky for a few reasons: + * + * * It is a cross-driver contract, and therefore all drivers must follow the + * same rules for lock nesting order, calling contexts for various functions + * and anything else significant for in-kernel interfaces. But it is also + * impossible to test all drivers in a single machine, hence brute-force N vs. + * N testing of all combinations is impossible. Even just limiting to the + * possible combinations is infeasible. + * + * * There is an enormous amount of driver code involved. For render drivers + * there's the tail of command submission, after fences are published, + * scheduler code, interrupt and workers to process job completion, + * and timeout, gpu reset and gpu hang recovery code. Plus for integration + * with core mm with have &mmu_notifier, respectively &mmu_interval_notifier, + * and &shrinker. For modesetting drivers there's the commit tail functions + * between when fences for an atomic modeset are published, and when the + * corresponding vblank completes, including any interrupt processing and + * related workers. Auditing all that code, across all drivers, is not + * feasible. + * + * * Due to how many other subsystems are involved and the locking hierarchies + * this pulls in there is extremely thin wiggle-room for driver-specific + * differences. &dma_fence interacts with almost all of the core memory + * handling through page fault handlers via &dma_resv, dma_resv_lock() and + * dma_resv_unlock(). On the other side it also interacts through all + * allocation sites through &mmu_notifier and &shrinker. + * + * Furthermore lockdep does not handle cross-release dependencies, which means + * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught + * at runtime with some quick testing. The simplest example is one thread + * waiting on a &dma_fence while holding a lock:: + * + * lock(A); + * dma_fence_wait(B); + * unlock(A); + * + * while the other thread is stuck trying to acquire the same lock, which + * prevents it from signalling the fence the previous thread is stuck waiting + * on:: + * + * lock(A); + * unlock(A); + * dma_fence_signal(B); + * + * By manually annotating all code relevant to signalling a &dma_fence we can + * teach lockdep about these dependencies, which also helps with the validation + * headache since now lockdep can check all the rules for us:: + * + * cookie = dma_fence_begin_signalling(); + * lock(A); + * unlock(A); + * dma_fence_signal(B); + * dma_fence_end_signalling(cookie); + * + * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to + * annotate critical sections the following rules need to be observed: + * + * * All code necessary to complete a &dma_fence must be annotated, from the + * point where a fence is accessible to other threads, to the point where + * dma_fence_signal() is called. Un-annotated code can contain deadlock issues, + * and due to the very strict rules and many corner cases it is infeasible to + * catch these just with review or normal stress testing. + * + * * &struct dma_resv deserves a special note, since the readers are only + * protected by rcu. This means the signalling critical section starts as soon + * as the new fences are installed, even before dma_resv_unlock() is called. + * + * * The only exception are fast paths and opportunistic signalling code, which + * calls dma_fence_signal() purely as an optimization, but is not required to + * guarantee completion of a &dma_fence. The usual example is a wait IOCTL + * which calls dma_fence_signal(), while the mandatory completion path goes + * through a hardware interrupt and possible job completion worker. + * + * * To aid composability of code, the annotations can be freely nested, as long + * as the overall locking hierarchy is consistent. The annotations also work + * both in interrupt and process context. Due to implementation details this + * requires that callers pass an opaque cookie from + * dma_fence_begin_signalling() to dma_fence_end_signalling(). + * + * * Validation against the cross driver contract is implemented by priming + * lockdep with the relevant hierarchy at boot-up. This means even just + * testing with a single device is enough to validate a driver, at least as + * far as deadlocks with dma_fence_wait() against dma_fence_signal() are + * concerned. + */ +#ifdef CONFIG_LOCKDEP +static struct lockdep_map dma_fence_lockdep_map = { + .name = "dma_fence_map" +}; + +/** + * dma_fence_begin_signalling - begin a critical DMA fence signalling section + * + * Drivers should use this to annotate the beginning of any code section + * required to eventually complete &dma_fence by calling dma_fence_signal(). + * + * The end of these critical sections are annotated with + * dma_fence_end_signalling(). + * + * Returns: + * + * Opaque cookie needed by the implementation, which needs to be passed to + * dma_fence_end_signalling(). + */ +bool dma_fence_begin_signalling(void) +{ + /* explicitly nesting ... */ + if (lock_is_held_type(&dma_fence_lockdep_map, 1)) + return true; + + /* rely on might_sleep check for soft/hardirq locks */ + if (in_atomic()) + return true; + + /* ... and non-recursive readlock */ + lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_); + + return false; +} +EXPORT_SYMBOL(dma_fence_begin_signalling); + +/** + * dma_fence_end_signalling - end a critical DMA fence signalling section + * @cookie: opaque cookie from dma_fence_begin_signalling() + * + * Closes a critical section annotation opened by dma_fence_begin_signalling(). + */ +void dma_fence_end_signalling(bool cookie) +{ + if (cookie) + return; + + lock_release(&dma_fence_lockdep_map, _RET_IP_); +} +EXPORT_SYMBOL(dma_fence_end_signalling); + +void __dma_fence_might_wait(void) +{ + bool tmp; + + tmp = lock_is_held_type(&dma_fence_lockdep_map, 1); + if (tmp) + lock_release(&dma_fence_lockdep_map, _THIS_IP_); + lock_map_acquire(&dma_fence_lockdep_map); + lock_map_release(&dma_fence_lockdep_map); + if (tmp) + lock_acquire(&dma_fence_lockdep_map, 0, 0, 1, 1, NULL, _THIS_IP_); +} +#endif + + +/** + * dma_fence_signal_timestamp_locked - signal completion of a fence * @fence: the fence to signal + * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will - * only be effective the first time. + * only be effective the first time. Set the timestamp provided as the fence + * signal timestamp. * - * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock - * held. + * Unlike dma_fence_signal_timestamp(), this function must be called with + * &dma_fence.lock held. * * Returns 0 on success and a negative error value when @fence has been * signalled already. */ -int dma_fence_signal_locked(struct dma_fence *fence) +int dma_fence_signal_timestamp_locked(struct dma_fence *fence, + ktime_t timestamp) { struct dma_fence_cb *cur, *tmp; struct list_head cb_list; @@ -140,7 +378,7 @@ int dma_fence_signal_locked(struct dma_fence *fence) /* Stash the cb_list before replacing it with the timestamp */ list_replace(&fence->cb_list, &cb_list); - fence->timestamp = ktime_get(); + fence->timestamp = timestamp; set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); trace_dma_fence_signaled(fence); @@ -151,6 +389,59 @@ int dma_fence_signal_locked(struct dma_fence *fence) return 0; } +EXPORT_SYMBOL(dma_fence_signal_timestamp_locked); + +/** + * dma_fence_signal_timestamp - signal completion of a fence + * @fence: the fence to signal + * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain + * + * Signal completion for software callbacks on a fence, this will unblock + * dma_fence_wait() calls and run all the callbacks added with + * dma_fence_add_callback(). Can be called multiple times, but since a fence + * can only go from the unsignaled to the signaled state and not back, it will + * only be effective the first time. Set the timestamp provided as the fence + * signal timestamp. + * + * Returns 0 on success and a negative error value when @fence has been + * signalled already. + */ +int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) +{ + unsigned long flags; + int ret; + + if (!fence) + return -EINVAL; + + spin_lock_irqsave(fence->lock, flags); + ret = dma_fence_signal_timestamp_locked(fence, timestamp); + spin_unlock_irqrestore(fence->lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_fence_signal_timestamp); + +/** + * dma_fence_signal_locked - signal completion of a fence + * @fence: the fence to signal + * + * Signal completion for software callbacks on a fence, this will unblock + * dma_fence_wait() calls and run all the callbacks added with + * dma_fence_add_callback(). Can be called multiple times, but since a fence + * can only go from the unsignaled to the signaled state and not back, it will + * only be effective the first time. + * + * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock + * held. + * + * Returns 0 on success and a negative error value when @fence has been + * signalled already. + */ +int dma_fence_signal_locked(struct dma_fence *fence) +{ + return dma_fence_signal_timestamp_locked(fence, ktime_get()); +} EXPORT_SYMBOL(dma_fence_signal_locked); /** @@ -170,14 +461,19 @@ int dma_fence_signal(struct dma_fence *fence) { unsigned long flags; int ret; + bool tmp; if (!fence) return -EINVAL; + tmp = dma_fence_begin_signalling(); + spin_lock_irqsave(fence->lock, flags); - ret = dma_fence_signal_locked(fence); + ret = dma_fence_signal_timestamp_locked(fence, ktime_get()); spin_unlock_irqrestore(fence->lock, flags); + dma_fence_end_signalling(tmp); + return ret; } EXPORT_SYMBOL(dma_fence_signal); @@ -208,6 +504,12 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) if (WARN_ON(timeout < 0)) return -EINVAL; + might_sleep(); + + __dma_fence_might_wait(); + + dma_fence_enable_sw_signaling(fence); + trace_dma_fence_wait_start(fence); if (fence->ops->wait) ret = fence->ops->wait(fence, intr, timeout); @@ -309,9 +611,6 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence) { unsigned long flags; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return; - spin_lock_irqsave(fence->lock, flags); __dma_fence_enable_signaling(fence); spin_unlock_irqrestore(fence->lock, flags); @@ -325,20 +624,17 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling); * @cb: the callback to register * @func: the function to call * + * Add a software callback to the fence. The caller should keep a reference to + * the fence. + * * @cb will be initialized by dma_fence_add_callback(), no initialization * by the caller is required. Any number of callbacks can be registered * to a fence, but a callback can only be registered to one fence at a time. * - * Note that the callback can be called from an atomic context. If - * fence is already signaled, this function will return -ENOENT (and + * If fence is already signaled, this function will return -ENOENT (and * *not* call the callback). * - * Add a software callback to the fence. Same restrictions apply to - * refcount as it does to dma_fence_wait(), however the caller doesn't need to - * keep a refcount to fence afterward dma_fence_add_callback() has returned: - * when software access is enabled, the creator of the fence is required to keep - * the fence alive until after it signals with dma_fence_signal(). The callback - * itself can be called from irq context. + * Note that the callback can be called from an atomic context or irq context. * * Returns 0 in case of success, -ENOENT if the fence is already signaled * and -EINVAL in case of error. @@ -467,19 +763,16 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) unsigned long flags; signed long ret = timeout ? timeout : 1; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return ret; - spin_lock_irqsave(fence->lock, flags); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + goto out; + if (intr && signal_pending(current)) { ret = -ERESTARTSYS; goto out; } - if (!__dma_fence_enable_signaling(fence)) - goto out; - if (!timeout) { ret = 0; goto out; @@ -620,6 +913,22 @@ err_free_cb: EXPORT_SYMBOL(dma_fence_wait_any_timeout); /** + * dma_fence_describe - Dump fence describtion into seq_file + * @fence: the 6fence to describe + * @seq: the seq_file to put the textual description into + * + * Dump a textual description of the fence and it's state into the seq_file. + */ +void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq) +{ + seq_printf(seq, "%s %s seq %llu %ssignalled\n", + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), fence->seqno, + dma_fence_is_signaled(fence) ? "" : "un"); +} +EXPORT_SYMBOL(dma_fence_describe); + +/** * dma_fence_init - Initialize a custom fence. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence |