From 1c0908d8e441631f5b8ba433523cf39339ee2ba0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 2 Dec 2022 10:02:23 +0000 Subject: rtmutex: Add acquire semantics for rtmutex lock acquisition slow path Jan Kara reported the following bug triggering on 6.0.5-rt14 running dbench on XFS on arm64. kernel BUG at fs/inode.c:625! Internal error: Oops - BUG: 0 [#1] PREEMPT_RT SMP CPU: 11 PID: 6611 Comm: dbench Tainted: G E 6.0.0-rt14-rt+ #1 pc : clear_inode+0xa0/0xc0 lr : clear_inode+0x38/0xc0 Call trace: clear_inode+0xa0/0xc0 evict+0x160/0x180 iput+0x154/0x240 do_unlinkat+0x184/0x300 __arm64_sys_unlinkat+0x48/0xc0 el0_svc_common.constprop.4+0xe4/0x2c0 do_el0_svc+0xac/0x100 el0_svc+0x78/0x200 el0t_64_sync_handler+0x9c/0xc0 el0t_64_sync+0x19c/0x1a0 It also affects 6.1-rc7-rt5 and affects a preempt-rt fork of 5.14 so this is likely a bug that existed forever and only became visible when ARM support was added to preempt-rt. The same problem does not occur on x86-64 and he also reported that converting sb->s_inode_wblist_lock to raw_spinlock_t makes the problem disappear indicating that the RT spinlock variant is the problem. Which in turn means that RT mutexes on ARM64 and any other weakly ordered architecture are affected by this independent of RT. Will Deacon observed: "I'd be more inclined to be suspicious of the slowpath tbh, as we need to make sure that we have acquire semantics on all paths where the lock can be taken. Looking at the rtmutex code, this really isn't obvious to me -- for example, try_to_take_rt_mutex() appears to be able to return via the 'takeit' label without acquire semantics and it looks like we might be relying on the caller's subsequent _unlock_ of the wait_lock for ordering, but that will give us release semantics which aren't correct." Sebastian Andrzej Siewior prototyped a fix that does work based on that comment but it was a little bit overkill and added some fences that should not be necessary. The lock owner is updated with an IRQ-safe raw spinlock held, but the spin_unlock does not provide acquire semantics which are needed when acquiring a mutex. Adds the necessary acquire semantics for lock owner updates in the slow path acquisition and the waiter bit logic. It successfully completed 10 iterations of the dbench workload while the vanilla kernel fails on the first iteration. [ bigeasy@linutronix.de: Initial prototype fix ] Fixes: 700318d1d7b38 ("locking/rtmutex: Use acquire/release semantics") Fixes: 23f78d4a03c5 ("[PATCH] pi-futex: rt mutex core") Reported-by: Jan Kara Signed-off-by: Mel Gorman Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221202100223.6mevpbl7i6x5udfd@techsingularity.net --- kernel/locking/rtmutex.c | 55 ++++++++++++++++++++++++++++++++++++-------- kernel/locking/rtmutex_api.c | 6 ++--- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 7779ee8abc2a..010cf4e6d0b8 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -89,15 +89,31 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock, * set this bit before looking at the lock. */ -static __always_inline void -rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) +static __always_inline struct task_struct * +rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner) { unsigned long val = (unsigned long)owner; if (rt_mutex_has_waiters(lock)) val |= RT_MUTEX_HAS_WAITERS; - WRITE_ONCE(lock->owner, (struct task_struct *)val); + return (struct task_struct *)val; +} + +static __always_inline void +rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) +{ + /* + * lock->wait_lock is held but explicit acquire semantics are needed + * for a new lock owner so WRITE_ONCE is insufficient. + */ + xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner)); +} + +static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock) +{ + /* lock->wait_lock is held so the unlock provides release semantics. */ + WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL)); } static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) @@ -106,7 +122,8 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); } -static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock) +static __always_inline void +fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock) { unsigned long owner, *p = (unsigned long *) &lock->owner; @@ -172,8 +189,21 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock) * still set. */ owner = READ_ONCE(*p); - if (owner & RT_MUTEX_HAS_WAITERS) - WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); + if (owner & RT_MUTEX_HAS_WAITERS) { + /* + * See rt_mutex_set_owner() and rt_mutex_clear_owner() on + * why xchg_acquire() is used for updating owner for + * locking and WRITE_ONCE() for unlocking. + * + * WRITE_ONCE() would work for the acquire case too, but + * in case that the lock acquisition failed it might + * force other lockers into the slow path unnecessarily. + */ + if (acquire_lock) + xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS); + else + WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); + } } /* @@ -208,6 +238,13 @@ static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) owner = *p; } while (cmpxchg_relaxed(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); + + /* + * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE + * operations in the event of contention. Ensure the successful + * cmpxchg is visible. + */ + smp_mb__after_atomic(); } /* @@ -1243,7 +1280,7 @@ static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock) * try_to_take_rt_mutex() sets the lock waiters bit * unconditionally. Clean this up. */ - fixup_rt_mutex_waiters(lock); + fixup_rt_mutex_waiters(lock, true); return ret; } @@ -1604,7 +1641,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, * try_to_take_rt_mutex() sets the waiter bit * unconditionally. We might have to fix that up. */ - fixup_rt_mutex_waiters(lock); + fixup_rt_mutex_waiters(lock, true); trace_contention_end(lock, ret); @@ -1719,7 +1756,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) * try_to_take_rt_mutex() sets the waiter bit unconditionally. * We might have to fix that up: */ - fixup_rt_mutex_waiters(lock); + fixup_rt_mutex_waiters(lock, true); debug_rt_mutex_free_waiter(&waiter); trace_contention_end(lock, 0); diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index 900220941caa..cb9fdff76a8a 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -267,7 +267,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock, void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock) { debug_rt_mutex_proxy_unlock(lock); - rt_mutex_set_owner(lock, NULL); + rt_mutex_clear_owner(lock); } /** @@ -382,7 +382,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock, * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. */ - fixup_rt_mutex_waiters(lock); + fixup_rt_mutex_waiters(lock, true); raw_spin_unlock_irq(&lock->wait_lock); return ret; @@ -438,7 +438,7 @@ bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock, * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. */ - fixup_rt_mutex_waiters(lock); + fixup_rt_mutex_waiters(lock, false); raw_spin_unlock_irq(&lock->wait_lock); -- cgit v1.2.3-59-g8ed1b From b5f96cb719d8ba220b565ddd3ba4ac0d8bcfb130 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 13 Dec 2022 09:58:07 +0100 Subject: nvme-pci: fix doorbell buffer value endianness When using shadow doorbells, the event index and the doorbell values are written to host memory. Prior to this patch, the values written would erroneously be written in host endianness. This causes trouble on big-endian platforms. Fix this by adding missing endian conversions. This issue was noticed by Guenter while testing various big-endian platforms under QEMU[1]. A similar fix required for hw/nvme in QEMU is up for review as well[2]. [1]: https://lore.kernel.org/qemu-devel/20221209110022.GA3396194@roeck-us.net/ [2]: https://lore.kernel.org/qemu-devel/20221212114409.34972-4-its@irrelevant.dk/ Fixes: f9f38e33389c ("nvme: improve performance for virtual NVMe devices") Reported-by: Guenter Roeck Signed-off-by: Klaus Jensen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f0f8027644bb..017442858054 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -144,9 +144,9 @@ struct nvme_dev { mempool_t *iod_mempool; /* shadow doorbell buffer support: */ - u32 *dbbuf_dbs; + __le32 *dbbuf_dbs; dma_addr_t dbbuf_dbs_dma_addr; - u32 *dbbuf_eis; + __le32 *dbbuf_eis; dma_addr_t dbbuf_eis_dma_addr; /* host memory buffer support: */ @@ -208,10 +208,10 @@ struct nvme_queue { #define NVMEQ_SQ_CMB 1 #define NVMEQ_DELETE_ERROR 2 #define NVMEQ_POLLED 3 - u32 *dbbuf_sq_db; - u32 *dbbuf_cq_db; - u32 *dbbuf_sq_ei; - u32 *dbbuf_cq_ei; + __le32 *dbbuf_sq_db; + __le32 *dbbuf_cq_db; + __le32 *dbbuf_sq_ei; + __le32 *dbbuf_cq_ei; struct completion delete_done; }; @@ -343,11 +343,11 @@ static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old) } /* Update dbbuf and return true if an MMIO is required */ -static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, - volatile u32 *dbbuf_ei) +static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, + volatile __le32 *dbbuf_ei) { if (dbbuf_db) { - u16 old_value; + u16 old_value, event_idx; /* * Ensure that the queue is written before updating @@ -355,8 +355,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, */ wmb(); - old_value = *dbbuf_db; - *dbbuf_db = value; + old_value = le32_to_cpu(*dbbuf_db); + *dbbuf_db = cpu_to_le32(value); /* * Ensure that the doorbell is updated before reading the event @@ -366,7 +366,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, */ mb(); - if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) + event_idx = le32_to_cpu(*dbbuf_ei); + if (!nvme_dbbuf_need_event(event_idx, value, old_value)) return false; } -- cgit v1.2.3-59-g8ed1b From c89a529e823d51dd23c7ec0c047c7a454a428541 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Dec 2022 10:59:06 -0800 Subject: nvme-pci: fix mempool alloc size Convert the max size to bytes to match the units of the divisor that calculates the worst-case number of PRP entries. The result is used to determine how many PRP Lists are required. The code was previously rounding this to 1 list, but we can require 2 in the worst case. In that scenario, the driver would corrupt memory beyond the size provided by the mempool. While unlikely to occur (you'd need a 4MB in exactly 127 phys segments on a queue that doesn't support SGLs), this memory corruption has been observed by kfence. Cc: Jens Axboe Fixes: 943e942e6266f ("nvme-pci: limit max IO size and segments to avoid high order allocations") Signed-off-by: Keith Busch Reviewed-by: Jens Axboe Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 017442858054..6e9d1c7409a9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -381,8 +381,8 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db, */ static int nvme_pci_npages_prp(void) { - unsigned nprps = DIV_ROUND_UP(NVME_MAX_KB_SZ + NVME_CTRL_PAGE_SIZE, - NVME_CTRL_PAGE_SIZE); + unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; + unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); } -- cgit v1.2.3-59-g8ed1b From 841734234a28fd5cd0889b84bd4d93a0988fa11e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Dec 2022 13:54:55 -0800 Subject: nvme-pci: fix page size checks The size allocated out of the dma pool is at most NVME_CTRL_PAGE_SIZE, which may be smaller than the PAGE_SIZE. Fixes: c61b82c7b7134 ("nvme-pci: fix PRP pool size") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6e9d1c7409a9..804b6a6cb43a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -36,7 +36,7 @@ #define SQ_SIZE(q) ((q)->q_depth << (q)->sqes) #define CQ_SIZE(q) ((q)->q_depth * sizeof(struct nvme_completion)) -#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) +#define SGES_PER_PAGE (NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc)) /* * These can be higher, but we need to ensure that any command doesn't @@ -383,7 +383,7 @@ static int nvme_pci_npages_prp(void) { unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE; unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE); - return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); + return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8); } /* @@ -393,7 +393,7 @@ static int nvme_pci_npages_prp(void) static int nvme_pci_npages_sgl(void) { return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc), - PAGE_SIZE); + NVME_CTRL_PAGE_SIZE); } static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, @@ -709,7 +709,7 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge, sge->length = cpu_to_le32(entries * sizeof(*sge)); sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4; } else { - sge->length = cpu_to_le32(PAGE_SIZE); + sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE); sge->type = NVME_SGL_FMT_SEG_DESC << 4; } } -- cgit v1.2.3-59-g8ed1b From 52ea806ad983490b3132a9e526e11a10dc2fd10c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Dec 2022 07:05:09 -0700 Subject: io_uring: finish waiting before flushing overflow entries If we have overflow entries being generated after we've done the initial flush in io_cqring_wait(), then we could be flushing them in the main wait loop as well. If that's done after having added ourselves to the cq_wait waitqueue, then the task state can be != TASK_RUNNING when we enter the overflow flush. Check for the need to overflow flush, and finish our wait cycle first if we have to do so. Reported-and-tested-by: syzbot+cf6ea1d6bb30a4ce10b2@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/000000000000cb143a05f04eee15@google.com/ Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ff2bbac1a10f..ac5d39eeb3d1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -677,16 +677,20 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx) io_cq_unlock_post(ctx); } +static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx) +{ + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + __io_cqring_overflow_flush(ctx); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); +} + static void io_cqring_overflow_flush(struct io_ring_ctx *ctx) { - if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { - /* iopoll syncs against uring_lock, not completion_lock */ - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_lock(&ctx->uring_lock); - __io_cqring_overflow_flush(ctx); - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_unlock(&ctx->uring_lock); - } + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) + io_cqring_do_overflow_flush(ctx); } void __io_put_task(struct task_struct *task, int nr) @@ -2549,7 +2553,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, trace_io_uring_cqring_wait(ctx, min_events); do { - io_cqring_overflow_flush(ctx); + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { + finish_wait(&ctx->cq_wait, &iowq.wq); + io_cqring_do_overflow_flush(ctx); + } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); ret = io_cqring_wait_schedule(ctx, &iowq, timeout); -- cgit v1.2.3-59-g8ed1b From 23fffb2f09ce1145cbd751801d45ba74acaa6542 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Dec 2022 07:11:33 -0700 Subject: io_uring/cancel: re-grab ctx mutex after finishing wait If we have a signal pending during cancelations, it'll cause the task_work run to return an error. Since we didn't run task_work, the current task is left in TASK_INTERRUPTIBLE state when we need to re-grab the ctx mutex, and the kernel will rightfully complain about that. Move the lock grabbing for the error cases outside the loop to avoid that issue. Reported-by: syzbot+7df055631cd1be4586fd@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/0000000000003a14a905f05050b0@google.com/ Signed-off-by: Jens Axboe --- io_uring/cancel.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 2291a53cdabd..b4f5dfacc0c3 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -288,24 +288,23 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); + mutex_unlock(&ctx->uring_lock); if (ret != -EALREADY) break; - mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(ctx); - if (ret < 0) { - mutex_lock(&ctx->uring_lock); + if (ret < 0) break; - } ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS); - mutex_lock(&ctx->uring_lock); if (!ret) { ret = -ETIME; break; } + mutex_lock(&ctx->uring_lock); } while (1); finish_wait(&ctx->cq_wait, &wait); + mutex_lock(&ctx->uring_lock); if (ret == -ENOENT || ret > 0) ret = 0; -- cgit v1.2.3-59-g8ed1b From 3659fb5ac29a5e6102bebe494ac789fd47fb78f4 Mon Sep 17 00:00:00 2001 From: Yanjun Zhang Date: Thu, 22 Dec 2022 09:57:21 +0800 Subject: nvme: fix multipath crash caused by flush request when blktrace is enabled The flush request initialized by blk_kick_flush has NULL bio, and it may be dealt with nvme_end_req during io completion. When blktrace is enabled, nvme_trace_bio_complete with multipath activated trying to access NULL pointer bio from flush request results in the following crash: [ 2517.831677] BUG: kernel NULL pointer dereference, address: 000000000000001a [ 2517.835213] #PF: supervisor read access in kernel mode [ 2517.838724] #PF: error_code(0x0000) - not-present page [ 2517.842222] PGD 7b2d51067 P4D 0 [ 2517.845684] Oops: 0000 [#1] SMP NOPTI [ 2517.849125] CPU: 2 PID: 732 Comm: kworker/2:1H Kdump: loaded Tainted: G S 5.15.67-0.cl9.x86_64 #1 [ 2517.852723] Hardware name: XFUSION 2288H V6/BC13MBSBC, BIOS 1.13 07/27/2022 [ 2517.856358] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] [ 2517.859993] RIP: 0010:blk_add_trace_bio_complete+0x6/0x30 [ 2517.863628] Code: 1f 44 00 00 48 8b 46 08 31 c9 ba 04 00 10 00 48 8b 80 50 03 00 00 48 8b 78 50 e9 e5 fe ff ff 0f 1f 44 00 00 41 54 49 89 f4 55 <0f> b6 7a 1a 48 89 d5 e8 3e 1c 2b 00 48 89 ee 4c 89 e7 5d 89 c1 ba [ 2517.871269] RSP: 0018:ff7f6a008d9dbcd0 EFLAGS: 00010286 [ 2517.875081] RAX: ff3d5b4be00b1d50 RBX: 0000000002040002 RCX: ff3d5b0a270f2000 [ 2517.878966] RDX: 0000000000000000 RSI: ff3d5b0b021fb9f8 RDI: 0000000000000000 [ 2517.882849] RBP: ff3d5b0b96a6fa00 R08: 0000000000000001 R09: 0000000000000000 [ 2517.886718] R10: 000000000000000c R11: 000000000000000c R12: ff3d5b0b021fb9f8 [ 2517.890575] R13: 0000000002000000 R14: ff3d5b0b021fb1b0 R15: 0000000000000018 [ 2517.894434] FS: 0000000000000000(0000) GS:ff3d5b42bfc80000(0000) knlGS:0000000000000000 [ 2517.898299] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2517.902157] CR2: 000000000000001a CR3: 00000004f023e005 CR4: 0000000000771ee0 [ 2517.906053] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2517.909930] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 2517.913761] PKRU: 55555554 [ 2517.917558] Call Trace: [ 2517.921294] [ 2517.924982] nvme_complete_rq+0x1c3/0x1e0 [nvme_core] [ 2517.928715] nvme_tcp_recv_pdu+0x4d7/0x540 [nvme_tcp] [ 2517.932442] nvme_tcp_recv_skb+0x4f/0x240 [nvme_tcp] [ 2517.936137] ? nvme_tcp_recv_pdu+0x540/0x540 [nvme_tcp] [ 2517.939830] tcp_read_sock+0x9c/0x260 [ 2517.943486] nvme_tcp_try_recv+0x65/0xa0 [nvme_tcp] [ 2517.947173] nvme_tcp_io_work+0x64/0x90 [nvme_tcp] [ 2517.950834] process_one_work+0x1e8/0x390 [ 2517.954473] worker_thread+0x53/0x3c0 [ 2517.958069] ? process_one_work+0x390/0x390 [ 2517.961655] kthread+0x10c/0x130 [ 2517.965211] ? set_kthread_struct+0x40/0x40 [ 2517.968760] ret_from_fork+0x1f/0x30 [ 2517.972285] To avoid this situation, add a NULL check for req->bio before calling trace_block_bio_complete. Signed-off-by: Yanjun Zhang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6bbb73ef8b25..424c8a467a0c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -893,7 +893,7 @@ static inline void nvme_trace_bio_complete(struct request *req) { struct nvme_ns *ns = req->q->queuedata; - if (req->cmd_flags & REQ_NVME_MPATH) + if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio) trace_block_bio_complete(ns->head->disk->queue, req->bio); } -- cgit v1.2.3-59-g8ed1b From 00a734104af7d878f1252d49eff9298785c6cbdc Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 8 Dec 2022 10:42:05 -0600 Subject: ACPI: video: Allow GPU drivers to report no panels The current logic for the ACPI backlight detection will create a backlight device if no native or vendor drivers have created 8 seconds after the system has booted if the ACPI tables included backlight control methods. If the GPU drivers have loaded, they may be able to report whether any LCD panels were found. Allow using this information to factor in whether to enable the fallback logic for making an acpi_video0 backlight device. Suggested-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 11 +++++++++++ include/acpi/video.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 30d8fd03fec7..75dc37affff2 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -2176,6 +2176,17 @@ static bool should_check_lcd_flag(void) return false; } +/* + * At least one graphics driver has reported that no LCD is connected + * via the native interface. cancel the registration for fallback acpi_video0. + * If another driver still deems this necessary, it can explicitly register it. + */ +void acpi_video_report_nolcd(void) +{ + cancel_delayed_work(&video_bus_register_backlight_work); +} +EXPORT_SYMBOL(acpi_video_report_nolcd); + int acpi_video_register(void) { int ret = 0; diff --git a/include/acpi/video.h b/include/acpi/video.h index a275c35e5249..8ed9bec03e53 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -53,6 +53,7 @@ enum acpi_backlight_type { }; #if IS_ENABLED(CONFIG_ACPI_VIDEO) +extern void acpi_video_report_nolcd(void); extern int acpi_video_register(void); extern void acpi_video_unregister(void); extern void acpi_video_register_backlight(void); @@ -69,6 +70,7 @@ extern int acpi_video_get_levels(struct acpi_device *device, struct acpi_video_device_brightness **dev_br, int *pmax_level); #else +static inline void acpi_video_report_nolcd(void) { return; }; static inline int acpi_video_register(void) { return -ENODEV; } static inline void acpi_video_unregister(void) { return; } static inline void acpi_video_register_backlight(void) { return; } -- cgit v1.2.3-59-g8ed1b From c573e240609ff781a0246c0c8c8351abd0475287 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 8 Dec 2022 10:42:06 -0600 Subject: drm/amd/display: Report to ACPI video if no panels were found On desktop APUs amdgpu doesn't create a native backlight device as no eDP panels are found. However if the BIOS has reported backlight control methods in the ACPI tables then an acpi_video0 backlight device will be made 8 seconds after boot. This has manifested in a power slider on a number of desktop APUs ranging from Ryzen 5000 through Ryzen 7000 on various motherboard manufacturers. To avoid this, report to the acpi video detection that the system does not have any panel connected in the native driver. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783786 Reported-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 77277d90b6e2..a7eb13902af8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4360,6 +4360,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) amdgpu_set_panel_orientation(&aconnector->base); } + /* If we didn't find a panel, notify the acpi video detection */ + if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0) + acpi_video_report_nolcd(); + /* Software is initialized. Now we can register interrupt handlers. */ switch (adev->asic_type) { #if defined(CONFIG_DRM_AMD_DC_SI) -- cgit v1.2.3-59-g8ed1b From 5aa9d943e9b6bf6e6023645cbe7ce7d5ed84baf4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 8 Dec 2022 10:42:07 -0600 Subject: ACPI: video: Don't enable fallback path for creating ACPI backlight by default The ACPI video detection code has a module parameter `register_backlight_delay` which is currently configured to 8 seconds. This means that if after 8 seconds of booting no native driver has created a backlight device then the code will attempt to make an ACPI video backlight device. This was intended as a safety mechanism with the backlight overhaul that occurred in kernel 6.1, but as it doesn't appear necesssary set it to be disabled by default. Suggested-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 75dc37affff2..97b711e57bff 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -70,11 +70,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); -/* - * Display probing is known to take up to 5 seconds, so delay the fallback - * backlight registration by 5 seconds + 3 seconds for some extra margin. - */ -static int register_backlight_delay = 8; +static int register_backlight_delay; module_param(register_backlight_delay, int, 0444); MODULE_PARM_DESC(register_backlight_delay, "Delay in seconds before doing fallback (non GPU driver triggered) " -- cgit v1.2.3-59-g8ed1b From 7592b79ba4a91350b38469e05238308bcfe1019b Mon Sep 17 00:00:00 2001 From: Erik Schumacher Date: Sun, 11 Dec 2022 14:33:22 +0100 Subject: ACPI: resource: do IRQ override on XMG Core 15 The Schenker XMG CORE 15 (M22) is Ryzen-6 based and needs IRQ overriding for the keyboard to work. Adding an entry for this laptop to the override_table makes the internal keyboard functional again. Signed-off-by: Erik Schumacher Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index f27914aedbd5..037d1aa10357 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -446,6 +446,17 @@ static const struct dmi_system_id lenovo_82ra[] = { { } }; +static const struct dmi_system_id schenker_gm_rg[] = { + { + .ident = "XMG CORE 15 (M22)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SchenkerTechnologiesGmbH"), + DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"), + }, + }, + { } +}; + struct irq_override_cmp { const struct dmi_system_id *system; unsigned char irq; @@ -460,6 +471,7 @@ static const struct irq_override_cmp override_table[] = { { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, { lenovo_82ra, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, { lenovo_82ra, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, + { schenker_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, }; static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity, -- cgit v1.2.3-59-g8ed1b From f3cb9b740869712d448edf3b9ef5952b847caf8b Mon Sep 17 00:00:00 2001 From: Adrian Freund Date: Tue, 13 Dec 2022 21:13:11 +0100 Subject: ACPI: resource: do IRQ override on Lenovo 14ALC7 Commit bfcdf58380b1 ("ACPI: resource: do IRQ override on LENOVO IdeaPad") added an override for Lenovo IdeaPad 5 16ALC7. The 14ALC7 variant also suffers from a broken touchscreen and trackpad. Fixes: 9946e39fe8d0 ("ACPI: resource: skip IRQ override on AMD Zen platforms") Link: https://bugzilla.kernel.org/show_bug.cgi?id=216804 Signed-off-by: Adrian Freund Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 037d1aa10357..d0c92422e206 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -435,7 +435,14 @@ static const struct dmi_system_id asus_laptop[] = { { } }; -static const struct dmi_system_id lenovo_82ra[] = { +static const struct dmi_system_id lenovo_laptop[] = { + { + .ident = "LENOVO IdeaPad Flex 5 14ALC7", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82R9"), + }, + }, { .ident = "LENOVO IdeaPad Flex 5 16ALC7", .matches = { @@ -469,8 +476,8 @@ struct irq_override_cmp { static const struct irq_override_cmp override_table[] = { { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false }, - { lenovo_82ra, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, - { lenovo_82ra, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, + { lenovo_laptop, 6, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, + { lenovo_laptop, 10, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, true }, { schenker_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true }, }; -- cgit v1.2.3-59-g8ed1b From 7203481fd12b1257938519efb2460ea02b9236ee Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Dec 2022 10:44:43 +0100 Subject: ACPI: resource: Add Asus ExpertBook B2502 to Asus quirks The Asus ExpertBook B2502 has the same keyboard issue as Asus Vivobook K3402ZA/K3502ZA. The kernel overrides IRQ 1 to Edge_High when it should be Active_Low. This patch adds the ExpertBook B2502 model to the existing quirk list of Asus laptops with this issue. Fixes: b5f9223a105d ("ACPI: resource: Skip IRQ override on Asus Vivobook S5602ZA") Link: https://bugzilla.redhat.com/show_bug.cgi?id=2142574 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index d0c92422e206..16dcd31d124f 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -432,6 +432,13 @@ static const struct dmi_system_id asus_laptop[] = { DMI_MATCH(DMI_BOARD_NAME, "S5602ZA"), }, }, + { + .ident = "Asus ExpertBook B2502", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, "B2502CBA"), + }, + }, { } }; -- cgit v1.2.3-59-g8ed1b From 3cf3b7f012f3ea8bdc56196e367cf07c10424855 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Dec 2022 10:41:38 +0100 Subject: ACPI: video: Fix Apple GMUX backlight detection The apple-gmux driver only binds to old GMUX devices which have an IORESOURCE_IO resource (using inb()/outb()) rather then memory-mapped IO (IORESOURCE_MEM). T2 MacBooks use the new style GMUX devices (with IORESOURCE_MEM access), so these are not supported by the apple-gmux driver. This is not a problem since they have working ACPI video backlight support. But the apple_gmux_present() helper only checks if an ACPI device with the "APP000B" HID is present, causing acpi_video_get_backlight_type() to return acpi_backlight_apple_gmux disabling the acpi_video backlight device. Add a new apple_gmux_backlight_present() helper which checks that the "APP000B" device actually is an old GMUX device with an IORESOURCE_IO resource. This fixes the acpi_video0 backlight no longer registering on T2 MacBooks. Note people are working to add support for the new style GMUX to Linux: https://github.com/kekrby/linux-t2/commits/wip/hybrid-graphics Once this lands this patch should be reverted so that acpi_video_get_backlight_type() also prefers the gmux on new style GMUX MacBooks, but for now this is necessary to avoid regressing backlight control on T2 Macs. Fixes: 21245df307cb ("ACPI: video: Add Apple GMUX brightness control detection") Reported-and-tested-by: Aditya Garg Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index a934bbc9dd37..1b78c7434492 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -105,6 +106,26 @@ static bool nvidia_wmi_ec_supported(void) } #endif +static bool apple_gmux_backlight_present(void) +{ + struct acpi_device *adev; + struct device *dev; + + adev = acpi_dev_get_first_match_dev(GMUX_ACPI_HID, NULL, -1); + if (!adev) + return false; + + dev = acpi_get_first_physical_node(adev); + if (!dev) + return false; + + /* + * drivers/platform/x86/apple-gmux.c only supports old style + * Apple GMUX with an IO-resource. + */ + return pnp_get_resource(to_pnp_dev(dev), IORESOURCE_IO, 0) != NULL; +} + /* Force to use vendor driver when the ACPI device is known to be * buggy */ static int video_detect_force_vendor(const struct dmi_system_id *d) @@ -767,7 +788,7 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) if (nvidia_wmi_ec_present) return acpi_backlight_nvidia_wmi_ec; - if (apple_gmux_present()) + if (apple_gmux_backlight_present()) return acpi_backlight_apple_gmux; /* Use ACPI video if available, except when native should be preferred. */ -- cgit v1.2.3-59-g8ed1b From 3ea45390e9c0d35805ef8357ace55594fd4233d0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 15 Dec 2022 13:16:15 -0600 Subject: ACPI: x86: s2idle: Force AMD GUID/_REV 2 on HP Elitebook 865 HP Elitebook 865 supports both the AMD GUID w/ _REV 2 and Microsoft GUID with _REV 0. Both have very similar code but the AMD GUID has a special workaround that is specific to a problem with spurious wakeups on systems with Qualcomm WLAN. This is believed to be a bug in the Qualcomm WLAN F/W (it doesn't affect any other WLAN H/W). If this WLAN firmware is fixed this quirk can be dropped. Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/s2idle.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 5350c73564b6..422415cb14f4 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -401,6 +401,13 @@ static const struct acpi_device_id amd_hid_ids[] = { {} }; +static int lps0_prefer_amd(const struct dmi_system_id *id) +{ + pr_debug("Using AMD GUID w/ _REV 2.\n"); + rev_id = 2; + return 0; +} + static int lps0_prefer_microsoft(const struct dmi_system_id *id) { pr_debug("Preferring Microsoft GUID.\n"); @@ -462,6 +469,19 @@ static const struct dmi_system_id s2idle_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow X16 GV601"), }, }, + { + /* + * AMD Rembrandt based HP EliteBook 835/845/865 G9 + * Contains specialized AML in AMD/_REV 2 path to avoid + * triggering a bug in Qualcomm WLAN firmware. This may be + * removed in the future if that firmware is fixed. + */ + .callback = lps0_prefer_amd, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8990"), + }, + }, {} }; -- cgit v1.2.3-59-g8ed1b From e555c85792bd5f9828a2fd2ca9761f70efb1c77b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 15 Dec 2022 13:16:16 -0600 Subject: ACPI: x86: s2idle: Stop using AMD specific codepath for Rembrandt+ After we introduced a module parameter and quirk infrastructure for picking the Microsoft GUID over the SOC vendor GUID we discovered that lots and lots of systems are getting this wrong. The table continues to grow, and is becoming unwieldy. We don't really have any benefit to forcing vendors to populate the AMD GUID. This is just extra work, and more and more vendors seem to mess it up. As the Microsoft GUID is used by Windows as well, it's very likely that it won't be messed up like this. So drop all the quirks forcing it and the Rembrandt behavior. This means that Cezanne or later effectively only run the Microsoft GUID codepath with the exception of HP Elitebook 8*5 G9. Fixes: fd894f05cf30 ("ACPI: x86: s2idle: If a new AMD _HID is missing assume Rembrandt") Cc: stable@vger.kernel.org # 6.1 Reported-by: Benjamin Cheng Reported-by: bilkow@tutanota.com Reported-by: Paul Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2292 Link: https://bugzilla.kernel.org/show_bug.cgi?id=216768 Signed-off-by: Mario Limonciello Reviewed-by: Philipp Zabel Tested-by: Philipp Zabel Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/s2idle.c | 87 ++--------------------------------------------- 1 file changed, 3 insertions(+), 84 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 422415cb14f4..c7afce465a07 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -28,10 +28,6 @@ static bool sleep_no_lps0 __read_mostly; module_param(sleep_no_lps0, bool, 0644); MODULE_PARM_DESC(sleep_no_lps0, "Do not use the special LPS0 device interface"); -static bool prefer_microsoft_dsm_guid __read_mostly; -module_param(prefer_microsoft_dsm_guid, bool, 0644); -MODULE_PARM_DESC(prefer_microsoft_dsm_guid, "Prefer using Microsoft GUID in LPS0 device _DSM evaluation"); - static const struct acpi_device_id lps0_device_ids[] = { {"PNP0D80", }, {"", }, @@ -369,27 +365,15 @@ out: } struct amd_lps0_hid_device_data { - const unsigned int rev_id; const bool check_off_by_one; - const bool prefer_amd_guid; }; static const struct amd_lps0_hid_device_data amd_picasso = { - .rev_id = 0, .check_off_by_one = true, - .prefer_amd_guid = false, }; static const struct amd_lps0_hid_device_data amd_cezanne = { - .rev_id = 0, .check_off_by_one = false, - .prefer_amd_guid = false, -}; - -static const struct amd_lps0_hid_device_data amd_rembrandt = { - .rev_id = 2, - .check_off_by_one = false, - .prefer_amd_guid = true, }; static const struct acpi_device_id amd_hid_ids[] = { @@ -397,7 +381,6 @@ static const struct acpi_device_id amd_hid_ids[] = { {"AMD0005", (kernel_ulong_t)&amd_picasso, }, {"AMDI0005", (kernel_ulong_t)&amd_picasso, }, {"AMDI0006", (kernel_ulong_t)&amd_cezanne, }, - {"AMDI0007", (kernel_ulong_t)&amd_rembrandt, }, {} }; @@ -407,68 +390,7 @@ static int lps0_prefer_amd(const struct dmi_system_id *id) rev_id = 2; return 0; } - -static int lps0_prefer_microsoft(const struct dmi_system_id *id) -{ - pr_debug("Preferring Microsoft GUID.\n"); - prefer_microsoft_dsm_guid = true; - return 0; -} - static const struct dmi_system_id s2idle_dmi_table[] __initconst = { - { - /* - * ASUS TUF Gaming A17 FA707RE - * https://bugzilla.kernel.org/show_bug.cgi?id=216101 - */ - .callback = lps0_prefer_microsoft, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ASUS TUF Gaming A17"), - }, - }, - { - /* ASUS ROG Zephyrus G14 (2022) */ - .callback = lps0_prefer_microsoft, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ROG Zephyrus G14 GA402"), - }, - }, - { - /* - * Lenovo Yoga Slim 7 Pro X 14ARH7 - * https://bugzilla.kernel.org/show_bug.cgi?id=216473 : 82V2 - * https://bugzilla.kernel.org/show_bug.cgi?id=216438 : 82TL - */ - .callback = lps0_prefer_microsoft, - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82"), - }, - }, - { - /* - * ASUSTeK COMPUTER INC. ROG Flow X13 GV301RE_GV301RE - * https://gitlab.freedesktop.org/drm/amd/-/issues/2148 - */ - .callback = lps0_prefer_microsoft, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow X13 GV301"), - }, - }, - { - /* - * ASUSTeK COMPUTER INC. ROG Flow X16 GV601RW_GV601RW - * https://gitlab.freedesktop.org/drm/amd/-/issues/2148 - */ - .callback = lps0_prefer_microsoft, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ROG Flow X16 GV601"), - }, - }, { /* * AMD Rembrandt based HP EliteBook 835/845/865 G9 @@ -504,16 +426,14 @@ static int lps0_device_attach(struct acpi_device *adev, if (dev_id->id[0]) data = (const struct amd_lps0_hid_device_data *) dev_id->driver_data; else - data = &amd_rembrandt; - rev_id = data->rev_id; + data = &amd_cezanne; lps0_dsm_func_mask = validate_dsm(adev->handle, ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid); if (lps0_dsm_func_mask > 0x3 && data->check_off_by_one) { lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1; acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n", ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask); - } else if (lps0_dsm_func_mask_microsoft > 0 && data->prefer_amd_guid && - !prefer_microsoft_dsm_guid) { + } else if (lps0_dsm_func_mask_microsoft > 0 && rev_id) { lps0_dsm_func_mask_microsoft = -EINVAL; acpi_handle_debug(adev->handle, "_DSM Using AMD method\n"); } @@ -521,8 +441,7 @@ static int lps0_device_attach(struct acpi_device *adev, rev_id = 1; lps0_dsm_func_mask = validate_dsm(adev->handle, ACPI_LPS0_DSM_UUID, rev_id, &lps0_dsm_guid); - if (!prefer_microsoft_dsm_guid) - lps0_dsm_func_mask_microsoft = -EINVAL; + lps0_dsm_func_mask_microsoft = -EINVAL; } if (lps0_dsm_func_mask < 0 && lps0_dsm_func_mask_microsoft < 0) -- cgit v1.2.3-59-g8ed1b From 343190841a1f22b96996d9f8cfab902a4d1bfd0e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Dec 2022 06:37:08 -0700 Subject: io_uring: check for valid register opcode earlier We only check the register opcode value inside the restricted ring section, move it into the main io_uring_register() function instead and check it up front. Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ac5d39eeb3d1..58ac13b69dc8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4020,8 +4020,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, return -EEXIST; if (ctx->restricted) { - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; @@ -4177,6 +4175,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, long ret = -EBADF; struct fd f; + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + f = fdget(fd); if (!f.file) return -EBADF; -- cgit v1.2.3-59-g8ed1b From 55c590adfe18b5380f7c4ae3696468bc5c916ee5 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 7 Dec 2022 15:15:05 +0800 Subject: KVM: x86/pmu: Prevent zero period event from being repeatedly released The current vPMU can reuse the same pmc->perf_event for the same hardware event via pmc_pause/resume_counter(), but this optimization does not apply to a portion of the TSX events (e.g., "event=0x3c,in_tx=1, in_tx_cp=1"), where event->attr.sample_period is legally zero at creation, thus making the perf call to perf_event_period() meaningless (no need to adjust sample period in this case), and instead causing such reusable perf_events to be repeatedly released and created. Avoid releasing zero sample_period events by checking is_sampling_event() to follow the previously enable/disable optimization. Signed-off-by: Like Xu Message-Id: <20221207071506.15733-2-likexu@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 3 ++- arch/x86/kvm/pmu.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 684393c22105..eb594620dd75 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -238,7 +238,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) return false; /* recalibrate sample period and check if it's accepted by perf core */ - if (perf_event_period(pmc->perf_event, + if (is_sampling_event(pmc->perf_event) && + perf_event_period(pmc->perf_event, get_sample_period(pmc, pmc->counter))) return false; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 85ff3c0588ba..cdb91009701d 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -140,7 +140,8 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value) static inline void pmc_update_sample_period(struct kvm_pmc *pmc) { - if (!pmc->perf_event || pmc->is_paused) + if (!pmc->perf_event || pmc->is_paused || + !is_sampling_event(pmc->perf_event)) return; perf_event_period(pmc->perf_event, -- cgit v1.2.3-59-g8ed1b From fceb3a36c29a957515d5156e5e7844ea040dc43d Mon Sep 17 00:00:00 2001 From: Adamos Ttofari Date: Thu, 8 Dec 2022 09:44:14 +0000 Subject: KVM: x86: ioapic: Fix level-triggered EOI and userspace I/OAPIC reconfigure race When scanning userspace I/OAPIC entries, intercept EOI for level-triggered IRQs if the current vCPU has a pending and/or in-service IRQ for the vector in its local API, even if the vCPU doesn't match the new entry's destination. This fixes a race between userspace I/OAPIC reconfiguration and IRQ delivery that results in the vector's bit being left set in the remote IRR due to the eventual EOI not being forwarded to the userspace I/OAPIC. Commit 0fc5a36dd6b3 ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race") fixed the in-kernel IOAPIC, but not the userspace IOAPIC configuration, which has a similar race. Fixes: 0fc5a36dd6b3 ("KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race") Signed-off-by: Adamos Ttofari Reviewed-by: Sean Christopherson Message-Id: <20221208094415.12723-1-attofari@amazon.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 0687162c4f22..3742d9adacfc 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -426,8 +426,9 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); if (irq.trig_mode && - kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, - irq.dest_id, irq.dest_mode)) + (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, + irq.dest_id, irq.dest_mode) || + kvm_apic_pending_eoi(vcpu, irq.vector))) __set_bit(irq.vector, ioapic_handled_vectors); } } -- cgit v1.2.3-59-g8ed1b From 8b9e13d2de73b5513c2ceffe0f62eab40206a126 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 8 Dec 2022 11:27:00 +0100 Subject: KVM: x86: hyper-v: Fix 'using uninitialized value' Coverity warning In kvm_hv_flush_tlb(), 'data_offset' and 'consumed_xmm_halves' variables are used in a mutually exclusive way: in 'hc->fast' we count in 'XMM halves' and increase 'data_offset' otherwise. Coverity discovered, that in one case both variables are incremented unconditionally. This doesn't seem to cause any issues as the only user of 'data_offset'/'consumed_xmm_halves' data is kvm_hv_get_tlb_flush_entries() -> kvm_hv_get_hc_data() which also takes into account 'hc->fast' but is still worth fixing. To make things explicit, put 'data_offset' and 'consumed_xmm_halves' to 'struct kvm_hv_hcall' as a union and use at call sites. This allows to remove explicit 'data_offset'/'consumed_xmm_halves' parameters from kvm_hv_get_hc_data()/kvm_get_sparse_vp_set()/kvm_hv_get_tlb_flush_entries() helpers. Note: 'struct kvm_hv_hcall' is allocated on stack in kvm_hv_hypercall() and is not zeroed, consumers are supposed to initialize the appropriate field if needed. Reported-by: coverity-bot Addresses-Coverity-ID: 1527764 ("Uninitialized variables") Fixes: 260970862c88 ("KVM: x86: hyper-v: Handle HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls gently") Signed-off-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Message-Id: <20221208102700.959630-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 63 +++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2c7f2a26421e..e8296942a868 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1769,6 +1769,7 @@ static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_ba } struct kvm_hv_hcall { + /* Hypercall input data */ u64 param; u64 ingpa; u64 outgpa; @@ -1779,12 +1780,21 @@ struct kvm_hv_hcall { bool fast; bool rep; sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS]; + + /* + * Current read offset when KVM reads hypercall input data gradually, + * either offset in bytes from 'ingpa' for regular hypercalls or the + * number of already consumed 'XMM halves' for 'fast' hypercalls. + */ + union { + gpa_t data_offset; + int consumed_xmm_halves; + }; }; static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc, - u16 orig_cnt, u16 cnt_cap, u64 *data, - int consumed_xmm_halves, gpa_t offset) + u16 orig_cnt, u16 cnt_cap, u64 *data) { /* * Preserve the original count when ignoring entries via a "cap", KVM @@ -1799,11 +1809,11 @@ static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc, * Each XMM holds two sparse banks, but do not count halves that * have already been consumed for hypercall parameters. */ - if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves) + if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - hc->consumed_xmm_halves) return HV_STATUS_INVALID_HYPERCALL_INPUT; for (i = 0; i < cnt; i++) { - j = i + consumed_xmm_halves; + j = i + hc->consumed_xmm_halves; if (j % 2) data[i] = sse128_hi(hc->xmm[j / 2]); else @@ -1812,27 +1822,24 @@ static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc, return 0; } - return kvm_read_guest(kvm, hc->ingpa + offset, data, + return kvm_read_guest(kvm, hc->ingpa + hc->data_offset, data, cnt * sizeof(*data)); } static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, - u64 *sparse_banks, int consumed_xmm_halves, - gpa_t offset) + u64 *sparse_banks) { if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS) return -EINVAL; /* Cap var_cnt to ignore banks that cannot contain a legal VP index. */ return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS, - sparse_banks, consumed_xmm_halves, offset); + sparse_banks); } -static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[], - int consumed_xmm_halves, gpa_t offset) +static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[]) { - return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, - entries, consumed_xmm_halves, offset); + return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, entries); } static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, @@ -1926,8 +1933,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) struct kvm_vcpu *v; unsigned long i; bool all_cpus; - int consumed_xmm_halves = 0; - gpa_t data_offset; /* * The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS @@ -1955,12 +1960,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) flush.address_space = hc->ingpa; flush.flags = hc->outgpa; flush.processor_mask = sse128_lo(hc->xmm[0]); - consumed_xmm_halves = 1; + hc->consumed_xmm_halves = 1; } else { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush)))) return HV_STATUS_INVALID_HYPERCALL_INPUT; - data_offset = sizeof(flush); + hc->data_offset = sizeof(flush); } trace_kvm_hv_flush_tlb(flush.processor_mask, @@ -1985,12 +1990,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) flush_ex.flags = hc->outgpa; memcpy(&flush_ex.hv_vp_set, &hc->xmm[0], sizeof(hc->xmm[0])); - consumed_xmm_halves = 2; + hc->consumed_xmm_halves = 2; } else { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, sizeof(flush_ex)))) return HV_STATUS_INVALID_HYPERCALL_INPUT; - data_offset = sizeof(flush_ex); + hc->data_offset = sizeof(flush_ex); } trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, @@ -2009,8 +2014,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if (!hc->var_cnt) goto ret_success; - if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, - consumed_xmm_halves, data_offset)) + if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks)) return HV_STATUS_INVALID_HYPERCALL_INPUT; } @@ -2021,8 +2025,10 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) * consumed_xmm_halves to make sure TLB flush entries are read * from the correct offset. */ - data_offset += hc->var_cnt * sizeof(sparse_banks[0]); - consumed_xmm_halves += hc->var_cnt; + if (hc->fast) + hc->consumed_xmm_halves += hc->var_cnt; + else + hc->data_offset += hc->var_cnt * sizeof(sparse_banks[0]); } if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE || @@ -2030,8 +2036,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) { tlb_flush_entries = NULL; } else { - if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries, - consumed_xmm_halves, data_offset)) + if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries)) return HV_STATUS_INVALID_HYPERCALL_INPUT; tlb_flush_entries = __tlb_flush_entries; } @@ -2180,9 +2185,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if (!hc->var_cnt) goto ret_success; - if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 1, - offsetof(struct hv_send_ipi_ex, - vp_set.bank_contents))) + if (!hc->fast) + hc->data_offset = offsetof(struct hv_send_ipi_ex, + vp_set.bank_contents); + else + hc->consumed_xmm_halves = 1; + + if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks)) return HV_STATUS_INVALID_HYPERCALL_INPUT; } -- cgit v1.2.3-59-g8ed1b From 3c649918b764c0aaef22ea65d514bac5e2324ec0 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Tue, 6 Dec 2022 17:20:15 +0800 Subject: KVM: x86: Simplify kvm_apic_hw_enabled kvm_apic_hw_enabled() only needs to return bool, there is no place to use the return value of MSR_IA32_APICBASE_ENABLE. Signed-off-by: Peng Hao Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 28e3769066e2..58c3242fcc7a 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -188,11 +188,11 @@ static inline bool lapic_in_kernel(struct kvm_vcpu *vcpu) extern struct static_key_false_deferred apic_hw_disabled; -static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic) +static inline bool kvm_apic_hw_enabled(struct kvm_lapic *apic) { if (static_branch_unlikely(&apic_hw_disabled.key)) return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; - return MSR_IA32_APICBASE_ENABLE; + return true; } extern struct static_key_false_deferred apic_sw_disabled; -- cgit v1.2.3-59-g8ed1b From 77b1908e10eccf34310ffd95b0b455c01aa76286 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 20 Dec 2022 15:34:27 +0000 Subject: KVM: x86: Sanity check inputs to kvm_handle_memory_failure() Add a sanity check in kvm_handle_memory_failure() to assert that a valid x86_exception structure is provided if the memory "failure" wants to propagate a fault into the guest. If a memory failure happens during a direct guest physical memory access, e.g. for nested VMX, KVM hardcodes the failure to X86EMUL_IO_NEEDED and doesn't provide an exception pointer (because the exception struct would just be filled with garbage). Signed-off-by: Sean Christopherson Message-Id: <20221220153427.514032-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fd6c01a39312..5c3ce39cdccb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13132,6 +13132,9 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, struct x86_exception *e) { if (r == X86EMUL_PROPAGATE_FAULT) { + if (KVM_BUG_ON(!e, vcpu->kvm)) + return -EIO; + kvm_inject_emulated_page_fault(vcpu, e); return 1; } -- cgit v1.2.3-59-g8ed1b From 53800f88d414525d3fdc5c84629faa0b6bc35b3b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 19 Dec 2022 22:04:16 +0000 Subject: KVM: selftests: Zero out valid_bank_mask for "all" case in Hyper-V IPI test Zero out the valid_bank_mask when using the fast variant of HVCALL_SEND_IPI_EX to send IPIs to all vCPUs. KVM requires the "var_cnt" and "valid_bank_mask" inputs to be consistent even when targeting all vCPUs. See commit bd1ba5732bb9 ("KVM: x86: Get the number of Hyper-V sparse banks from the VARHEAD field"). Fixes: 998489245d84 ("KVM: selftests: Hyper-V PV IPI selftest") Cc: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Message-Id: <20221219220416.395329-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/hyperv_ipi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c index 8b791eac7d5a..0cbb0e646ef8 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -193,8 +193,9 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL. - * Nothing to write anything to XMM regs. */ + ipi_ex->vp_set.valid_bank_mask = 0; + hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2); hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT, IPI_VECTOR, HV_GENERIC_SET_ALL); nop_loop(); -- cgit v1.2.3-59-g8ed1b From 057b18756b464729bc787ed4e6b44abb9f5c3a38 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 20 Dec 2022 15:42:24 +0000 Subject: KVM: nVMX: Document that ignoring memory failures for VMCLEAR is deliberate Explicitly drop the result of kvm_vcpu_write_guest() when writing the "launch state" as part of VMCLEAR emulation, and add a comment to call out that KVM's behavior is architecturally valid. Intel's pseudocode effectively says that VMCLEAR is a nop if the target VMCS address isn't in memory, e.g. if the address points at MMIO. Add a FIXME to call out that suppressing failures on __copy_to_user() is wrong, as memory (a memslot) does exist in that case. Punt the issue to the future as open coding kvm_vcpu_write_guest() just to make sure the guest dies with -EFAULT isn't worth the extra complexity. The flaw will need to be addressed if KVM ever does something intelligent on uaccess failures, e.g. to support post-copy demand paging, but in that case KVM will need a more thorough overhaul, i.e. VMCLEAR shouldn't need to open code a core KVM helper. No functional change intended. Reported-by: coverity-bot Addresses-Coverity-ID: 1527765 ("Error handling issues") Fixes: 587d7e72aedc ("kvm: nVMX: VMCLEAR should not cause the vCPU to shut down") Cc: Jim Mattson Signed-off-by: Sean Christopherson Message-Id: <20221220154224.526568-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index b6f4411b613e..f18f3a9f0943 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5296,10 +5296,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vcpu); - kvm_vcpu_write_guest(vcpu, - vmptr + offsetof(struct vmcs12, - launch_state), - &zero, sizeof(zero)); + /* + * Silently ignore memory errors on VMCLEAR, Intel's pseudocode + * for VMCLEAR includes a "ensure that data for VMCS referenced + * by the operand is in memory" clause that guards writes to + * memory, i.e. doing nothing for I/O is architecturally valid. + * + * FIXME: Suppress failures if and only if no memslot is found, + * i.e. exit to userspace if __copy_to_user() fails. + */ + (void)kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, + launch_state), + &zero, sizeof(zero)); } else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) { nested_release_evmcs(vcpu); } -- cgit v1.2.3-59-g8ed1b From 31de69f4eea77b28a9724b3fa55aae104fc91fc7 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 06:23:03 +0000 Subject: KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1 Set ENABLE_USR_WAIT_PAUSE in KVM's supported VMX MSR configuration if the feature is supported in hardware and enabled in KVM's base, non-nested configuration, i.e. expose ENABLE_USR_WAIT_PAUSE to L1 if it's supported. This fixes a bug where saving/restoring, i.e. migrating, a vCPU will fail if WAITPKG (the associated CPUID feature) is enabled for the vCPU, and obviously allows L1 to enable the feature for L2. KVM already effectively exposes ENABLE_USR_WAIT_PAUSE to L1 by stuffing the allowed-1 control ina vCPU's virtual MSR_IA32_VMX_PROCBASED_CTLS2 when updating secondary controls in response to KVM_SET_CPUID(2), but (a) that depends on flawed code (KVM shouldn't touch VMX MSRs in response to CPUID updates) and (b) runs afoul of vmx_restore_control_msr()'s restriction that the guest value must be a strict subset of the supported host value. Although no past commit explicitly enabled nested support for WAITPKG, doing so is safe and functionally correct from an architectural perspective as no additional KVM support is needed to virtualize TPAUSE, UMONITOR, and UMWAIT for L2 relative to L1, and KVM already forwards VM-Exits to L1 as necessary (commit bf653b78f960, "KVM: vmx: Introduce handle_unexpected_vmexit and handle WAITPKG vmexit"). Note, KVM always keeps the hosts MSR_IA32_UMWAIT_CONTROL resident in hardware, i.e. always runs both L1 and L2 with the host's power management settings for TPAUSE and UMWAIT. See commit bf09fb6cba4f ("KVM: VMX: Stop context switching MSR_IA32_UMWAIT_CONTROL") for more details. Fixes: e69e72faa3a0 ("KVM: x86: Add support for user wait instructions") Cc: stable@vger.kernel.org Reported-by: Aaron Lewis Reported-by: Yu Zhang Signed-off-by: Sean Christopherson Reviewed-by: Jim Mattson Message-Id: <20221213062306.667649-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f18f3a9f0943..d93c715cda6a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6882,7 +6882,8 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDSEED_EXITING | SECONDARY_EXEC_XSAVES | - SECONDARY_EXEC_TSC_SCALING; + SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; /* * We can emulate "VMCS shadowing," even if the hardware -- cgit v1.2.3-59-g8ed1b From a0860d68a25dee4e51e7d3e067a66ca765776fe8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 06:23:04 +0000 Subject: KVM: nVMX: Don't stuff secondary execution control if it's not supported When stuffing the allowed secondary execution controls for nested VMX in response to CPUID updates, don't set the allowed-1 bit for a feature that isn't supported by KVM, i.e. isn't allowed by the canonical vmcs_config. WARN if KVM attempts to manipulate a feature that isn't supported. All features that are currently stuffed are always advertised to L1 for nested VMX if they are supported in KVM's base configuration, and no additional features should ever be added to the CPUID-induced stuffing (updating VMX MSRs in response to CPUID updates is a long-standing KVM flaw that is slowly being fixed). Signed-off-by: Sean Christopherson Message-Id: <20221213062306.667649-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fe5615fd8295..fc9008dbed33 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4459,6 +4459,13 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, * controls for features that are/aren't exposed to the guest. */ if (nested) { + /* + * All features that can be added or removed to VMX MSRs must + * be supported in the first place for nested virtualization. + */ + if (WARN_ON_ONCE(!(vmcs_config.nested.secondary_ctls_high & control))) + enabled = false; + if (enabled) vmx->nested.msrs.secondary_ctls_high |= control; else -- cgit v1.2.3-59-g8ed1b From f5d16bb9be68b20c78fc3d93fa243eb1f0b9fa53 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 03:30:26 +0000 Subject: KVM: x86/mmu: Don't attempt to map leaf if target TDP MMU SPTE is frozen Hoist the is_removed_spte() check above the "level == goal_level" check when walking SPTEs during a TDP MMU page fault to avoid attempting to map a leaf entry if said entry is frozen by a different task/vCPU. ------------[ cut here ]------------ WARNING: CPU: 3 PID: 939 at arch/x86/kvm/mmu/tdp_mmu.c:653 kvm_tdp_mmu_map+0x269/0x4b0 Modules linked in: kvm_intel CPU: 3 PID: 939 Comm: nx_huge_pages_t Not tainted 6.1.0-rc4+ #67 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kvm_tdp_mmu_map+0x269/0x4b0 RSP: 0018:ffffc9000068fba8 EFLAGS: 00010246 RAX: 00000000000005a0 RBX: ffffc9000068fcc0 RCX: 0000000000000005 RDX: ffff88810741f000 RSI: ffff888107f04600 RDI: ffffc900006a3000 RBP: 060000010b000bf3 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 000ffffffffff000 R12: 0000000000000005 R13: ffff888113670000 R14: ffff888107464958 R15: 0000000000000000 FS: 00007f01c942c740(0000) GS:ffff888277cc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000117013006 CR4: 0000000000172ea0 Call Trace: kvm_tdp_page_fault+0x10c/0x130 kvm_mmu_page_fault+0x103/0x680 vmx_handle_exit+0x132/0x5a0 [kvm_intel] vcpu_enter_guest+0x60c/0x16f0 kvm_arch_vcpu_ioctl_run+0x1e2/0x9d0 kvm_vcpu_ioctl+0x271/0x660 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 ---[ end trace 0000000000000000 ]--- Fixes: 63d28a25e04c ("KVM: x86/mmu: simplify kvm_tdp_mmu_map flow when guest has to retry") Cc: Robert Hoo Signed-off-by: Sean Christopherson Reviewed-by: Robert Hoo Message-Id: <20221213033030.83345-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 771210ce5181..84b0b225fe5f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1173,9 +1173,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, iter.old_spte, iter.level); - if (iter.level == fault->goal_level) - break; - /* * If SPTE has been frozen by another thread, just give up and * retry, avoiding unnecessary page table allocation and free. @@ -1183,6 +1180,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (is_removed_spte(iter.old_spte)) goto retry; + if (iter.level == fault->goal_level) + break; + /* Step down into the lower level page table if it exists. */ if (is_shadow_present_pte(iter.old_spte) && !is_large_pte(iter.old_spte)) -- cgit v1.2.3-59-g8ed1b From 80a3e4ae962de33ff6a94e798c80e56e1fed4d10 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 03:30:27 +0000 Subject: KVM: x86/mmu: Map TDP MMU leaf SPTE iff target level is reached Map the leaf SPTE when handling a TDP MMU page fault if and only if the target level is reached. A recent commit reworked the retry logic and incorrectly assumed that walking SPTEs would never "fail", as the loop either bails (retries) or installs parent SPs. However, the iterator itself will bail early if it detects a frozen (REMOVED) SPTE when stepping down. The TDP iterator also rereads the current SPTE before stepping down specifically to avoid walking into a part of the tree that is being removed, which means it's possible to terminate the loop without the guts of the loop observing the frozen SPTE, e.g. if a different task zaps a parent SPTE between the initial read and try_step_down()'s refresh. Mapping a leaf SPTE at the wrong level results in all kinds of badness as page table walkers interpret the SPTE as a page table, not a leaf, and walk into the weeds. ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1025 at arch/x86/kvm/mmu/tdp_mmu.c:1070 kvm_tdp_mmu_map+0x481/0x510 Modules linked in: kvm_intel CPU: 1 PID: 1025 Comm: nx_huge_pages_t Tainted: G W 6.1.0-rc4+ #64 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kvm_tdp_mmu_map+0x481/0x510 RSP: 0018:ffffc9000072fba8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffffc9000072fcc0 RCX: 0000000000000027 RDX: 0000000000000027 RSI: 00000000ffffdfff RDI: ffff888277c5b4c8 RBP: ffff888107d45a10 R08: ffff888277c5b4c0 R09: ffffc9000072fa48 R10: 0000000000000001 R11: 0000000000000001 R12: ffffc9000073a0e0 R13: ffff88810fc54800 R14: ffff888107d1ae60 R15: ffff88810fc54f90 FS: 00007fba9f853740(0000) GS:ffff888277c40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000010aa7a003 CR4: 0000000000172ea0 Call Trace: kvm_tdp_page_fault+0x10c/0x130 kvm_mmu_page_fault+0x103/0x680 vmx_handle_exit+0x132/0x5a0 [kvm_intel] vcpu_enter_guest+0x60c/0x16f0 kvm_arch_vcpu_ioctl_run+0x1e2/0x9d0 kvm_vcpu_ioctl+0x271/0x660 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 ---[ end trace 0000000000000000 ]--- Invalid SPTE change: cannot replace a present leaf SPTE with another present leaf SPTE mapping a different PFN! as_id: 0 gfn: 100200 old_spte: 600000112400bf3 new_spte: 6000001126009f3 level: 2 ------------[ cut here ]------------ kernel BUG at arch/x86/kvm/mmu/tdp_mmu.c:559! invalid opcode: 0000 [#1] SMP CPU: 1 PID: 1025 Comm: nx_huge_pages_t Tainted: G W 6.1.0-rc4+ #64 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:__handle_changed_spte.cold+0x95/0x9c RSP: 0018:ffffc9000072faf8 EFLAGS: 00010246 RAX: 00000000000000c1 RBX: ffffc90000731000 RCX: 0000000000000027 RDX: 0000000000000000 RSI: 00000000ffffdfff RDI: ffff888277c5b4c8 RBP: 0600000112400bf3 R08: ffff888277c5b4c0 R09: ffffc9000072f9a0 R10: 0000000000000001 R11: 0000000000000001 R12: 06000001126009f3 R13: 0000000000000002 R14: 0000000012600901 R15: 0000000012400b01 FS: 00007fba9f853740(0000) GS:ffff888277c40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000010aa7a003 CR4: 0000000000172ea0 Call Trace: kvm_tdp_mmu_map+0x3b0/0x510 kvm_tdp_page_fault+0x10c/0x130 kvm_mmu_page_fault+0x103/0x680 vmx_handle_exit+0x132/0x5a0 [kvm_intel] vcpu_enter_guest+0x60c/0x16f0 kvm_arch_vcpu_ioctl_run+0x1e2/0x9d0 kvm_vcpu_ioctl+0x271/0x660 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Modules linked in: kvm_intel ---[ end trace 0000000000000000 ]--- Fixes: 63d28a25e04c ("KVM: x86/mmu: simplify kvm_tdp_mmu_map flow when guest has to retry") Cc: Robert Hoo Signed-off-by: Sean Christopherson Message-Id: <20221213033030.83345-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 84b0b225fe5f..fbdef59374fe 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1181,7 +1181,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) goto retry; if (iter.level == fault->goal_level) - break; + goto map_target_level; /* Step down into the lower level page table if it exists. */ if (is_shadow_present_pte(iter.old_spte) && @@ -1203,8 +1203,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) r = tdp_mmu_link_sp(kvm, &iter, sp, true); /* - * Also force the guest to retry the access if the upper level SPTEs - * aren't in place. + * Force the guest to retry if installing an upper level SPTE + * failed, e.g. because a different task modified the SPTE. */ if (r) { tdp_mmu_free_sp(sp); @@ -1219,6 +1219,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } } + /* + * The walk aborted before reaching the target level, e.g. because the + * iterator detected an upper level SPTE was frozen during traversal. + */ + WARN_ON_ONCE(iter.level == fault->goal_level); + goto retry; + +map_target_level: ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter); retry: -- cgit v1.2.3-59-g8ed1b From 21a36ac6b6c7059965bac0cc73ef3cbb8ef576dd Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 03:30:28 +0000 Subject: KVM: x86/mmu: Re-check under lock that TDP MMU SP hugepage is disallowed Re-check sp->nx_huge_page_disallowed under the tdp_mmu_pages_lock spinlock when adding a new shadow page in the TDP MMU. To ensure the NX reclaim kthread can't see a not-yet-linked shadow page, the page fault path links the new page table prior to adding the page to possible_nx_huge_pages. If the page is zapped by different task, e.g. because dirty logging is disabled, between linking the page and adding it to the list, KVM can end up triggering use-after-free by adding the zapped SP to the aforementioned list, as the zapped SP's memory is scheduled for removal via RCU callback. The bug is detected by the sanity checks guarded by CONFIG_DEBUG_LIST=y, i.e. the below splat is just one possible signature. ------------[ cut here ]------------ list_add corruption. prev->next should be next (ffffc9000071fa70), but was ffff88811125ee38. (prev=ffff88811125ee38). WARNING: CPU: 1 PID: 953 at lib/list_debug.c:30 __list_add_valid+0x79/0xa0 Modules linked in: kvm_intel CPU: 1 PID: 953 Comm: nx_huge_pages_t Tainted: G W 6.1.0-rc4+ #71 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:__list_add_valid+0x79/0xa0 RSP: 0018:ffffc900006efb68 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff888116cae8a0 RCX: 0000000000000027 RDX: 0000000000000027 RSI: 0000000100001872 RDI: ffff888277c5b4c8 RBP: ffffc90000717000 R08: ffff888277c5b4c0 R09: ffffc900006efa08 R10: 0000000000199998 R11: 0000000000199a20 R12: ffff888116cae930 R13: ffff88811125ee38 R14: ffffc9000071fa70 R15: ffff88810b794f90 FS: 00007fc0415d2740(0000) GS:ffff888277c40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000115201006 CR4: 0000000000172ea0 Call Trace: track_possible_nx_huge_page+0x53/0x80 kvm_tdp_mmu_map+0x242/0x2c0 kvm_tdp_page_fault+0x10c/0x130 kvm_mmu_page_fault+0x103/0x680 vmx_handle_exit+0x132/0x5a0 [kvm_intel] vcpu_enter_guest+0x60c/0x16f0 kvm_arch_vcpu_ioctl_run+0x1e2/0x9d0 kvm_vcpu_ioctl+0x271/0x660 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 ---[ end trace 0000000000000000 ]--- Fixes: 61f94478547b ("KVM: x86/mmu: Set disallowed_nx_huge_page in TDP MMU before setting SPTE") Reported-by: Greg Thelen Analyzed-by: David Matlack Cc: David Matlack Cc: Ben Gardon Cc: Mingwei Zhang Signed-off-by: Sean Christopherson Message-Id: <20221213033030.83345-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index fbdef59374fe..62a687d094bb 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1214,7 +1214,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) if (fault->huge_page_disallowed && fault->req_level >= iter.level) { spin_lock(&kvm->arch.tdp_mmu_pages_lock); - track_possible_nx_huge_page(kvm, sp); + if (sp->nx_huge_page_disallowed) + track_possible_nx_huge_page(kvm, sp); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } } -- cgit v1.2.3-59-g8ed1b From 50a9ac25985c037d45ee6d7e3a7ae198a63b9266 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 03:30:29 +0000 Subject: KVM: x86/mmu: Don't install TDP MMU SPTE if SP has unexpected level Don't install a leaf TDP MMU SPTE if the parent page's level doesn't match the target level of the fault, and instead have the vCPU retry the faulting instruction after warning. Continuing on is completely unnecessary as the absolute worst case scenario of retrying is DoSing the vCPU, whereas continuing on all but guarantees bigger explosions, e.g. ------------[ cut here ]------------ kernel BUG at arch/x86/kvm/mmu/tdp_mmu.c:559! invalid opcode: 0000 [#1] SMP CPU: 1 PID: 1025 Comm: nx_huge_pages_t Tainted: G W 6.1.0-rc4+ #64 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:__handle_changed_spte.cold+0x95/0x9c RSP: 0018:ffffc9000072faf8 EFLAGS: 00010246 RAX: 00000000000000c1 RBX: ffffc90000731000 RCX: 0000000000000027 RDX: 0000000000000000 RSI: 00000000ffffdfff RDI: ffff888277c5b4c8 RBP: 0600000112400bf3 R08: ffff888277c5b4c0 R09: ffffc9000072f9a0 R10: 0000000000000001 R11: 0000000000000001 R12: 06000001126009f3 R13: 0000000000000002 R14: 0000000012600901 R15: 0000000012400b01 FS: 00007fba9f853740(0000) GS:ffff888277c40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000010aa7a003 CR4: 0000000000172ea0 Call Trace: kvm_tdp_mmu_map+0x3b0/0x510 kvm_tdp_page_fault+0x10c/0x130 kvm_mmu_page_fault+0x103/0x680 vmx_handle_exit+0x132/0x5a0 [kvm_intel] vcpu_enter_guest+0x60c/0x16f0 kvm_arch_vcpu_ioctl_run+0x1e2/0x9d0 kvm_vcpu_ioctl+0x271/0x660 __x64_sys_ioctl+0x80/0xb0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Modules linked in: kvm_intel ---[ end trace 0000000000000000 ]--- Signed-off-by: Sean Christopherson Message-Id: <20221213033030.83345-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 62a687d094bb..d6df38d371a0 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1074,7 +1074,9 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int ret = RET_PF_FIXED; bool wrprot = false; - WARN_ON(sp->role.level != fault->goal_level); + if (WARN_ON_ONCE(sp->role.level != fault->goal_level)) + return RET_PF_RETRY; + if (unlikely(!fault->slot)) new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); else -- cgit v1.2.3-59-g8ed1b From ad9679f3811899fd1c21dc7bdd715e8e1cfb46b9 Mon Sep 17 00:00:00 2001 From: Takahiro Kuwano Date: Mon, 26 Dec 2022 13:01:58 +0900 Subject: mtd: spi-nor: sfdp: Fix index value for SCCR dwords Array index for SCCR 22th DOWRD should be 21. Fixes: 981a8d60e01f ("mtd: spi-nor: Parse SFDP SCCR Map") Signed-off-by: Takahiro Kuwano Signed-off-by: Tudor Ambarus Reviewed-by: Michael Walle Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/d8a2a77c2c95cf776e7dcae6392d29fdcf5d6307.1672026365.git.Takahiro.Kuwano@infineon.com --- drivers/mtd/spi-nor/sfdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c index 8434f654eca1..5df2fcba5483 100644 --- a/drivers/mtd/spi-nor/sfdp.c +++ b/drivers/mtd/spi-nor/sfdp.c @@ -1228,7 +1228,7 @@ static int spi_nor_parse_sccr(struct spi_nor *nor, le32_to_cpu_array(dwords, sccr_header->length); - if (FIELD_GET(SCCR_DWORD22_OCTAL_DTR_EN_VOLATILE, dwords[22])) + if (FIELD_GET(SCCR_DWORD22_OCTAL_DTR_EN_VOLATILE, dwords[21])) nor->flags |= SNOR_F_IO_MODE_EN_VOLATILE; out: -- cgit v1.2.3-59-g8ed1b From 86d4cdf88c81778bb4cd5c4bd7f8a6c64e69491f Mon Sep 17 00:00:00 2001 From: Takahiro Kuwano Date: Mon, 26 Dec 2022 13:01:59 +0900 Subject: mtd: spi-nor: sfdp: Rename BFPT_DWORD() macro to SFDP_DWORD() BFPT_DWORD() converts 1-based indexing to 0-based indexing for C arrays, and is used in BFPT parse. Per JESD216F.02, the conversion is applicable to other parameter tables than BFPT. This patch renames the macro to SFDP_DWORD() so that we can use it for other parameter tables than BFPT. Suggested-by: Tudor Ambarus Signed-off-by: Takahiro Kuwano Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/e42feac840fe3a31187419e91b2d514d9f259d15.1672026365.git.Takahiro.Kuwano@infineon.com --- drivers/mtd/spi-nor/issi.c | 2 +- drivers/mtd/spi-nor/macronix.c | 2 +- drivers/mtd/spi-nor/sfdp.c | 44 +++++++++++++++++++++--------------------- drivers/mtd/spi-nor/sfdp.h | 9 ++++----- 4 files changed, 28 insertions(+), 29 deletions(-) diff --git a/drivers/mtd/spi-nor/issi.c b/drivers/mtd/spi-nor/issi.c index a0ddad2afffc..400e2b42f45a 100644 --- a/drivers/mtd/spi-nor/issi.c +++ b/drivers/mtd/spi-nor/issi.c @@ -18,7 +18,7 @@ is25lp256_post_bfpt_fixups(struct spi_nor *nor, * BFPT_DWORD1_ADDRESS_BYTES_3_ONLY. * Overwrite the number of address bytes advertised by the BFPT. */ - if ((bfpt->dwords[BFPT_DWORD(1)] & BFPT_DWORD1_ADDRESS_BYTES_MASK) == + if ((bfpt->dwords[SFDP_DWORD(1)] & BFPT_DWORD1_ADDRESS_BYTES_MASK) == BFPT_DWORD1_ADDRESS_BYTES_3_ONLY) nor->params->addr_nbytes = 4; diff --git a/drivers/mtd/spi-nor/macronix.c b/drivers/mtd/spi-nor/macronix.c index d81a4cb2812b..6853ec9ae65d 100644 --- a/drivers/mtd/spi-nor/macronix.c +++ b/drivers/mtd/spi-nor/macronix.c @@ -22,7 +22,7 @@ mx25l25635_post_bfpt_fixups(struct spi_nor *nor, * seems that the F version advertises support for Fast Read 4-4-4 in * its BFPT table. */ - if (bfpt->dwords[BFPT_DWORD(5)] & BFPT_DWORD5_FAST_READ_4_4_4) + if (bfpt->dwords[SFDP_DWORD(5)] & BFPT_DWORD5_FAST_READ_4_4_4) nor->flags |= SNOR_F_4B_OPCODES; return 0; diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c index 5df2fcba5483..5c2ab868707b 100644 --- a/drivers/mtd/spi-nor/sfdp.c +++ b/drivers/mtd/spi-nor/sfdp.c @@ -242,64 +242,64 @@ static const struct sfdp_bfpt_read sfdp_bfpt_reads[] = { /* Fast Read 1-1-2 */ { SNOR_HWCAPS_READ_1_1_2, - BFPT_DWORD(1), BIT(16), /* Supported bit */ - BFPT_DWORD(4), 0, /* Settings */ + SFDP_DWORD(1), BIT(16), /* Supported bit */ + SFDP_DWORD(4), 0, /* Settings */ SNOR_PROTO_1_1_2, }, /* Fast Read 1-2-2 */ { SNOR_HWCAPS_READ_1_2_2, - BFPT_DWORD(1), BIT(20), /* Supported bit */ - BFPT_DWORD(4), 16, /* Settings */ + SFDP_DWORD(1), BIT(20), /* Supported bit */ + SFDP_DWORD(4), 16, /* Settings */ SNOR_PROTO_1_2_2, }, /* Fast Read 2-2-2 */ { SNOR_HWCAPS_READ_2_2_2, - BFPT_DWORD(5), BIT(0), /* Supported bit */ - BFPT_DWORD(6), 16, /* Settings */ + SFDP_DWORD(5), BIT(0), /* Supported bit */ + SFDP_DWORD(6), 16, /* Settings */ SNOR_PROTO_2_2_2, }, /* Fast Read 1-1-4 */ { SNOR_HWCAPS_READ_1_1_4, - BFPT_DWORD(1), BIT(22), /* Supported bit */ - BFPT_DWORD(3), 16, /* Settings */ + SFDP_DWORD(1), BIT(22), /* Supported bit */ + SFDP_DWORD(3), 16, /* Settings */ SNOR_PROTO_1_1_4, }, /* Fast Read 1-4-4 */ { SNOR_HWCAPS_READ_1_4_4, - BFPT_DWORD(1), BIT(21), /* Supported bit */ - BFPT_DWORD(3), 0, /* Settings */ + SFDP_DWORD(1), BIT(21), /* Supported bit */ + SFDP_DWORD(3), 0, /* Settings */ SNOR_PROTO_1_4_4, }, /* Fast Read 4-4-4 */ { SNOR_HWCAPS_READ_4_4_4, - BFPT_DWORD(5), BIT(4), /* Supported bit */ - BFPT_DWORD(7), 16, /* Settings */ + SFDP_DWORD(5), BIT(4), /* Supported bit */ + SFDP_DWORD(7), 16, /* Settings */ SNOR_PROTO_4_4_4, }, }; static const struct sfdp_bfpt_erase sfdp_bfpt_erases[] = { /* Erase Type 1 in DWORD8 bits[15:0] */ - {BFPT_DWORD(8), 0}, + {SFDP_DWORD(8), 0}, /* Erase Type 2 in DWORD8 bits[31:16] */ - {BFPT_DWORD(8), 16}, + {SFDP_DWORD(8), 16}, /* Erase Type 3 in DWORD9 bits[15:0] */ - {BFPT_DWORD(9), 0}, + {SFDP_DWORD(9), 0}, /* Erase Type 4 in DWORD9 bits[31:16] */ - {BFPT_DWORD(9), 16}, + {SFDP_DWORD(9), 16}, }; /** @@ -458,7 +458,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor, le32_to_cpu_array(bfpt.dwords, BFPT_DWORD_MAX); /* Number of address bytes. */ - switch (bfpt.dwords[BFPT_DWORD(1)] & BFPT_DWORD1_ADDRESS_BYTES_MASK) { + switch (bfpt.dwords[SFDP_DWORD(1)] & BFPT_DWORD1_ADDRESS_BYTES_MASK) { case BFPT_DWORD1_ADDRESS_BYTES_3_ONLY: case BFPT_DWORD1_ADDRESS_BYTES_3_OR_4: params->addr_nbytes = 3; @@ -475,7 +475,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor, } /* Flash Memory Density (in bits). */ - val = bfpt.dwords[BFPT_DWORD(2)]; + val = bfpt.dwords[SFDP_DWORD(2)]; if (val & BIT(31)) { val &= ~BIT(31); @@ -555,13 +555,13 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor, return spi_nor_post_bfpt_fixups(nor, bfpt_header, &bfpt); /* Page size: this field specifies 'N' so the page size = 2^N bytes. */ - val = bfpt.dwords[BFPT_DWORD(11)]; + val = bfpt.dwords[SFDP_DWORD(11)]; val &= BFPT_DWORD11_PAGE_SIZE_MASK; val >>= BFPT_DWORD11_PAGE_SIZE_SHIFT; params->page_size = 1U << val; /* Quad Enable Requirements. */ - switch (bfpt.dwords[BFPT_DWORD(15)] & BFPT_DWORD15_QER_MASK) { + switch (bfpt.dwords[SFDP_DWORD(15)] & BFPT_DWORD15_QER_MASK) { case BFPT_DWORD15_QER_NONE: params->quad_enable = NULL; break; @@ -608,7 +608,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor, } /* Soft Reset support. */ - if (bfpt.dwords[BFPT_DWORD(16)] & BFPT_DWORD16_SWRST_EN_RST) + if (bfpt.dwords[SFDP_DWORD(16)] & BFPT_DWORD16_SWRST_EN_RST) nor->flags |= SNOR_F_SOFT_RESET; /* Stop here if not JESD216 rev C or later. */ @@ -616,7 +616,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor, return spi_nor_post_bfpt_fixups(nor, bfpt_header, &bfpt); /* 8D-8D-8D command extension. */ - switch (bfpt.dwords[BFPT_DWORD(18)] & BFPT_DWORD18_CMD_EXT_MASK) { + switch (bfpt.dwords[SFDP_DWORD(18)] & BFPT_DWORD18_CMD_EXT_MASK) { case BFPT_DWORD18_CMD_EXT_REP: nor->cmd_ext_type = SPI_NOR_EXT_REPEAT; break; diff --git a/drivers/mtd/spi-nor/sfdp.h b/drivers/mtd/spi-nor/sfdp.h index c1969f0a2f46..500659b35655 100644 --- a/drivers/mtd/spi-nor/sfdp.h +++ b/drivers/mtd/spi-nor/sfdp.h @@ -13,13 +13,12 @@ #define SFDP_JESD216A_MINOR 5 #define SFDP_JESD216B_MINOR 6 +/* SFDP DWORDS are indexed from 1 but C arrays are indexed from 0. */ +#define SFDP_DWORD(i) ((i) - 1) + /* Basic Flash Parameter Table */ -/* - * JESD216 rev D defines a Basic Flash Parameter Table of 20 DWORDs. - * They are indexed from 1 but C arrays are indexed from 0. - */ -#define BFPT_DWORD(i) ((i) - 1) +/* JESD216 rev D defines a Basic Flash Parameter Table of 20 DWORDs. */ #define BFPT_DWORD_MAX 20 struct sfdp_bfpt { -- cgit v1.2.3-59-g8ed1b From 55398beb0846a9eca38db5250c9e7b3e59d407ca Mon Sep 17 00:00:00 2001 From: Takahiro Kuwano Date: Mon, 26 Dec 2022 13:02:00 +0900 Subject: mtd: spi-nor: sfdp: Use SFDP_DWORD() macro for optional parameter tables Change 0-based indexing values of parameter tables to 1-based ones by SFDP_DWORD() macro. Suggested-by: Tudor Ambarus Signed-off-by: Takahiro Kuwano Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/48cb008b40fdef4bf7f87e37029efaa2bfefa9ef.1672026365.git.Takahiro.Kuwano@infineon.com --- drivers/mtd/spi-nor/sfdp.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c index 5c2ab868707b..3acc01c3a900 100644 --- a/drivers/mtd/spi-nor/sfdp.c +++ b/drivers/mtd/spi-nor/sfdp.c @@ -1004,7 +1004,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor, discard_hwcaps |= read->hwcaps; if ((params->hwcaps.mask & read->hwcaps) && - (dwords[0] & read->supported_bit)) + (dwords[SFDP_DWORD(1)] & read->supported_bit)) read_hwcaps |= read->hwcaps; } @@ -1023,7 +1023,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor, * authority for specifying Page Program support. */ discard_hwcaps |= program->hwcaps; - if (dwords[0] & program->supported_bit) + if (dwords[SFDP_DWORD(1)] & program->supported_bit) pp_hwcaps |= program->hwcaps; } @@ -1035,7 +1035,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor, for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) { const struct sfdp_4bait *erase = &erases[i]; - if (dwords[0] & erase->supported_bit) + if (dwords[SFDP_DWORD(1)] & erase->supported_bit) erase_mask |= BIT(i); } @@ -1086,7 +1086,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor, for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) { if (erase_mask & BIT(i)) - erase_type[i].opcode = (dwords[1] >> + erase_type[i].opcode = (dwords[SFDP_DWORD(2)] >> erase_type[i].idx * 8) & 0xFF; else spi_nor_set_erase_type(&erase_type[i], 0u, 0xFF); @@ -1145,15 +1145,15 @@ static int spi_nor_parse_profile1(struct spi_nor *nor, le32_to_cpu_array(dwords, profile1_header->length); /* Get 8D-8D-8D fast read opcode and dummy cycles. */ - opcode = FIELD_GET(PROFILE1_DWORD1_RD_FAST_CMD, dwords[0]); + opcode = FIELD_GET(PROFILE1_DWORD1_RD_FAST_CMD, dwords[SFDP_DWORD(1)]); /* Set the Read Status Register dummy cycles and dummy address bytes. */ - if (dwords[0] & PROFILE1_DWORD1_RDSR_DUMMY) + if (dwords[SFDP_DWORD(1)] & PROFILE1_DWORD1_RDSR_DUMMY) nor->params->rdsr_dummy = 8; else nor->params->rdsr_dummy = 4; - if (dwords[0] & PROFILE1_DWORD1_RDSR_ADDR_BYTES) + if (dwords[SFDP_DWORD(1)] & PROFILE1_DWORD1_RDSR_ADDR_BYTES) nor->params->rdsr_addr_nbytes = 4; else nor->params->rdsr_addr_nbytes = 0; @@ -1167,13 +1167,16 @@ static int spi_nor_parse_profile1(struct spi_nor *nor, * Default to PROFILE1_DUMMY_DEFAULT if we don't find anything, and let * flashes set the correct value if needed in their fixup hooks. */ - dummy = FIELD_GET(PROFILE1_DWORD4_DUMMY_200MHZ, dwords[3]); + dummy = FIELD_GET(PROFILE1_DWORD4_DUMMY_200MHZ, dwords[SFDP_DWORD(4)]); if (!dummy) - dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_166MHZ, dwords[4]); + dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_166MHZ, + dwords[SFDP_DWORD(5)]); if (!dummy) - dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_133MHZ, dwords[4]); + dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_133MHZ, + dwords[SFDP_DWORD(5)]); if (!dummy) - dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_100MHZ, dwords[4]); + dummy = FIELD_GET(PROFILE1_DWORD5_DUMMY_100MHZ, + dwords[SFDP_DWORD(5)]); if (!dummy) dev_dbg(nor->dev, "Can't find dummy cycles from Profile 1.0 table\n"); @@ -1228,7 +1231,8 @@ static int spi_nor_parse_sccr(struct spi_nor *nor, le32_to_cpu_array(dwords, sccr_header->length); - if (FIELD_GET(SCCR_DWORD22_OCTAL_DTR_EN_VOLATILE, dwords[21])) + if (FIELD_GET(SCCR_DWORD22_OCTAL_DTR_EN_VOLATILE, + dwords[SFDP_DWORD(22)])) nor->flags |= SNOR_F_IO_MODE_EN_VOLATILE; out: -- cgit v1.2.3-59-g8ed1b From 8508fa2e7472f673edbeedf1b1d2b7a6bb898ecc Mon Sep 17 00:00:00 2001 From: Artem Egorkine Date: Sun, 25 Dec 2022 12:57:27 +0200 Subject: ALSA: line6: correct midi status byte when receiving data from podxt A PODxt device sends 0xb2, 0xc2 or 0xf2 as a status byte for MIDI messages over USB that should otherwise have a 0xb0, 0xc0 or 0xf0 status byte. This is usually corrected by the driver on other OSes. This fixes MIDI sysex messages sent by PODxt. [ tiwai: fixed white spaces ] Signed-off-by: Artem Egorkine Cc: Link: https://lore.kernel.org/r/20221225105728.1153989-1-arteme@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/line6/driver.c | 3 ++- sound/usb/line6/midi.c | 3 ++- sound/usb/line6/midibuf.c | 25 +++++++++++++++++-------- sound/usb/line6/midibuf.h | 5 ++++- sound/usb/line6/pod.c | 3 ++- 5 files changed, 27 insertions(+), 12 deletions(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index 59faa5a9a714..b67617b68e50 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -304,7 +304,8 @@ static void line6_data_received(struct urb *urb) for (;;) { done = line6_midibuf_read(mb, line6->buffer_message, - LINE6_MIDI_MESSAGE_MAXLEN); + LINE6_MIDI_MESSAGE_MAXLEN, + LINE6_MIDIBUF_READ_RX); if (done <= 0) break; diff --git a/sound/usb/line6/midi.c b/sound/usb/line6/midi.c index ba0e2b7e8fe1..d52355de2bbc 100644 --- a/sound/usb/line6/midi.c +++ b/sound/usb/line6/midi.c @@ -56,7 +56,8 @@ static void line6_midi_transmit(struct snd_rawmidi_substream *substream) for (;;) { done = line6_midibuf_read(mb, chunk, - LINE6_FALLBACK_MAXPACKETSIZE); + LINE6_FALLBACK_MAXPACKETSIZE, + LINE6_MIDIBUF_READ_TX); if (done == 0) break; diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c index 6a70463f82c4..e7f830f7526c 100644 --- a/sound/usb/line6/midibuf.c +++ b/sound/usb/line6/midibuf.c @@ -9,6 +9,7 @@ #include "midibuf.h" + static int midibuf_message_length(unsigned char code) { int message_length; @@ -20,12 +21,7 @@ static int midibuf_message_length(unsigned char code) message_length = length[(code >> 4) - 8]; } else { - /* - Note that according to the MIDI specification 0xf2 is - the "Song Position Pointer", but this is used by Line 6 - to send sysex messages to the host. - */ - static const int length[] = { -1, 2, -1, 2, -1, -1, 1, 1, 1, 1, + static const int length[] = { -1, 2, 2, 2, -1, -1, 1, 1, 1, -1, 1, 1, 1, -1, 1, 1 }; message_length = length[code & 0x0f]; @@ -125,7 +121,7 @@ int line6_midibuf_write(struct midi_buffer *this, unsigned char *data, } int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, - int length) + int length, int read_type) { int bytes_used; int length1, length2; @@ -148,9 +144,22 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, length1 = this->size - this->pos_read; - /* check MIDI command length */ command = this->buf[this->pos_read]; + /* + PODxt always has status byte lower nibble set to 0010, + when it means to send 0000, so we correct if here so + that control/program changes come on channel 1 and + sysex message status byte is correct + */ + if (read_type == LINE6_MIDIBUF_READ_RX) { + if (command == 0xb2 || command == 0xc2 || command == 0xf2) { + unsigned char fixed = command & 0xf0; + this->buf[this->pos_read] = fixed; + command = fixed; + } + } + /* check MIDI command length */ if (command & 0x80) { midi_length = midibuf_message_length(command); this->command_prev = command; diff --git a/sound/usb/line6/midibuf.h b/sound/usb/line6/midibuf.h index 124a8f9f7e96..542e8d836f87 100644 --- a/sound/usb/line6/midibuf.h +++ b/sound/usb/line6/midibuf.h @@ -8,6 +8,9 @@ #ifndef MIDIBUF_H #define MIDIBUF_H +#define LINE6_MIDIBUF_READ_TX 0 +#define LINE6_MIDIBUF_READ_RX 1 + struct midi_buffer { unsigned char *buf; int size; @@ -23,7 +26,7 @@ extern void line6_midibuf_destroy(struct midi_buffer *mb); extern int line6_midibuf_ignore(struct midi_buffer *mb, int length); extern int line6_midibuf_init(struct midi_buffer *mb, int size, int split); extern int line6_midibuf_read(struct midi_buffer *mb, unsigned char *data, - int length); + int length, int read_type); extern void line6_midibuf_reset(struct midi_buffer *mb); extern int line6_midibuf_write(struct midi_buffer *mb, unsigned char *data, int length); diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c index cd41aa7f0385..d173971e5f02 100644 --- a/sound/usb/line6/pod.c +++ b/sound/usb/line6/pod.c @@ -159,8 +159,9 @@ static struct line6_pcm_properties pod_pcm_properties = { .bytes_per_channel = 3 /* SNDRV_PCM_FMTBIT_S24_3LE */ }; + static const char pod_version_header[] = { - 0xf2, 0x7e, 0x7f, 0x06, 0x02 + 0xf0, 0x7e, 0x7f, 0x06, 0x02 }; static char *pod_alloc_sysex_buffer(struct usb_line6_pod *pod, int code, -- cgit v1.2.3-59-g8ed1b From b8800d324abb50160560c636bfafe2c81001b66c Mon Sep 17 00:00:00 2001 From: Artem Egorkine Date: Sun, 25 Dec 2022 12:57:28 +0200 Subject: ALSA: line6: fix stack overflow in line6_midi_transmit Correctly calculate available space including the size of the chunk buffer. This fixes a buffer overflow when multiple MIDI sysex messages are sent to a PODxt device. Signed-off-by: Artem Egorkine Cc: Link: https://lore.kernel.org/r/20221225105728.1153989-2-arteme@gmail.com Signed-off-by: Takashi Iwai --- sound/usb/line6/midi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/line6/midi.c b/sound/usb/line6/midi.c index d52355de2bbc..0838632c788e 100644 --- a/sound/usb/line6/midi.c +++ b/sound/usb/line6/midi.c @@ -44,7 +44,8 @@ static void line6_midi_transmit(struct snd_rawmidi_substream *substream) int req, done; for (;;) { - req = min(line6_midibuf_bytes_free(mb), line6->max_packet_size); + req = min3(line6_midibuf_bytes_free(mb), line6->max_packet_size, + LINE6_FALLBACK_MAXPACKETSIZE); done = snd_rawmidi_transmit_peek(substream, chunk, req); if (done == 0) -- cgit v1.2.3-59-g8ed1b From a4517c4f3423c7c448f2c359218f97c1173523a1 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Mon, 26 Dec 2022 19:43:03 +0800 Subject: ALSA: hda/realtek: Apply dual codec fixup for Dell Latitude laptops The Dell Latiture 3340/3440/3540 laptops with Realtek ALC3204 have dual codecs and need the ALC1220_FIXUP_GB_DUAL_CODECS to fix the conflicts of Master controls. The existing headset mic fixup for Dell is also required to enable the jack sense and the headset mic. Introduce a new fixup to fix the dual codec and headset mic issues for particular Dell laptops since other old Dell laptops with the same codec configuration are already well handled by the fixup in alc269_fallback_pin_fixup_tbl[]. Signed-off-by: Chris Chiu Cc: Link: https://lore.kernel.org/r/20221226114303.4027500-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e443d88f627f..3794b522c222 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7175,6 +7175,7 @@ enum { ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS, + ALC236_FIXUP_DELL_DUAL_CODECS, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9130,6 +9131,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, }, + [ALC236_FIXUP_DELL_DUAL_CODECS] = { + .type = HDA_FIXUP_PINS, + .v.func = alc1220_fixup_gb_dual_codecs, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9232,6 +9239,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), + SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -- cgit v1.2.3-59-g8ed1b From 246cf66e300b76099b5dbd3fdd39e9a5dbc53f02 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 26 Dec 2022 11:06:05 +0800 Subject: block, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq Commit 64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'") will access 'bic->bfqq' in bic_set_bfqq(), however, bfq_exit_icq_bfqq() can free bfqq first, and then call bic_set_bfqq(), which will cause uaf. Fix the problem by moving bfq_exit_bfqq() behind bic_set_bfqq(). Fixes: 64dc8c732f5c ("block, bfq: fix possible uaf for 'bfqq->bic'") Reported-by: Yi Zhang Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20221226030605.1437081-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 16f43bbc575a..ccf2204477a5 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -5317,8 +5317,8 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync) unsigned long flags; spin_lock_irqsave(&bfqd->lock, flags); - bfq_exit_bfqq(bfqd, bfqq); bic_set_bfqq(bic, NULL, is_sync); + bfq_exit_bfqq(bfqd, bfqq); spin_unlock_irqrestore(&bfqd->lock, flags); } } -- cgit v1.2.3-59-g8ed1b From 33b93727ce90c8db916fb071ed13e90106339754 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 25 Dec 2022 11:32:31 +0100 Subject: nvme: fix setting the queue depth in nvme_alloc_io_tag_set While the CAP.MQES field in NVMe is a 0s based filed with a natural one off, we also need to account for the queue wrap condition and fix undo the one off again in nvme_alloc_io_tag_set. This was never properly done by the fabrics drivers, but they don't seem to care because there is no actual physical queue that can wrap around, but it became a problem when converting over the PCIe driver. Also add back the BLK_MQ_MAX_DEPTH check that was lost in the same commit. Fixes: 0da7feaa5913 ("nvme-pci: use the tagset alloc/free helpers") Reported-by: Hugh Dickins Signed-off-by: Christoph Hellwig Tested-by: Hugh Dickins Link: https://lore.kernel.org/r/20221225103234.226794-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e26b085a007a..cda1361e6d4f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4897,7 +4897,7 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, memset(set, 0, sizeof(*set)); set->ops = ops; - set->queue_depth = ctrl->sqsize + 1; + set->queue_depth = min_t(unsigned, ctrl->sqsize, BLK_MQ_MAX_DEPTH - 1); /* * Some Apple controllers requires tags to be unique across admin and * the (only) I/O queue, so reserve the first 32 tags of the I/O queue. -- cgit v1.2.3-59-g8ed1b From 88d356ca41ba1c3effc2d4208dfbd4392f58cd6d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 25 Dec 2022 11:32:32 +0100 Subject: nvme-pci: update sqsize when adjusting the queue depth Update the core sqsize field in addition to the PCIe-specific q_depth field as the core tagset allocation helpers rely on it. Fixes: 0da7feaa5913 ("nvme-pci: use the tagset alloc/free helpers") Signed-off-by: Christoph Hellwig Acked-by: Hugh Dickins Link: https://lore.kernel.org/r/20221225103234.226794-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 804b6a6cb43a..b13baccedb4a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2333,10 +2333,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (dev->cmb_use_sqes) { result = nvme_cmb_qdepth(dev, nr_io_queues, sizeof(struct nvme_command)); - if (result > 0) + if (result > 0) { dev->q_depth = result; - else + dev->ctrl.sqsize = result - 1; + } else { dev->cmb_use_sqes = false; + } } do { @@ -2537,7 +2539,6 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1, io_queue_depth); - dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap); dev->dbs = dev->bar + 4096; @@ -2578,7 +2579,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n", dev->q_depth); } - + dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */ nvme_map_cmb(dev); -- cgit v1.2.3-59-g8ed1b From 93ef83050e597634d2c7dc838a28caf5137b9404 Mon Sep 17 00:00:00 2001 From: "YoungJun.park" Date: Fri, 28 Oct 2022 07:42:41 -0700 Subject: kunit: alloc_string_stream_fragment error handling bug fix When it fails to allocate fragment, it does not free and return error. And check the pointer inappropriately. Fixed merge conflicts with commit 618887768bb7 ("kunit: update NULL vs IS_ERR() tests") Shuah Khan Signed-off-by: YoungJun.park Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/string-stream.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/kunit/string-stream.c b/lib/kunit/string-stream.c index f5f51166d8c2..cc32743c1171 100644 --- a/lib/kunit/string-stream.c +++ b/lib/kunit/string-stream.c @@ -23,8 +23,10 @@ static struct string_stream_fragment *alloc_string_stream_fragment( return ERR_PTR(-ENOMEM); frag->fragment = kunit_kmalloc(test, len, gfp); - if (!frag->fragment) + if (!frag->fragment) { + kunit_kfree(test, frag); return ERR_PTR(-ENOMEM); + } return frag; } -- cgit v1.2.3-59-g8ed1b From 37e14e4f3715428b809e4df9a9958baa64c77d51 Mon Sep 17 00:00:00 2001 From: Adam Vodopjan Date: Fri, 9 Dec 2022 09:26:34 +0000 Subject: ata: ahci: Fix PCS quirk application for suspend Since kernel 5.3.4 my laptop (ICH8M controller) does not see Kingston SV300S37A60G SSD disk connected into a SATA connector on wake from suspend. The problem was introduced in c312ef176399 ("libata/ahci: Drop PCS quirk for Denverton and beyond"): the quirk is not applied on wake from suspend as it originally was. It is worth to mention the commit contained another bug: the quirk is not applied at all to controllers which require it. The fix commit 09d6ac8dc51a ("libata/ahci: Fix PCS quirk application") landed in 5.3.8. So testing my patch anywhere between commits c312ef176399 and 09d6ac8dc51a is pointless. Not all disks trigger the problem. For example nothing bad happens with Western Digital WD5000LPCX HDD. Test hardware: - Acer 5920G with ICH8M SATA controller - sda: some SATA HDD connnected into the DVD drive IDE port with a SATA-IDE caddy. It is a boot disk - sdb: Kingston SV300S37A60G SSD connected into the only SATA port Sample "dmesg --notime | grep -E '^(sd |ata)'" output on wake: sd 0:0:0:0: [sda] Starting disk sd 2:0:0:0: [sdb] Starting disk ata4: SATA link down (SStatus 4 SControl 300) ata3: SATA link down (SStatus 4 SControl 300) ata1.00: ACPI cmd ef/03:0c:00:00:00:a0 (SET FEATURES) filtered out ata1.00: ACPI cmd ef/03:42:00:00:00:a0 (SET FEATURES) filtered out ata1: FORCE: cable set to 80c ata5: SATA link down (SStatus 0 SControl 300) ata3: SATA link down (SStatus 4 SControl 300) ata3: SATA link down (SStatus 4 SControl 300) ata3.00: disabled sd 2:0:0:0: rejecting I/O to offline device ata3.00: detaching (SCSI 2:0:0:0) sd 2:0:0:0: [sdb] Start/Stop Unit failed: Result: hostbyte=DID_NO_CONNECT driverbyte=DRIVER_OK sd 2:0:0:0: [sdb] Synchronizing SCSI cache sd 2:0:0:0: [sdb] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK sd 2:0:0:0: [sdb] Stopping disk sd 2:0:0:0: [sdb] Start/Stop Unit failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK Commit c312ef176399 dropped ahci_pci_reset_controller() which internally calls ahci_reset_controller() and applies the PCS quirk if needed after that. It was called each time a reset was required instead of just ahci_reset_controller(). This patch puts the function back in place. Fixes: c312ef176399 ("libata/ahci: Drop PCS quirk for Denverton and beyond") Signed-off-by: Adam Vodopjan Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 0cfd0ec6229b..14a1c0d14916 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -83,6 +83,7 @@ enum board_ids { static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static void ahci_remove_one(struct pci_dev *dev); static void ahci_shutdown_one(struct pci_dev *dev); +static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hpriv); static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, @@ -676,6 +677,25 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, ahci_save_initial_config(&pdev->dev, hpriv); } +static int ahci_pci_reset_controller(struct ata_host *host) +{ + struct pci_dev *pdev = to_pci_dev(host->dev); + struct ahci_host_priv *hpriv = host->private_data; + int rc; + + rc = ahci_reset_controller(host); + if (rc) + return rc; + + /* + * If platform firmware failed to enable ports, try to enable + * them here. + */ + ahci_intel_pcs_quirk(pdev, hpriv); + + return 0; +} + static void ahci_pci_init_controller(struct ata_host *host) { struct ahci_host_priv *hpriv = host->private_data; @@ -870,7 +890,7 @@ static int ahci_pci_device_runtime_resume(struct device *dev) struct ata_host *host = pci_get_drvdata(pdev); int rc; - rc = ahci_reset_controller(host); + rc = ahci_pci_reset_controller(host); if (rc) return rc; ahci_pci_init_controller(host); @@ -906,7 +926,7 @@ static int ahci_pci_device_resume(struct device *dev) ahci_mcp89_apple_enable(pdev); if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) { - rc = ahci_reset_controller(host); + rc = ahci_pci_reset_controller(host); if (rc) return rc; @@ -1784,12 +1804,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* save initial config */ ahci_pci_save_initial_config(pdev, hpriv); - /* - * If platform firmware failed to enable ports, try to enable - * them here. - */ - ahci_intel_pcs_quirk(pdev, hpriv); - /* prepare host */ if (hpriv->cap & HOST_CAP_NCQ) { pi.flags |= ATA_FLAG_NCQ; @@ -1899,7 +1913,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) return rc; - rc = ahci_reset_controller(host); + rc = ahci_pci_reset_controller(host); if (rc) return rc; -- cgit v1.2.3-59-g8ed1b From e779fd53b4aa0aa8704ae62eb56065b9877a540b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 00:16:40 +0000 Subject: KVM: selftests: Define literal to asm constraint in aarch64 as unsigned long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define a literal '0' asm input constraint to aarch64/page_fault_test's guest_cas() as an unsigned long to make clang happy. tools/testing/selftests/kvm/aarch64/page_fault_test.c:120:16: error: value size does not match register size specified by the constraint and modifier [-Werror,-Wasm-operand-widths] :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); ^ tools/testing/selftests/kvm/aarch64/page_fault_test.c:119:15: note: use constraint modifier "w" "casal %0, %1, [%2]\n" ^~ %w0 Fixes: 35c581015712 ("KVM: selftests: aarch64: Add aarch64/page_fault_test") Cc: Ricardo Koller Signed-off-by: Sean Christopherson Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20221213001653.3852042-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/aarch64/page_fault_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 95d22cfb7b41..beb944fa6fd4 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -117,7 +117,7 @@ static void guest_cas(void) GUEST_ASSERT(guest_check_lse()); asm volatile(".arch_extension lse\n" "casal %0, %1, [%2]\n" - :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); + :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory)); val = READ_ONCE(*guest_test_memory); GUEST_ASSERT_EQ(val, TEST_DATA); } -- cgit v1.2.3-59-g8ed1b From 73441efa36c253906057b8800bc9a3fdadbc2c41 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 00:16:41 +0000 Subject: KVM: selftests: Delete dead code in x86_64/vmx_tsc_adjust_test.c Delete an unused struct definition in x86_64/vmx_tsc_adjust_test.c. Signed-off-by: Sean Christopherson Message-Id: <20221213001653.3852042-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 5943187e8594..ff8ecdf32ae0 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -49,11 +49,6 @@ enum { NUM_VMX_PAGES, }; -struct kvm_single_msr { - struct kvm_msrs header; - struct kvm_msr_entry entry; -} __attribute__((packed)); - /* The virtual machine object. */ static struct kvm_vm *vm; -- cgit v1.2.3-59-g8ed1b From d61a12cb9af5b355a38e0c0106e91224b49195ce Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 00:16:42 +0000 Subject: KVM: selftests: Fix divide-by-zero bug in memslot_perf_test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that the number of pages per slot is non-zero in get_max_slots() prior to computing the remaining number of pages. clang generates code that uses an actual DIV for calculating the remaining, which causes a #DE if the total number of pages is less than the number of slots. traps: memslot_perf_te[97611] trap divide error ip:4030c4 sp:7ffd18ae58f0 error:0 in memslot_perf_test[401000+cb000] Fixes: a69170c65acd ("KVM: selftests: memslot_perf_test: Report optimal memory slots") Signed-off-by: Sean Christopherson Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20221213001653.3852042-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/memslot_perf_test.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index e698306bf49d..e6587e193490 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -265,6 +265,9 @@ static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size) slots = data->nslots; while (--slots > 1) { pages_per_slot = mempages / slots; + if (!pages_per_slot) + continue; + rempages = mempages % pages_per_slot; if (check_slot_pages(host_page_size, guest_page_size, pages_per_slot, rempages)) -- cgit v1.2.3-59-g8ed1b From 43e96957e8b87bad8e4ba666750ff0cda9e03ffb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 00:16:43 +0000 Subject: KVM: selftests: Use pattern matching in .gitignore Use pattern matching to exclude everything except .c, .h, .S, and .sh files from Git. Manually adding every test target has an absurd maintenance cost, is comically error prone, and leads to bikeshedding over whether or not the targets should be listed in alphabetical order. Deliberately do not include the one-off assets, e.g. config, settings, .gitignore itself, etc as Git doesn't ignore files that are already in the repository. Adding the one-off assets won't prevent mistakes where developers forget to --force add files that don't match the "allowed". Signed-off-by: Sean Christopherson Message-Id: <20221213001653.3852042-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 91 +++------------------------------- 1 file changed, 6 insertions(+), 85 deletions(-) diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6ce8c488d62e..6d9381d60172 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,86 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -/aarch64/aarch32_id_regs -/aarch64/arch_timer -/aarch64/debug-exceptions -/aarch64/get-reg-list -/aarch64/hypercalls -/aarch64/page_fault_test -/aarch64/psci_test -/aarch64/vcpu_width_config -/aarch64/vgic_init -/aarch64/vgic_irq -/s390x/memop -/s390x/resets -/s390x/sync_regs_test -/s390x/tprot -/x86_64/amx_test -/x86_64/cpuid_test -/x86_64/cr4_cpuid_sync_test -/x86_64/debug_regs -/x86_64/exit_on_emulation_failure_test -/x86_64/fix_hypercall_test -/x86_64/get_msr_index_features -/x86_64/kvm_clock_test -/x86_64/kvm_pv_test -/x86_64/hyperv_clock -/x86_64/hyperv_cpuid -/x86_64/hyperv_evmcs -/x86_64/hyperv_features -/x86_64/hyperv_ipi -/x86_64/hyperv_svm_test -/x86_64/hyperv_tlb_flush -/x86_64/max_vcpuid_cap_test -/x86_64/mmio_warning_test -/x86_64/monitor_mwait_test -/x86_64/nested_exceptions_test -/x86_64/nx_huge_pages_test -/x86_64/platform_info_test -/x86_64/pmu_event_filter_test -/x86_64/set_boot_cpu_id -/x86_64/set_sregs_test -/x86_64/sev_migrate_tests -/x86_64/smaller_maxphyaddr_emulation_test -/x86_64/smm_test -/x86_64/state_test -/x86_64/svm_vmcall_test -/x86_64/svm_int_ctl_test -/x86_64/svm_nested_soft_inject_test -/x86_64/svm_nested_shutdown_test -/x86_64/sync_regs_test -/x86_64/tsc_msrs_test -/x86_64/tsc_scaling_sync -/x86_64/ucna_injection_test -/x86_64/userspace_io_test -/x86_64/userspace_msr_exit_test -/x86_64/vmx_apic_access_test -/x86_64/vmx_close_while_nested_test -/x86_64/vmx_dirty_log_test -/x86_64/vmx_exception_with_invalid_guest_state -/x86_64/vmx_invalid_nested_guest_state -/x86_64/vmx_msrs_test -/x86_64/vmx_preemption_timer_test -/x86_64/vmx_set_nested_state_test -/x86_64/vmx_tsc_adjust_test -/x86_64/vmx_nested_tsc_scaling_test -/x86_64/xapic_ipi_test -/x86_64/xapic_state_test -/x86_64/xen_shinfo_test -/x86_64/xen_vmcall_test -/x86_64/xss_msr_test -/x86_64/vmx_pmu_caps_test -/x86_64/triple_fault_event_test -/access_tracking_perf_test -/demand_paging_test -/dirty_log_test -/dirty_log_perf_test -/hardware_disable_test -/kvm_create_max_vcpus -/kvm_page_table_test -/max_guest_memory_test -/memslot_modification_stress_test -/memslot_perf_test -/rseq_test -/set_memory_region_test -/steal_time -/kvm_binary_stats_test -/system_counter_offset_test +* +!/**/ +!*.c +!*.h +!*.S +!*.sh -- cgit v1.2.3-59-g8ed1b From 1525429fe5cb8e23b74c6dd473bb477a35906704 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 00:16:44 +0000 Subject: KVM: selftests: Fix a typo in x86-64's kvm_get_cpu_address_width() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a == vs. = typo in kvm_get_cpu_address_width() that results in @pa_bits being left unset if the CPU doesn't support enumerating its MAX_PHY_ADDR. Flagged by clang's unusued-value warning. lib/x86_64/processor.c:1034:51: warning: expression result unused [-Wunused-value] *pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32; Fixes: 3bd396353d18 ("KVM: selftests: Add X86_FEATURE_PAE and use it calc "fallback" MAXPHYADDR") Signed-off-by: Sean Christopherson Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20221213001653.3852042-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/x86_64/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c4d368d56cfe..acfa1d01e7df 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1031,7 +1031,7 @@ bool is_amd_cpu(void) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) { if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) { - *pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32; + *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32; *va_bits = 32; } else { *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR); -- cgit v1.2.3-59-g8ed1b From 6a5db83adfd668b3c1092274ddf45903eb1fe435 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 00:16:45 +0000 Subject: KVM: selftests: Rename UNAME_M to ARCH_DIR, fill explicitly for x86 Rename UNAME_M to ARCH_DIR and explicitly set it directly for x86. At this point, the name of the arch directory really doesn't have anything to do with `uname -m`, and UNAME_M is unnecessarily confusing given that its purpose is purely to identify the arch specific directory. Signed-off-by: Sean Christopherson Message-Id: <20221213001653.3852042-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 47 ++++++++++-------------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 947676983da1..59f3eb53c932 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -7,35 +7,14 @@ top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -# For cross-builds to work, UNAME_M has to map to ARCH and arch specific -# directories and targets in this Makefile. "uname -m" doesn't map to -# arch specific sub-directory names. -# -# UNAME_M variable to used to run the compiles pointing to the right arch -# directories and build the right targets for these supported architectures. -# -# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable. -# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable. -# -# x86_64 targets are named to include x86_64 as a suffix and directories -# for includes are in x86_64 sub-directory. s390x and aarch64 follow the -# same convention. "uname -m" doesn't result in the correct mapping for -# s390x and aarch64. -# -# No change necessary for x86_64 -UNAME_M := $(shell uname -m) - -# Set UNAME_M for arm64 compile/install to work -ifeq ($(ARCH),arm64) - UNAME_M := aarch64 -endif -# Set UNAME_M s390x compile/install to work -ifeq ($(ARCH),s390) - UNAME_M := s390x -endif -# Set UNAME_M riscv compile/install to work -ifeq ($(ARCH),riscv) - UNAME_M := riscv +ifeq ($(ARCH),x86) + ARCH_DIR := x86_64 +else ifeq ($(ARCH),arm64) + ARCH_DIR := aarch64 +else ifeq ($(ARCH),s390) + ARCH_DIR := s390x +else + ARCH_DIR := $(ARCH) endif LIBKVM += lib/assert.c @@ -196,10 +175,10 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_PROGS += $(TEST_PROGS_$(UNAME_M)) -TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) -TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(UNAME_M)) -LIBKVM += $(LIBKVM_$(UNAME_M)) +TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) +LIBKVM += $(LIBKVM_$(ARCH_DIR)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ @@ -212,7 +191,7 @@ endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$( Date: Tue, 13 Dec 2022 00:16:46 +0000 Subject: KVM: selftests: Use proper function prototypes in probing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the main() functions in the probing code proper prototypes so that compiling the probing code with more strict flags won't generate false negatives. :1:5: error: function declaration isn’t a prototype [-Werror=strict-prototypes] Signed-off-by: Sean Christopherson Message-Id: <20221213001653.3852042-8-seanjc@google.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 59f3eb53c932..8eecda2be0d0 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -194,11 +194,11 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -I$( Date: Tue, 13 Dec 2022 00:16:47 +0000 Subject: KVM: selftests: Probe -no-pie with actual CFLAGS used to compile Probe -no-pie with the actual set of CFLAGS used to compile the tests, clang whines about -no-pie being unused if the tests are compiled with -static. clang: warning: argument unused during compilation: '-no-pie' [-Wunused-command-line-argument] Signed-off-by: Sean Christopherson Message-Id: <20221213001653.3852042-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 8eecda2be0d0..2ffd87247279 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -195,7 +195,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ $(KHDR_INCLUDES) no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \ - $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie) + $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) # On s390, build the testcases KVM-enabled pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ -- cgit v1.2.3-59-g8ed1b From 7cf2e7373ab145bf972c3cbcb495fd1a9770c3b0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 00:16:48 +0000 Subject: KVM: selftests: Explicitly disable builtins for mem*() overrides Explicitly disable the compiler's builtin memcmp(), memcpy(), and memset(). Because only lib/string_override.c is built with -ffreestanding, the compiler reserves the right to do what it wants and can try to link the non-freestanding code to its own crud. /usr/bin/x86_64-linux-gnu-ld: /lib/x86_64-linux-gnu/libc.a(memcmp.o): in function `memcmp_ifunc': (.text+0x0): multiple definition of `memcmp'; tools/testing/selftests/kvm/lib/string_override.o: tools/testing/selftests/kvm/lib/string_override.c:15: first defined here clang: error: linker command failed with exit code 1 (use -v to see invocation) Fixes: 6b6f71484bf4 ("KVM: selftests: Implement memcmp(), memcpy(), and memset() for guest use") Reported-by: Aaron Lewis Reported-by: Raghavendra Rao Ananta Signed-off-by: Sean Christopherson Message-Id: <20221213001653.3852042-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2ffd87247279..ecd3c8126c3d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -189,6 +189,7 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -I$( Date: Tue, 13 Dec 2022 00:16:49 +0000 Subject: KVM: selftests: Include lib.mk before consuming $(CC) Include lib.mk before consuming $(CC) and document that lib.mk overwrites $(CC) unless make was invoked with -e or $(CC) was specified after make (which makes the environment override the Makefile). Including lib.mk after using it for probing, e.g. for -no-pie, can lead to weirdness. Signed-off-by: Sean Christopherson Message-Id: <20221213001653.3852042-11-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ecd3c8126c3d..2acba3c74ad6 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -180,6 +180,11 @@ TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) LIBKVM += $(LIBKVM_$(ARCH_DIR)) +# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most +# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, +# which causes the environment variable to override the makefile). +include ../lib.mk + INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include @@ -205,10 +210,6 @@ pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \ LDLIBS += -ldl LDFLAGS += -pthread $(no-pie-option) $(pgste-option) -# After inclusion, $(OUTPUT) is defined and -# $(TEST_GEN_PROGS) starts with $(OUTPUT)/ -include ../lib.mk - LIBKVM_C := $(filter %.c,$(LIBKVM)) LIBKVM_S := $(filter %.S,$(LIBKVM)) LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) -- cgit v1.2.3-59-g8ed1b From db7b780dab6742a8358ae7ecb1d0e972ccea8737 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 13 Dec 2022 00:16:50 +0000 Subject: KVM: selftests: Disable "gnu-variable-sized-type-not-at-end" warning Disable gnu-variable-sized-type-not-at-end so that tests and libraries can create overlays of variable sized arrays at the end of structs when using a fixed number of entries, e.g. to get/set a single MSR. It's possible to fudge around the warning, e.g. by defining a custom struct that hardcodes the number of entries, but that is a burden for both developers and readers of the code. lib/x86_64/processor.c:664:19: warning: field 'header' with variable sized type 'struct kvm_msrs' not at the end of a struct or class is a GNU extension [-Wgnu-variable-sized-type-not-at-end] struct kvm_msrs header; ^ lib/x86_64/processor.c:772:19: warning: field 'header' with variable sized type 'struct kvm_msrs' not at the end of a struct or class is a GNU extension [-Wgnu-variable-sized-type-not-at-end] struct kvm_msrs header; ^ lib/x86_64/processor.c:787:19: warning: field 'header' with variable sized type 'struct kvm_msrs' not at the end of a struct or class is a GNU extension [-Wgnu-variable-sized-type-not-at-end] struct kvm_msrs header; ^ 3 warnings generated. x86_64/hyperv_tlb_flush.c:54:18: warning: field 'hv_vp_set' with variable sized type 'struct hv_vpset' not at the end of a struct or class is a GNU extension [-Wgnu-variable-sized-type-not-at-end] struct hv_vpset hv_vp_set; ^ 1 warning generated. x86_64/xen_shinfo_test.c:137:25: warning: field 'info' with variable sized type 'struct kvm_irq_routing' not at the end of a struct or class is a GNU extension [-Wgnu-variable-sized-type-not-at-end] struct kvm_irq_routing info; ^ 1 warning generated. Signed-off-by: Sean Christopherson Message-Id: <20221213001653.3852042-12-seanjc@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2acba3c74ad6..1750f91dd936 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -194,6 +194,7 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -Wno-gnu-variable-sized-type-not-at-end \ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -- cgit v1.2.3-59-g8ed1b From 2f5213b8fc311eaa8fc78de7ecbd27ead027993c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 9 Dec 2022 12:55:44 -0800 Subject: KVM: selftests: Use magic value to signal ucall_alloc() failure Use a magic value to signal a ucall_alloc() failure instead of simply doing GUEST_ASSERT(). GUEST_ASSERT() relies on ucall_alloc() and so a failure puts the guest into an infinite loop. Use -1 as the magic value, as a real ucall struct should never wrap. Reported-by: Oliver Upton Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/ucall_common.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 0cc0971ce60e..2f0e2ea941cc 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -4,6 +4,8 @@ #include "linux/bitmap.h" #include "linux/atomic.h" +#define GUEST_UCALL_FAILED -1 + struct ucall_header { DECLARE_BITMAP(in_use, KVM_MAX_VCPUS); struct ucall ucalls[KVM_MAX_VCPUS]; @@ -41,7 +43,8 @@ static struct ucall *ucall_alloc(void) struct ucall *uc; int i; - GUEST_ASSERT(ucall_pool); + if (!ucall_pool) + goto ucall_failed; for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (!test_and_set_bit(i, ucall_pool->in_use)) { @@ -51,7 +54,13 @@ static struct ucall *ucall_alloc(void) } } - GUEST_ASSERT(0); +ucall_failed: + /* + * If the vCPU cannot grab a ucall structure, make a bare ucall with a + * magic value to signal to get_ucall() that things went sideways. + * GUEST_ASSERT() depends on ucall_alloc() and so cannot be used here. + */ + ucall_arch_do_ucall(GUEST_UCALL_FAILED); return NULL; } @@ -93,6 +102,9 @@ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) addr = ucall_arch_get_ucall(vcpu); if (addr) { + TEST_ASSERT(addr != (void *)GUEST_UCALL_FAILED, + "Guest failed to allocate ucall struct"); + memcpy(uc, addr, sizeof(*uc)); vcpu_run_complete_io(vcpu); } else { -- cgit v1.2.3-59-g8ed1b From feb84f6daa7e7d51444d13fa65df7d5562fd0075 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 12 Dec 2022 05:36:53 -0500 Subject: KVM: selftests: document the default implementation of vm_vaddr_populate_bitmap Explain the meaning of the bit manipulations of vm_vaddr_populate_bitmap. These correspond to the "canonical addresses" of x86 and other architectures, but that is not obvious. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index c88c3ace16d2..bd892a814951 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -186,6 +186,15 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); +/* + * Initializes vm->vpages_valid to match the canonical VA space of the + * architecture. + * + * The default implementation is valid for architectures which split the + * range addressed by a single page table into a low and high region + * based on the MSB of the VA. On architectures with this behavior + * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1]. + */ __weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) { sparsebit_set_num(vm->vpages_valid, -- cgit v1.2.3-59-g8ed1b From 7a16142505cbb9b80d5e998e32b1d882e0f45d64 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 9 Dec 2022 01:53:04 +0000 Subject: KVM: arm64: selftests: Don't identity map the ucall MMIO hole Currently the ucall MMIO hole is placed immediately after slot0, which is a relatively safe address in the PA space. However, it is possible that the same address has already been used for something else (like the guest program image) in the VA space. At least in my own testing, building the vgic_irq test with clang leads to the MMIO hole appearing underneath gicv3_ops. Stop identity mapping the MMIO hole and instead find an unused VA to map to it. Yet another subtle detail of the KVM selftests library is that virt_pg_map() does not update vm->vpages_mapped. Switch over to virt_map() instead to guarantee that the chosen VA isn't to something else. Signed-off-by: Oliver Upton Message-Id: <20221209015307.1781352-6-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/aarch64/ucall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index 562c16dfbb00..f212bd8ab93d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -14,11 +14,13 @@ static vm_vaddr_t *ucall_exit_mmio_addr; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { - virt_pg_map(vm, mmio_gpa, mmio_gpa); + vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + + virt_map(vm, mmio_gva, mmio_gpa, 1); vm->ucall_mmio_addr = mmio_gpa; - write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa); + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); } void ucall_arch_do_ucall(vm_vaddr_t uc) -- cgit v1.2.3-59-g8ed1b From 92c8191bb5d3f670ed806f91823381193288a4e1 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 9 Dec 2022 01:53:02 +0000 Subject: KVM: selftests: Mark correct page as mapped in virt_map() The loop marks vaddr as mapped after incrementing it by page size, thereby marking the *next* page as mapped. Set the bit in vpages_mapped first instead. Fixes: 56fc7732031d ("KVM: selftests: Fill in vm->vpages_mapped bitmap in virt_map() too") Signed-off-by: Oliver Upton Message-Id: <20221209015307.1781352-4-oliver.upton@linux.dev> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index bd892a814951..56d5ea949cbb 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1425,10 +1425,10 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, while (npages--) { virt_pg_map(vm, vaddr, paddr); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); + vaddr += page_size; paddr += page_size; - - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } } -- cgit v1.2.3-59-g8ed1b From e0a78525f540f9d9a44a296f307b8b74cee4c288 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 5 Dec 2022 09:20:44 +0100 Subject: MAINTAINERS: adjust entry after renaming the vmx hyperv files Commit a789aeba4196 ("KVM: VMX: Rename "vmx/evmcs.{ch}" to "vmx/hyperv.{ch}"") renames the VMX specific Hyper-V files, but does not adjust the entry in MAINTAINERS. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a broken reference. Repair this file reference in KVM X86 HYPER-V (KVM/hyper-v). Signed-off-by: Lukas Bulwahn Fixes: a789aeba4196 ("KVM: VMX: Rename "vmx/evmcs.{ch}" to "vmx/hyperv.{ch}"") Reviewed-by: Sean Christopherson Message-Id: <20221205082044.10141-1-lukas.bulwahn@gmail.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 89672a59c0c3..8d90da0cda5a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11322,7 +11322,7 @@ F: arch/x86/kvm/hyperv.* F: arch/x86/kvm/kvm_onhyperv.* F: arch/x86/kvm/svm/hyperv.* F: arch/x86/kvm/svm/svm_onhyperv.* -F: arch/x86/kvm/vmx/evmcs.* +F: arch/x86/kvm/vmx/hyperv.* KVM X86 Xen (KVM/Xen) M: David Woodhouse -- cgit v1.2.3-59-g8ed1b From a303def0fc18f0f2393b5c5f8ae3d2657a9713dc Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 7 Dec 2022 20:06:16 +0800 Subject: kvm: Remove the unused macro KVM_MMU_READ_{,UN}LOCK() No code is using KVM_MMU_READ_LOCK() or KVM_MMU_READ_UNLOCK(). They used to be in virt/kvm/pfncache.c: KVM_MMU_READ_LOCK(kvm); retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva); KVM_MMU_READ_UNLOCK(kvm); However, since 58cd407ca4c6 ("KVM: Fix multiple races in gfn=>pfn cache refresh", 2022-05-25) the code is only relying on the MMU notifier's invalidation count and sequence number. Signed-off-by: Lai Jiangshan Message-Id: <20221207120617.9409-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_mm.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h index a1ab15006af3..180f1a09e6ba 100644 --- a/virt/kvm/kvm_mm.h +++ b/virt/kvm/kvm_mm.h @@ -14,14 +14,10 @@ #define KVM_MMU_LOCK_INIT(kvm) rwlock_init(&(kvm)->mmu_lock) #define KVM_MMU_LOCK(kvm) write_lock(&(kvm)->mmu_lock) #define KVM_MMU_UNLOCK(kvm) write_unlock(&(kvm)->mmu_lock) -#define KVM_MMU_READ_LOCK(kvm) read_lock(&(kvm)->mmu_lock) -#define KVM_MMU_READ_UNLOCK(kvm) read_unlock(&(kvm)->mmu_lock) #else #define KVM_MMU_LOCK_INIT(kvm) spin_lock_init(&(kvm)->mmu_lock) #define KVM_MMU_LOCK(kvm) spin_lock(&(kvm)->mmu_lock) #define KVM_MMU_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock) -#define KVM_MMU_READ_LOCK(kvm) spin_lock(&(kvm)->mmu_lock) -#define KVM_MMU_READ_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock) #endif /* KVM_HAVE_MMU_RWLOCK */ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible, -- cgit v1.2.3-59-g8ed1b From 562f5bc48a8d99a8898c734ecacf061a79a88fbf Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 7 Dec 2022 20:05:05 +0800 Subject: kvm: x86/mmu: Remove duplicated "be split" in spte.h "be split be split" -> "be split" Signed-off-by: Lai Jiangshan Message-Id: <20221207120505.9175-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/spte.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 1f03701b943a..6f54dc9409c9 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -363,7 +363,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, * A shadow-present leaf SPTE may be non-writable for 4 possible reasons: * * 1. To intercept writes for dirty logging. KVM write-protects huge pages - * so that they can be split be split down into the dirty logging + * so that they can be split down into the dirty logging * granularity (4KiB) whenever the guest writes to them. KVM also * write-protects 4KiB pages so that writes can be recorded in the dirty log * (e.g. if not using PML). SPTEs are write-protected for dirty logging -- cgit v1.2.3-59-g8ed1b From 23e528d9bce2385967370ad95a7d52a3c7a0a016 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 7 Dec 2022 00:36:37 +0000 Subject: KVM: Delete extra block of "};" in the KVM API documentation Delete an extra block of code/documentation that snuck in when KVM's documentation was converted to ReST format. Fixes: 106ee47dc633 ("docs: kvm: Convert api.txt to ReST format") Signed-off-by: Sean Christopherson Message-Id: <20221207003637.2041211-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 778c6460d1de..d795d683601c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6577,11 +6577,6 @@ Please note that the kernel is allowed to use the kvm_run structure as the primary storage for certain register types. Therefore, the kernel may use the values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set. -:: - - }; - - 6. Capabilities that can be enabled on vCPUs ============================================ -- cgit v1.2.3-59-g8ed1b From 385407a69d5140825d4cdab814cbf128ba63a64a Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 26 Dec 2022 12:03:15 +0000 Subject: KVM: x86/xen: Fix memory leak in kvm_xen_write_hypercall_page() Release page irrespectively of kvm_vcpu_write_guest() return value. Suggested-by: Paul Durrant Fixes: 23200b7a30de ("KVM: x86/xen: intercept xen hypercalls if enabled") Signed-off-by: Michal Luczaj Message-Id: <20221220151454.712165-1-mhal@rbox.co> Reviewed-by: Paul Durrant Signed-off-by: David Woodhouse Message-Id: <20221226120320.1125390-1-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index d7af40240248..d1a98d834d18 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1069,6 +1069,7 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 : kvm->arch.xen_hvm_config.blob_size_32; u8 *page; + int ret; if (page_num >= blob_size) return 1; @@ -1079,10 +1080,10 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) if (IS_ERR(page)) return PTR_ERR(page); - if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) { - kfree(page); + ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE); + kfree(page); + if (ret) return 1; - } } return 0; } -- cgit v1.2.3-59-g8ed1b From 92c58965e9656dc6e682a8ffe520fac0fb256d13 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 26 Dec 2022 12:03:16 +0000 Subject: KVM: x86/xen: Use kvm_read_guest_virt() instead of open-coding it badly In particular, we shouldn't assume that being contiguous in guest virtual address space means being contiguous in guest *physical* address space. In dropping the manual calls to kvm_mmu_gva_to_gpa_system(), also drop the srcu_read_lock() that was around them. All call sites are reached from kvm_xen_hypercall() which is called from the handle_exit function with the read lock already held. 536395260 ("KVM: x86/xen: handle PV timers oneshot mode") 1a65105a5 ("KVM: x86/xen: handle PV spinlocks slowpath") Fixes: 2fd6df2f2 ("KVM: x86/xen: intercept EVTCHNOP_send from guests") Signed-off-by: David Woodhouse Message-Id: <20221226120320.1125390-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 56 ++++++++++++++++++------------------------------------ 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index d1a98d834d18..929b887eafd7 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1184,30 +1184,22 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, u64 param, u64 *r) { - int idx, i; struct sched_poll sched_poll; evtchn_port_t port, *ports; - gpa_t gpa; + struct x86_exception e; + int i; if (!lapic_in_kernel(vcpu) || !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) return false; - idx = srcu_read_lock(&vcpu->kvm->srcu); - gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - if (!gpa) { - *r = -EFAULT; - return true; - } - if (IS_ENABLED(CONFIG_64BIT) && !longmode) { struct compat_sched_poll sp32; /* Sanity check that the compat struct definition is correct */ BUILD_BUG_ON(sizeof(sp32) != 16); - if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) { + if (kvm_read_guest_virt(vcpu, param, &sp32, sizeof(sp32), &e)) { *r = -EFAULT; return true; } @@ -1221,8 +1213,8 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, sched_poll.nr_ports = sp32.nr_ports; sched_poll.timeout = sp32.timeout; } else { - if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll, - sizeof(sched_poll))) { + if (kvm_read_guest_virt(vcpu, param, &sched_poll, + sizeof(sched_poll), &e)) { *r = -EFAULT; return true; } @@ -1244,18 +1236,13 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, } else ports = &port; + if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports, + sched_poll.nr_ports * sizeof(*ports), &e)) { + *r = -EFAULT; + return true; + } + for (i = 0; i < sched_poll.nr_ports; i++) { - idx = srcu_read_lock(&vcpu->kvm->srcu); - gpa = kvm_mmu_gva_to_gpa_system(vcpu, - (gva_t)(sched_poll.ports + i), - NULL); - srcu_read_unlock(&vcpu->kvm->srcu, idx); - - if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, - &ports[i], sizeof(port))) { - *r = -EFAULT; - goto out; - } if (ports[i] >= max_evtchn_port(vcpu->kvm)) { *r = -EINVAL; goto out; @@ -1331,9 +1318,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd, int vcpu_id, u64 param, u64 *r) { struct vcpu_set_singleshot_timer oneshot; + struct x86_exception e; s64 delta; - gpa_t gpa; - int idx; if (!kvm_xen_timer_enabled(vcpu)) return false; @@ -1344,9 +1330,6 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd, *r = -EINVAL; return true; } - idx = srcu_read_lock(&vcpu->kvm->srcu); - gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL); - srcu_read_unlock(&vcpu->kvm->srcu, idx); /* * The only difference for 32-bit compat is the 4 bytes of @@ -1364,9 +1347,8 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd, BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) != sizeof_field(struct vcpu_set_singleshot_timer, flags)); - if (!gpa || - kvm_vcpu_read_guest(vcpu, gpa, &oneshot, longmode ? sizeof(oneshot) : - sizeof(struct compat_vcpu_set_singleshot_timer))) { + if (kvm_read_guest_virt(vcpu, param, &oneshot, longmode ? sizeof(oneshot) : + sizeof(struct compat_vcpu_set_singleshot_timer), &e)) { *r = -EFAULT; return true; } @@ -2003,14 +1985,12 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r) { struct evtchnfd *evtchnfd; struct evtchn_send send; - gpa_t gpa; - int idx; + struct x86_exception e; - idx = srcu_read_lock(&vcpu->kvm->srcu); - gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL); - srcu_read_unlock(&vcpu->kvm->srcu, idx); + /* Sanity check: this structure is the same for 32-bit and 64-bit */ + BUILD_BUG_ON(sizeof(send) != 4); - if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &send, sizeof(send))) { + if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) { *r = -EFAULT; return true; } -- cgit v1.2.3-59-g8ed1b From 70eae03087a3101493d9a1cf60c86c5f65600822 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 26 Dec 2022 12:03:17 +0000 Subject: KVM: x86/xen: Fix SRCU/RCU usage in readers of evtchn_ports The evtchnfd structure itself must be protected by either kvm->lock or SRCU. Use the former in kvm_xen_eventfd_update(), since the lock is being taken anyway; kvm_xen_hcall_evtchn_send() instead is a reader and does not need kvm->lock, and is called in SRCU critical section from the kvm_x86_handle_exit function. It is also important to use rcu_read_{lock,unlock}() in kvm_xen_hcall_evtchn_send(), because idr_remove() will *not* use synchronize_srcu() to wait for readers to complete. Remove a superfluous if (kvm) check before calling synchronize_srcu() in kvm_xen_eventfd_deassign() where kvm has been dereferenced already. Co-developed-by: Michal Luczaj Signed-off-by: Michal Luczaj Signed-off-by: Paolo Bonzini Signed-off-by: David Woodhouse Message-Id: <20221226120320.1125390-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 929b887eafd7..9b75457120f7 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1808,20 +1808,23 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, { u32 port = data->u.evtchn.send_port; struct evtchnfd *evtchnfd; + int ret; if (!port || port >= max_evtchn_port(kvm)) return -EINVAL; + /* Protect writes to evtchnfd as well as the idr lookup. */ mutex_lock(&kvm->lock); evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port); - mutex_unlock(&kvm->lock); + ret = -ENOENT; if (!evtchnfd) - return -ENOENT; + goto out_unlock; /* For an UPDATE, nothing may change except the priority/vcpu */ + ret = -EINVAL; if (evtchnfd->type != data->u.evtchn.type) - return -EINVAL; + goto out_unlock; /* * Port cannot change, and if it's zero that was an eventfd @@ -1829,20 +1832,21 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, */ if (!evtchnfd->deliver.port.port || evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port) - return -EINVAL; + goto out_unlock; /* We only support 2 level event channels for now */ if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) - return -EINVAL; + goto out_unlock; - mutex_lock(&kvm->lock); evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) { evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu; evtchnfd->deliver.port.vcpu_idx = -1; } + ret = 0; +out_unlock: mutex_unlock(&kvm->lock); - return 0; + return ret; } /* @@ -1935,8 +1939,7 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port) if (!evtchnfd) return -ENOENT; - if (kvm) - synchronize_srcu(&kvm->srcu); + synchronize_srcu(&kvm->srcu); if (!evtchnfd->deliver.port.port) eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); kfree(evtchnfd); @@ -1989,14 +1992,18 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r) /* Sanity check: this structure is the same for 32-bit and 64-bit */ BUILD_BUG_ON(sizeof(send) != 4); - if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) { *r = -EFAULT; return true; } - /* The evtchn_ports idr is protected by vcpu->kvm->srcu */ + /* + * evtchnfd is protected by kvm->srcu; the idr lookup instead + * is protected by RCU. + */ + rcu_read_lock(); evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port); + rcu_read_unlock(); if (!evtchnfd) return false; -- cgit v1.2.3-59-g8ed1b From 1c14faa5087db0a098c3ab1e183f2b5df4b0d3f2 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 26 Dec 2022 12:03:18 +0000 Subject: KVM: x86/xen: Simplify eventfd IOCTLs Port number is validated in kvm_xen_setattr_evtchn(). Remove superfluous checks in kvm_xen_eventfd_assign() and kvm_xen_eventfd_update(). Signed-off-by: Michal Luczaj Message-Id: <20221222203021.1944101-3-mhal@rbox.co> Signed-off-by: Paolo Bonzini Signed-off-by: David Woodhouse Message-Id: <20221226120320.1125390-4-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 9b75457120f7..bddbe5ac5cfa 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1810,9 +1810,6 @@ static int kvm_xen_eventfd_update(struct kvm *kvm, struct evtchnfd *evtchnfd; int ret; - if (!port || port >= max_evtchn_port(kvm)) - return -EINVAL; - /* Protect writes to evtchnfd as well as the idr lookup. */ mutex_lock(&kvm->lock); evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port); @@ -1858,12 +1855,9 @@ static int kvm_xen_eventfd_assign(struct kvm *kvm, { u32 port = data->u.evtchn.send_port; struct eventfd_ctx *eventfd = NULL; - struct evtchnfd *evtchnfd = NULL; + struct evtchnfd *evtchnfd; int ret = -EINVAL; - if (!port || port >= max_evtchn_port(kvm)) - return -EINVAL; - evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL); if (!evtchnfd) return -ENOMEM; -- cgit v1.2.3-59-g8ed1b From b0305c1e0e27ad91187bc6d5ac3d502799faf239 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 26 Dec 2022 12:03:19 +0000 Subject: KVM: x86/xen: Add KVM_XEN_INVALID_GPA and KVM_XEN_INVALID_GFN to uapi These are (uint64_t)-1 magic values are a userspace ABI, allowing the shared info pages and other enlightenments to be disabled. This isn't a Xen ABI because Xen doesn't let the guest turn these off except with the full SHUTDOWN_soft_reset mechanism. Under KVM, the userspace VMM is expected to handle soft reset, and tear down the kernel parts of the enlightenments accordingly. Suggested-by: Sean Christopherson Signed-off-by: David Woodhouse Message-Id: <20221226120320.1125390-5-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 14 +++++++------- include/uapi/linux/kvm.h | 3 +++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index bddbe5ac5cfa..b178f40bd863 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -41,7 +41,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn) int ret = 0; int idx = srcu_read_lock(&kvm->srcu); - if (gfn == GPA_INVALID) { + if (gfn == KVM_XEN_INVALID_GFN) { kvm_gpc_deactivate(gpc); goto out; } @@ -659,7 +659,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) if (kvm->arch.xen.shinfo_cache.active) data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa); else - data->u.shared_info.gfn = GPA_INVALID; + data->u.shared_info.gfn = KVM_XEN_INVALID_GFN; r = 0; break; @@ -705,7 +705,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) BUILD_BUG_ON(offsetof(struct vcpu_info, time) != offsetof(struct compat_vcpu_info, time)); - if (data->u.gpa == GPA_INVALID) { + if (data->u.gpa == KVM_XEN_INVALID_GPA) { kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); r = 0; break; @@ -719,7 +719,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: - if (data->u.gpa == GPA_INVALID) { + if (data->u.gpa == KVM_XEN_INVALID_GPA) { kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); r = 0; break; @@ -739,7 +739,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) r = -EOPNOTSUPP; break; } - if (data->u.gpa == GPA_INVALID) { + if (data->u.gpa == KVM_XEN_INVALID_GPA) { r = 0; deactivate_out: kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); @@ -937,7 +937,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) if (vcpu->arch.xen.vcpu_info_cache.active) data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa; else - data->u.gpa = GPA_INVALID; + data->u.gpa = KVM_XEN_INVALID_GPA; r = 0; break; @@ -945,7 +945,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) if (vcpu->arch.xen.vcpu_time_info_cache.active) data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa; else - data->u.gpa = GPA_INVALID; + data->u.gpa = KVM_XEN_INVALID_GPA; r = 0; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 20522d4ba1e0..55155e262646 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1767,6 +1767,7 @@ struct kvm_xen_hvm_attr { __u8 runstate_update_flag; struct { __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) } shared_info; struct { __u32 send_port; @@ -1798,6 +1799,7 @@ struct kvm_xen_hvm_attr { } u; }; + /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ #define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 #define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 @@ -1823,6 +1825,7 @@ struct kvm_xen_vcpu_attr { __u16 pad[3]; union { __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) __u64 pad[8]; struct { __u64 state; -- cgit v1.2.3-59-g8ed1b From af2808906aab0bf5786021d45b3ebfca6f4ad72f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 26 Dec 2022 12:03:20 +0000 Subject: KVM: x86/xen: Documentation updates and clarifications Most notably, the KVM_XEN_EVTCHN_RESET feature had escaped documentation entirely. Along with how to turn most stuff off on SHUTDOWN_soft_reset. Signed-off-by: David Woodhouse Message-Id: <20221226120320.1125390-6-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index d795d683601c..af6471657395 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5343,9 +5343,9 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO 32 vCPUs in the shared_info page, KVM does not automatically do so and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used explicitly even when the vcpu_info for a given vCPU resides at the - "default" location in the shared_info page. This is because KVM is - not aware of the Xen CPU id which is used as the index into the - vcpu_info[] array, so cannot know the correct default location. + "default" location in the shared_info page. This is because KVM may + not be aware of the Xen CPU id which is used as the index into the + vcpu_info[] array, so may know the correct default location. Note that the shared info page may be constantly written to by KVM; it contains the event channel bitmap used to deliver interrupts to @@ -5356,23 +5356,29 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO any vCPU has been running or any event channel interrupts can be routed to the guest. + Setting the gfn to KVM_XEN_INVALID_GFN will disable the shared info + page. + KVM_XEN_ATTR_TYPE_UPCALL_VECTOR Sets the exception vector used to deliver Xen event channel upcalls. This is the HVM-wide vector injected directly by the hypervisor (not through the local APIC), typically configured by a guest via - HVM_PARAM_CALLBACK_IRQ. + HVM_PARAM_CALLBACK_IRQ. This can be disabled again (e.g. for guest + SHUTDOWN_soft_reset) by setting it to zero. KVM_XEN_ATTR_TYPE_EVTCHN This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It configures an outbound port number for interception of EVTCHNOP_send requests - from the guest. A given sending port number may be directed back - to a specified vCPU (by APIC ID) / port / priority on the guest, - or to trigger events on an eventfd. The vCPU and priority can be - changed by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call, - but other fields cannot change for a given sending port. A port - mapping is removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags - field. + from the guest. A given sending port number may be directed back to + a specified vCPU (by APIC ID) / port / priority on the guest, or to + trigger events on an eventfd. The vCPU and priority can be changed + by setting KVM_XEN_EVTCHN_UPDATE in a subsequent call, but but other + fields cannot change for a given sending port. A port mapping is + removed by using KVM_XEN_EVTCHN_DEASSIGN in the flags field. Passing + KVM_XEN_EVTCHN_RESET in the flags field removes all interception of + outbound event channels. The values of the flags field are mutually + exclusive and cannot be combined as a bitmask. KVM_XEN_ATTR_TYPE_XEN_VERSION This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates @@ -5388,7 +5394,7 @@ KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read other vCPUs' vcpu_runstate_info. Xen guests enable this feature via - the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist + the VMASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist hypercall. 4.127 KVM_XEN_HVM_GET_ATTR @@ -5446,15 +5452,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO As with the shared_info page for the VM, the corresponding page may be dirtied at any time if event channel interrupt delivery is enabled, so userspace should always assume that the page is dirty without relying - on dirty logging. + on dirty logging. Setting the gpa to KVM_XEN_INVALID_GPA will disable + the vcpu_info. KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO Sets the guest physical address of an additional pvclock structure for a given vCPU. This is typically used for guest vsyscall support. + Setting the gpa to KVM_XEN_INVALID_GPA will disable the structure. KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR Sets the guest physical address of the vcpu_runstate_info for a given vCPU. This is how a Xen guest tracks CPU state such as steal time. + Setting the gpa to KVM_XEN_INVALID_GPA will disable the runstate area. KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of @@ -5487,7 +5496,8 @@ KVM_XEN_VCPU_ATTR_TYPE_TIMER This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates support for KVM_XEN_HVM_CONFIG_EVTCHN_SEND features. It sets the event channel port/priority for the VIRQ_TIMER of the vCPU, as well - as allowing a pending timer to be saved/restored. + as allowing a pending timer to be saved/restored. Setting the timer + port to zero disables kernel handling of the singleshot timer. KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates @@ -5495,7 +5505,8 @@ KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR per-vCPU local APIC upcall vector, configured by a Xen guest with the HVMOP_set_evtchn_upcall_vector hypercall. This is typically used by Windows guests, and is distinct from the HVM-wide upcall - vector configured with HVM_PARAM_CALLBACK_IRQ. + vector configured with HVM_PARAM_CALLBACK_IRQ. It is disabled by + setting the vector to zero. 4.129 KVM_XEN_VCPU_GET_ATTR -- cgit v1.2.3-59-g8ed1b From e2d371484653ac83b970d3ebcf343383f39f8b6b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Fri, 18 Nov 2022 10:45:39 +0530 Subject: perf core: Return error pointer if inherit_event() fails to find pmu_ctx inherit_event() returns NULL only when it finds orphaned events otherwise it returns either valid child_event pointer or an error pointer. Follow the same when it fails to find pmu_ctx. Fixes: bd2756811766 ("perf: Rewrite core context handling") Reported-by: Dan Carpenter Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221118051539.820-1-ravi.bangoria@amd.com --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index eacc3702654d..4bd2434251f0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -13231,7 +13231,7 @@ inherit_event(struct perf_event *parent_event, pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event); if (IS_ERR(pmu_ctx)) { free_event(child_event); - return NULL; + return ERR_CAST(pmu_ctx); } child_event->pmu_ctx = pmu_ctx; -- cgit v1.2.3-59-g8ed1b From f841b682baef90ee144df8b12e2c76aa460717c1 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Wed, 7 Dec 2022 20:40:23 +0800 Subject: perf/core: Fix cgroup events tracking We encounter perf warnings when using cgroup events like: cd /sys/fs/cgroup mkdir test perf stat -e cycles -a -G test Which then triggers: WARNING: CPU: 0 PID: 690 at kernel/events/core.c:849 perf_cgroup_switch+0xb2/0xc0 Call Trace: __schedule+0x4ae/0x9f0 ? _raw_spin_unlock_irqrestore+0x23/0x40 ? __cond_resched+0x18/0x20 preempt_schedule_common+0x2d/0x70 __cond_resched+0x18/0x20 wait_for_completion+0x2f/0x160 ? cpu_stop_queue_work+0x9e/0x130 affine_move_task+0x18a/0x4f0 WARNING: CPU: 0 PID: 690 at kernel/events/core.c:829 ctx_sched_in+0x1cf/0x1e0 Call Trace: ? ctx_sched_out+0xb7/0x1b0 perf_cgroup_switch+0x88/0xc0 __schedule+0x4ae/0x9f0 ? _raw_spin_unlock_irqrestore+0x23/0x40 ? __cond_resched+0x18/0x20 preempt_schedule_common+0x2d/0x70 __cond_resched+0x18/0x20 wait_for_completion+0x2f/0x160 ? cpu_stop_queue_work+0x9e/0x130 affine_move_task+0x18a/0x4f0 The above two warnings are not complete here since I remove other unimportant information. The problem is caused by the perf cgroup events tracking: CPU0 CPU1 perf_event_open() perf_event_alloc() account_event() account_event_cpu() atomic_inc(perf_cgroup_events) __perf_event_task_sched_out() if (atomic_read(perf_cgroup_events)) perf_cgroup_switch() // kernel/events/core.c:849 WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0) if (READ_ONCE(cpuctx->cgrp) == cgrp) // false return perf_ctx_lock() ctx_sched_out() cpuctx->cgrp = cgrp ctx_sched_in() perf_cgroup_set_timestamp() // kernel/events/core.c:829 WARN_ON_ONCE(!ctx->nr_cgroups) perf_ctx_unlock() perf_install_in_context() cpu_function_call() __perf_install_in_context() add_event_to_ctx() list_add_event() perf_cgroup_event_enable() ctx->nr_cgroups++ cpuctx->cgrp = X We can see from above that we wrongly use percpu atomic perf_cgroup_events to check if we need to perf_cgroup_switch(), which should only be used when we know this CPU has cgroup events enabled. The commit bd2756811766 ("perf: Rewrite core context handling") change to have only one context per-CPU, so we can just use cpuctx->cgrp to check if this CPU has cgroup events enabled. So percpu atomic perf_cgroup_events is not needed. Fixes: bd2756811766 ("perf: Rewrite core context handling") Signed-off-by: Chengming Zhou Signed-off-by: Peter Zijlstra (Intel) Tested-by: Ravi Bangoria Link: https://lkml.kernel.org/r/20221207124023.66252-1-zhouchengming@bytedance.com --- kernel/events/core.c | 42 ++++++++++-------------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4bd2434251f0..37c0f04d7a00 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -380,7 +380,6 @@ enum event_type_t { /* * perf_sched_events : >0 events exist - * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ static void perf_sched_delayed(struct work_struct *work); @@ -389,7 +388,6 @@ static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed); static DEFINE_MUTEX(perf_sched_mutex); static atomic_t perf_sched_count; -static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; @@ -844,9 +842,16 @@ static void perf_cgroup_switch(struct task_struct *task) struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context); struct perf_cgroup *cgrp; - cgrp = perf_cgroup_from_task(task, NULL); + /* + * cpuctx->cgrp is set when the first cgroup event enabled, + * and is cleared when the last cgroup event disabled. + */ + if (READ_ONCE(cpuctx->cgrp) == NULL) + return; WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); + + cgrp = perf_cgroup_from_task(task, NULL); if (READ_ONCE(cpuctx->cgrp) == cgrp) return; @@ -3631,8 +3636,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * to check if we have to switch out PMU state. * cgroup event are system-wide mode only */ - if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_switch(next); + perf_cgroup_switch(next); } static bool perf_less_group_idx(const void *l, const void *r) @@ -4974,15 +4978,6 @@ static void unaccount_pmu_sb_event(struct perf_event *event) detach_sb_event(event); } -static void unaccount_event_cpu(struct perf_event *event, int cpu) -{ - if (event->parent) - return; - - if (is_cgroup_event(event)) - atomic_dec(&per_cpu(perf_cgroup_events, cpu)); -} - #ifdef CONFIG_NO_HZ_FULL static DEFINE_SPINLOCK(nr_freq_lock); #endif @@ -5048,8 +5043,6 @@ static void unaccount_event(struct perf_event *event) schedule_delayed_work(&perf_sched_work, HZ); } - unaccount_event_cpu(event, event->cpu); - unaccount_pmu_sb_event(event); } @@ -11679,15 +11672,6 @@ static void account_pmu_sb_event(struct perf_event *event) attach_sb_event(event); } -static void account_event_cpu(struct perf_event *event, int cpu) -{ - if (event->parent) - return; - - if (is_cgroup_event(event)) - atomic_inc(&per_cpu(perf_cgroup_events, cpu)); -} - /* Freq events need the tick to stay alive (see perf_event_task_tick). */ static void account_freq_event_nohz(void) { @@ -11775,8 +11759,6 @@ static void account_event(struct perf_event *event) } enabled: - account_event_cpu(event, event->cpu); - account_pmu_sb_event(event); } @@ -12822,13 +12804,11 @@ static void __perf_pmu_remove(struct perf_event_context *ctx, perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) { perf_remove_from_context(event, 0); - unaccount_event_cpu(event, cpu); put_pmu_ctx(event->pmu_ctx); list_add(&event->migrate_entry, events); for_each_sibling_event(sibling, event) { perf_remove_from_context(sibling, 0); - unaccount_event_cpu(sibling, cpu); put_pmu_ctx(sibling->pmu_ctx); list_add(&sibling->migrate_entry, events); } @@ -12847,7 +12827,6 @@ static void __perf_pmu_install_event(struct pmu *pmu, if (event->state >= PERF_EVENT_STATE_OFF) event->state = PERF_EVENT_STATE_INACTIVE; - account_event_cpu(event, cpu); perf_install_in_context(ctx, event, cpu); } @@ -13742,8 +13721,7 @@ static int __perf_cgroup_move(void *info) struct task_struct *task = info; preempt_disable(); - if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) - perf_cgroup_switch(task); + perf_cgroup_switch(task); preempt_enable(); return 0; -- cgit v1.2.3-59-g8ed1b From 08245672cdc6505550d1a5020603b0a8d4a6dcc7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Dec 2022 13:51:49 +0000 Subject: perf/x86/amd: fix potential integer overflow on shift of a int The left shift of int 32 bit integer constant 1 is evaluated using 32 bit arithmetic and then passed as a 64 bit function argument. In the case where i is 32 or more this can lead to an overflow. Avoid this by shifting using the BIT_ULL macro instead. Fixes: 471af006a747 ("perf/x86/amd: Constrain Large Increment per Cycle events") Signed-off-by: Colin Ian King Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ian Rogers Acked-by: Kim Phillips Link: https://lore.kernel.org/r/20221202135149.1797974-1-colin.i.king@gmail.com --- arch/x86/events/amd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index d6f3703e4119..4386b10682ce 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -1387,7 +1387,7 @@ static int __init amd_core_pmu_init(void) * numbered counter following it. */ for (i = 0; i < x86_pmu.num_counters - 1; i += 2) - even_ctr_mask |= 1 << i; + even_ctr_mask |= BIT_ULL(i); pair_constraint = (struct event_constraint) __EVENT_CONSTRAINT(0, even_ctr_mask, 0, -- cgit v1.2.3-59-g8ed1b From a551844e345ba2a1c533dee4b55cb0efddb1bcda Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 19 Dec 2022 15:40:04 +0100 Subject: perf: Fix use-after-free in error path The syscall error path has a use-after-free; put_pmu_ctx() will reference ctx, therefore we must ensure ctx is destroyed after pmu_ctx is. Fixes: bd2756811766 ("perf: Rewrite core context handling") Reported-by: syzbot+b8e8c01c8ade4fe6e48f@syzkaller.appspotmail.com Signed-off-by: Peter Zijlstra (Intel) Tested-by: Chengming Zhou Link: https://lkml.kernel.org/r/Y6B3xEgkbmFUCeni@hirez.programming.kicks-ass.net --- kernel/events/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 37c0f04d7a00..63d674c9b70e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12671,7 +12671,8 @@ SYSCALL_DEFINE5(perf_event_open, return event_fd; err_context: - /* event->pmu_ctx freed by free_event() */ + put_pmu_ctx(event->pmu_ctx); + event->pmu_ctx = NULL; /* _free_event() */ err_locked: mutex_unlock(&ctx->mutex); perf_unpin_context(ctx); @@ -12784,6 +12785,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, err_pmu_ctx: put_pmu_ctx(pmu_ctx); + event->pmu_ctx = NULL; /* _free_event() */ err_unlock: mutex_unlock(&ctx->mutex); perf_unpin_context(ctx); -- cgit v1.2.3-59-g8ed1b From 0a041ebca4956292cadfb14a63ace3a9c1dcb0a3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 20 Dec 2022 14:31:40 -0800 Subject: perf/core: Call LSM hook after copying perf_event_attr It passes the attr struct to the security_perf_event_open() but it's not initialized yet. Fixes: da97e18458fb ("perf_event: Add support for LSM and SELinux checks") Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Joel Fernandes (Google) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221220223140.4020470-1-namhyung@kernel.org --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 63d674c9b70e..d56328e5080e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -12321,12 +12321,12 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; - /* Do we allow access to perf_event_open(2) ? */ - err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; - err = perf_copy_attr(attr_uptr, &attr); + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); if (err) return err; -- cgit v1.2.3-59-g8ed1b From ade8c20847fcc3f4de08b35f730ec04ef29ddbdc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:43:23 +0100 Subject: x86/calldepth: Fix incorrect init section references The addition of callthunks_translate_call_dest means that skip_addr() and patch_dest() can no longer be discarded as part of the __init section freeing: WARNING: modpost: vmlinux.o: section mismatch in reference: callthunks_translate_call_dest.cold (section: .text.unlikely) -> skip_addr (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: callthunks_translate_call_dest.cold (section: .text.unlikely) -> patch_dest (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: is_callthunk.cold (section: .text.unlikely) -> skip_addr (section: .init.text) ERROR: modpost: Section mismatches detected. Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them. Fixes: b2e9dfe54be4 ("x86/bpf: Emit call depth accounting if required") Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221215164334.968863-1-arnd@kernel.org --- arch/x86/kernel/callthunks.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 7d2c75ec9a8c..ffea98f9064b 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -119,7 +119,7 @@ static bool is_coretext(const struct core_text *ct, void *addr) return within_module_coretext(addr); } -static __init_or_module bool skip_addr(void *dest) +static bool skip_addr(void *dest) { if (dest == error_entry) return true; @@ -181,7 +181,7 @@ static const u8 nops[] = { 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, }; -static __init_or_module void *patch_dest(void *dest, bool direct) +static void *patch_dest(void *dest, bool direct) { unsigned int tsize = SKL_TMPL_SIZE; u8 *pad = dest - tsize; -- cgit v1.2.3-59-g8ed1b From 1993bf97992df2d560287f3c4120eda57426843d Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 19 Dec 2022 23:35:10 +0900 Subject: x86/kprobes: Fix kprobes instruction boudary check with CONFIG_RETHUNK Since the CONFIG_RETHUNK and CONFIG_SLS will use INT3 for stopping speculative execution after RET instruction, kprobes always failes to check the probed instruction boundary by decoding the function body if the probed address is after such sequence. (Note that some conditional code blocks will be placed after function return, if compiler decides it is not on the hot path.) This is because kprobes expects kgdb puts the INT3 as a software breakpoint and it will replace the original instruction. But these INT3 are not such purpose, it doesn't need to recover the original instruction. To avoid this issue, kprobes checks whether the INT3 is owned by kgdb or not, and if so, stop decoding and make it fail. The other INT3 will come from CONFIG_RETHUNK/CONFIG_SLS and those can be treated as a one-byte instruction. Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation") Suggested-by: Peter Zijlstra Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/167146051026.1374301.392728975473572291.stgit@devnote3 --- arch/x86/kernel/kprobes/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 66299682b6b7..b36f3c367cb2 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -281,12 +282,15 @@ static int can_probe(unsigned long paddr) if (ret < 0) return 0; +#ifdef CONFIG_KGDB /* - * Another debugging subsystem might insert this breakpoint. - * In that case, we can't recover it. + * If there is a dynamically installed kgdb sw breakpoint, + * this function should not be probed. */ - if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && + kgdb_has_hit_break(addr)) return 0; +#endif addr += insn.length; } -- cgit v1.2.3-59-g8ed1b From 63dc6325ff41ee9e570bde705ac34a39c5dbeb44 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 19 Dec 2022 23:35:19 +0900 Subject: x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK Since the CONFIG_RETHUNK and CONFIG_SLS will use INT3 for stopping speculative execution after function return, kprobe jump optimization always fails on the functions with such INT3 inside the function body. (It already checks the INT3 padding between functions, but not inside the function) To avoid this issue, as same as kprobes, check whether the INT3 comes from kgdb or not, and if so, stop decoding and make it fail. The other INT3 will come from CONFIG_RETHUNK/CONFIG_SLS and those can be treated as a one-byte instruction. Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation") Suggested-by: Peter Zijlstra Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/167146051929.1374301.7419382929328081706.stgit@devnote3 --- arch/x86/kernel/kprobes/opt.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index e6b8c5362b94..e57e07b0edb6 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -279,19 +280,6 @@ static int insn_is_indirect_jump(struct insn *insn) return ret; } -static bool is_padding_int3(unsigned long addr, unsigned long eaddr) -{ - unsigned char ops; - - for (; addr < eaddr; addr++) { - if (get_kernel_nofault(ops, (void *)addr) < 0 || - ops != INT3_INSN_OPCODE) - return false; - } - - return true; -} - /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { @@ -334,15 +322,15 @@ static int can_optimize(unsigned long paddr) ret = insn_decode_kernel(&insn, (void *)recovered_insn); if (ret < 0) return 0; - +#ifdef CONFIG_KGDB /* - * In the case of detecting unknown breakpoint, this could be - * a padding INT3 between functions. Let's check that all the - * rest of the bytes are also INT3. + * If there is a dynamically installed kgdb sw breakpoint, + * this function should not be probed. */ - if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) - return is_padding_int3(addr, paddr - offset + size) ? 1 : 0; - + if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && + kgdb_has_hit_break(addr)) + return 0; +#endif /* Recover address */ insn.kaddr = (void *)addr; insn.next_byte = (void *)(addr + insn.length); -- cgit v1.2.3-59-g8ed1b From 94cd8fa09f5f1ebdd4e90964b08b7f2cc4b36c43 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 14 Dec 2022 17:20:08 -0500 Subject: futex: Fix futex_waitv() hrtimer debug object leak on kcalloc error In a scenario where kcalloc() fails to allocate memory, the futex_waitv system call immediately returns -ENOMEM without invoking destroy_hrtimer_on_stack(). When CONFIG_DEBUG_OBJECTS_TIMERS=y, this results in leaking a timer debug object. Fixes: bf69bad38cf6 ("futex: Implement sys_futex_waitv()") Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Davidlohr Bueso Cc: stable@vger.kernel.org Cc: stable@vger.kernel.org # v5.16+ Link: https://lore.kernel.org/r/20221214222008.200393-1-mathieu.desnoyers@efficios.com --- kernel/futex/syscalls.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 086a22d1adb7..a8074079b09e 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -286,19 +286,22 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, } futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); - if (!futexv) - return -ENOMEM; + if (!futexv) { + ret = -ENOMEM; + goto destroy_timer; + } ret = futex_parse_waitv(futexv, waiters, nr_futexes); if (!ret) ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL); + kfree(futexv); + +destroy_timer: if (timeout) { hrtimer_cancel(&to.timer); destroy_hrtimer_on_stack(&to.timer); } - - kfree(futexv); return ret; } -- cgit v1.2.3-59-g8ed1b From 9eb803402a2a83400c6c6afd900e3b7c87c06816 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 16 Nov 2022 21:25:24 +0100 Subject: uapi:io_uring.h: allow linux/time_types.h to be skipped include/uapi/linux/io_uring.h is synced 1:1 into liburing:src/include/liburing/io_uring.h. liburing has a configure check to detect the need for linux/time_types.h. It can opt-out by defining UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H Fixes: 78a861b94959 ("io_uring: add sync cancelation API through io_uring_register()") Link: https://github.com/axboe/liburing/issues/708 Link: https://github.com/axboe/liburing/pull/709 Link: https://lore.kernel.org/io-uring/20221115212614.1308132-1-ammar.faizi@intel.com/T/#m9f5dd571cd4f6a5dee84452dbbca3b92ba7a4091 CC: Jens Axboe Cc: Ammar Faizi Signed-off-by: Stefan Metzmacher Reviewed-by: Ammar Faizi Link: https://lore.kernel.org/r/7071a0a1d751221538b20b63f9160094fc7e06f4.1668630247.git.metze@samba.org Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 9d4c4078e8d0..2780bce62faf 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -10,7 +10,15 @@ #include #include +/* + * this file is shared with liburing and that has to autodetect + * if linux/time_types.h is available or not, it can + * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H + * if linux/time_types.h is not available + */ +#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H #include +#endif #ifdef __cplusplus extern "C" { -- cgit v1.2.3-59-g8ed1b From a79b53aaaab53de017517bf9579b6106397a523c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Dec 2022 05:33:41 -0500 Subject: KVM: x86: fix deadlock for KVM_XEN_EVTCHN_RESET While KVM_XEN_EVTCHN_RESET is usually called with no vCPUs running, if that happened it could cause a deadlock. This is due to kvm_xen_eventfd_reset() doing a synchronize_srcu() inside a kvm->lock critical section. To avoid this, first collect all the evtchnfd objects in an array and free all of them once the kvm->lock critical section is over and th SRCU grace period has expired. Reported-by: Michal Luczaj Cc: David Woodhouse Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 30 +++++++++++++++++++--- .../testing/selftests/kvm/x86_64/xen_shinfo_test.c | 6 +++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index b178f40bd863..2e29bdc2949c 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1942,18 +1942,42 @@ static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port) static int kvm_xen_eventfd_reset(struct kvm *kvm) { - struct evtchnfd *evtchnfd; + struct evtchnfd *evtchnfd, **all_evtchnfds; int i; + int n = 0; mutex_lock(&kvm->lock); + + /* + * Because synchronize_srcu() cannot be called inside the + * critical section, first collect all the evtchnfd objects + * in an array as they are removed from evtchn_ports. + */ + idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) + n++; + + all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL); + if (!all_evtchnfds) { + mutex_unlock(&kvm->lock); + return -ENOMEM; + } + + n = 0; idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { + all_evtchnfds[n++] = evtchnfd; idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port); - synchronize_srcu(&kvm->srcu); + } + mutex_unlock(&kvm->lock); + + synchronize_srcu(&kvm->srcu); + + while (n--) { + evtchnfd = all_evtchnfds[n]; if (!evtchnfd->deliver.port.port) eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); kfree(evtchnfd); } - mutex_unlock(&kvm->lock); + kfree(all_evtchnfds); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 721f6a693799..dae510c263b4 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -962,6 +962,12 @@ int main(int argc, char *argv[]) } done: + struct kvm_xen_hvm_attr evt_reset = { + .type = KVM_XEN_ATTR_TYPE_EVTCHN, + .u.evtchn.flags = KVM_XEN_EVTCHN_RESET, + }; + vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); + alarm(0); clock_gettime(CLOCK_REALTIME, &max_ts); -- cgit v1.2.3-59-g8ed1b From 02d9a04da453984b16f4a585ad808cf961df495e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Dec 2022 06:00:22 -0500 Subject: Documentation: kvm: clarify SRCU locking order Currently only the locking order of SRCU vs kvm->slots_arch_lock and kvm->slots_lock is documented. Extend this to kvm->lock since Xen emulation got it terribly wrong. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/locking.rst | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index 845a561629f1..a3ca76f9be75 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -16,17 +16,26 @@ The acquisition orders for mutexes are as follows: - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring them together is quite rare. -- Unlike kvm->slots_lock, kvm->slots_arch_lock is released before - synchronize_srcu(&kvm->srcu). Therefore kvm->slots_arch_lock - can be taken inside a kvm->srcu read-side critical section, - while kvm->slots_lock cannot. - - kvm->mn_active_invalidate_count ensures that pairs of invalidate_range_start() and invalidate_range_end() callbacks use the same memslots array. kvm->slots_lock and kvm->slots_arch_lock are taken on the waiting side in install_new_memslots, so MMU notifiers must not take either kvm->slots_lock or kvm->slots_arch_lock. +For SRCU: + +- ``synchronize_srcu(&kvm->srcu)`` is called _inside_ + the kvm->slots_lock critical section, therefore kvm->slots_lock + cannot be taken inside a kvm->srcu read-side critical section. + Instead, kvm->slots_arch_lock is released before the call + to ``synchronize_srcu()`` and _can_ be taken inside a + kvm->srcu read-side critical section. + +- kvm->lock is taken inside kvm->srcu, therefore + ``synchronize_srcu(&kvm->srcu)`` cannot be called inside + a kvm->lock critical section. If you cannot delay the + call until after kvm->lock is released, use ``call_srcu``. + On x86: - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock -- cgit v1.2.3-59-g8ed1b From 129c48cde6c9e519d033305649665427c6cac494 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 30 Nov 2022 13:11:47 -0500 Subject: KVM: selftests: restore special vmmcall code layout needed by the harness Commit 8fda37cf3d41 ("KVM: selftests: Stuff RAX/RCX with 'safe' values in vmmcall()/vmcall()", 2022-11-21) broke the svm_nested_soft_inject_test because it placed a "pop rbp" instruction after vmmcall. While this is correct and mimics what is done in the VMX case, this particular test expects a ud2 instruction right after the vmmcall, so that it can skip over it in the L1 part of the test. Inline a suitably-modified version of vmmcall() to restore the functionality of the test. Fixes: 8fda37cf3d41 ("KVM: selftests: Stuff RAX/RCX with 'safe' values in vmmcall()/vmcall()" Cc: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini Reviewed-by: Sean Christopherson Reviewed-by: Vitaly Kuznetsov Reviewed-by: Maxim Levitsky Message-Id: <20221130181147.9911-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/svm_nested_soft_inject_test.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index e497ace629c1..b34980d45648 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -41,8 +41,17 @@ static void guest_int_handler(struct ex_regs *regs) static void l2_guest_code_int(void) { GUEST_ASSERT_1(int_fired == 1, int_fired); - vmmcall(); - ud2(); + + /* + * Same as the vmmcall() function, but with a ud2 sneaked after the + * vmmcall. The caller injects an exception with the return address + * increased by 2, so the "pop rbp" must be after the ud2 and we cannot + * use vmmcall() directly. + */ + __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp" + : : "a"(0xdeadbeef), "c"(0xbeefdead) + : "rbx", "rdx", "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", "r14", "r15"); GUEST_ASSERT_1(bp_fired == 1, bp_fired); hlt(); -- cgit v1.2.3-59-g8ed1b From 090ddad4c7a9fefd647c762093a555870a19c8b2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Dec 2022 13:57:14 +0100 Subject: ALSA: hda/hdmi: Static PCM mapping again with AMD HDMI codecs The recent code refactoring for HD-audio HDMI codec driver caused a regression on AMD/ATI HDMI codecs; namely, PulseAudioand pipewire don't recognize HDMI outputs any longer while the direct output via ALSA raw access still works. The problem turned out that, after the code refactoring, the driver assumes only the dynamic PCM assignment, and when a PCM stream that still isn't assigned to any pin gets opened, the driver tries to assign any free converter to the PCM stream. This behavior is OK for Intel and other codecs, as they have arbitrary connections between pins and converters. OTOH, on AMD chips that have a 1:1 mapping between pins and converters, this may end up with blocking the open of the next PCM stream for the pin that is tied with the formerly taken converter. Also, with the code refactoring, more PCM streams are exposed than necessary as we assume all converters can be used, while this isn't true for AMD case. This may change the PCM stream assignment and confuse users as well. This patch fixes those problems by: - Introducing a flag spec->static_pcm_mapping, and if it's set, the driver applies the static mapping between pins and converters at the probe time - Limiting the number of PCM streams per pins, too; this avoids the superfluous PCM streams Fixes: ef6f5494faf6 ("ALSA: hda/hdmi: Use only dynamic PCM device allocation") Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=216836 Co-developed-by: Jaroslav Kysela Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20221228125714.16329-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 8015e4471267..386dd9d9143f 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -167,6 +167,7 @@ struct hdmi_spec { struct hdmi_ops ops; bool dyn_pin_out; + bool static_pcm_mapping; /* hdmi interrupt trigger control flag for Nvidia codec */ bool hdmi_intr_trig_ctrl; bool nv_dp_workaround; /* workaround DP audio infoframe for Nvidia */ @@ -1525,13 +1526,16 @@ static void update_eld(struct hda_codec *codec, */ pcm_jack = pin_idx_to_pcm_jack(codec, per_pin); - if (eld->eld_valid) { - hdmi_attach_hda_pcm(spec, per_pin); - hdmi_pcm_setup_pin(spec, per_pin); - } else { - hdmi_pcm_reset_pin(spec, per_pin); - hdmi_detach_hda_pcm(spec, per_pin); + if (!spec->static_pcm_mapping) { + if (eld->eld_valid) { + hdmi_attach_hda_pcm(spec, per_pin); + hdmi_pcm_setup_pin(spec, per_pin); + } else { + hdmi_pcm_reset_pin(spec, per_pin); + hdmi_detach_hda_pcm(spec, per_pin); + } } + /* if pcm_idx == -1, it means this is in monitor connection event * we can get the correct pcm_idx now. */ @@ -2281,8 +2285,8 @@ static int generic_hdmi_build_pcms(struct hda_codec *codec) struct hdmi_spec *spec = codec->spec; int idx, pcm_num; - /* limit the PCM devices to the codec converters */ - pcm_num = spec->num_cvts; + /* limit the PCM devices to the codec converters or available PINs */ + pcm_num = min(spec->num_cvts, spec->num_pins); codec_dbg(codec, "hdmi: pcm_num set to %d\n", pcm_num); for (idx = 0; idx < pcm_num; idx++) { @@ -2379,6 +2383,11 @@ static int generic_hdmi_build_controls(struct hda_codec *codec) struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx); struct hdmi_eld *pin_eld = &per_pin->sink_eld; + if (spec->static_pcm_mapping) { + hdmi_attach_hda_pcm(spec, per_pin); + hdmi_pcm_setup_pin(spec, per_pin); + } + pin_eld->eld_valid = false; hdmi_present_sense(per_pin, 0); } @@ -4419,6 +4428,8 @@ static int patch_atihdmi(struct hda_codec *codec) spec = codec->spec; + spec->static_pcm_mapping = true; + spec->ops.pin_get_eld = atihdmi_pin_get_eld; spec->ops.pin_setup_infoframe = atihdmi_pin_setup_infoframe; spec->ops.pin_hbr_setup = atihdmi_pin_hbr_setup; -- cgit v1.2.3-59-g8ed1b From 8ca4fc323d2e4ab9dabbdd57633af40b0c7e6af9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Dec 2022 11:09:55 +0100 Subject: docs, nvme: add a feature and quirk policy document This adds a document about what specification features are supported by the Linux NVMe driver, and what qualifies for a quirk if an implementation has problems following the specification. Signed-off-by: Jens Axboe Signed-off-by: Keith Busch Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Reviewed-by: Randy Dunlap Acked-by: Jonathan Corbet --- .../maintainer/maintainer-entry-profile.rst | 1 + Documentation/nvme/feature-and-quirk-policy.rst | 77 ++++++++++++++++++++++ MAINTAINERS | 1 + 3 files changed, 79 insertions(+) create mode 100644 Documentation/nvme/feature-and-quirk-policy.rst diff --git a/Documentation/maintainer/maintainer-entry-profile.rst b/Documentation/maintainer/maintainer-entry-profile.rst index 93b2ae6c34a9..cfd37f31077f 100644 --- a/Documentation/maintainer/maintainer-entry-profile.rst +++ b/Documentation/maintainer/maintainer-entry-profile.rst @@ -104,3 +104,4 @@ to do something different in the near future. ../riscv/patch-acceptance ../driver-api/media/maintainer-entry-profile ../driver-api/vfio-pci-device-specific-driver-acceptance + ../nvme/feature-and-quirk-policy diff --git a/Documentation/nvme/feature-and-quirk-policy.rst b/Documentation/nvme/feature-and-quirk-policy.rst new file mode 100644 index 000000000000..c01d836d8e41 --- /dev/null +++ b/Documentation/nvme/feature-and-quirk-policy.rst @@ -0,0 +1,77 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================= +Linux NVMe feature and and quirk policy +======================================= + +This file explains the policy used to decide what is supported by the +Linux NVMe driver and what is not. + + +Introduction +============ + +NVM Express is an open collection of standards and information. + +The Linux NVMe host driver in drivers/nvme/host/ supports devices +implementing the NVM Express (NVMe) family of specifications, which +currently consists of a number of documents: + + - the NVMe Base specification + - various Command Set specifications (e.g. NVM Command Set) + - various Transport specifications (e.g. PCIe, Fibre Channel, RDMA, TCP) + - the NVMe Management Interface specification + +See https://nvmexpress.org/developers/ for the NVMe specifications. + + +Supported features +================== + +NVMe is a large suite of specifications, and contains features that are only +useful or suitable for specific use-cases. It is important to note that Linux +does not aim to implement every feature in the specification. Every additional +feature implemented introduces more code, more maintenance and potentially more +bugs. Hence there is an inherent tradeoff between functionality and +maintainability of the NVMe host driver. + +Any feature implemented in the Linux NVMe host driver must support the +following requirements: + + 1. The feature is specified in a release version of an official NVMe + specification, or in a ratified Technical Proposal (TP) that is + available on NVMe website. Or if it is not directly related to the + on-wire protocol, does not contradict any of the NVMe specifications. + 2. Does not conflict with the Linux architecture, nor the design of the + NVMe host driver. + 3. Has a clear, indisputable value-proposition and a wide consensus across + the community. + +Vendor specific extensions are generally not supported in the NVMe host +driver. + +It is strongly recommended to work with the Linux NVMe and block layer +maintainers and get feedback on specification changes that are intended +to be used by the Linux NVMe host driver in order to avoid conflict at a +later stage. + + +Quirks +====== + +Sometimes implementations of open standards fail to correctly implement parts +of the standards. Linux uses identifier-based quirks to work around such +implementation bugs. The intent of quirks is to deal with widely available +hardware, usually consumer, which Linux users can't use without these quirks. +Typically these implementations are not or only superficially tested with Linux +by the hardware manufacturer. + +The Linux NVMe maintainers decide ad hoc whether to quirk implementations +based on the impact of the problem to Linux users and how it impacts +maintainability of the driver. In general quirks are a last resort, if no +firmware updates or other workarounds are available from the vendor. + +Quirks will not be added to the Linux kernel for hardware that isn't available +on the mass market. Hardware that fails qualification for enterprise Linux +distributions, ChromeOS, Android or other consumers of the Linux kernel +should be fixed before it is shipped instead of relying on Linux quirks. diff --git a/MAINTAINERS b/MAINTAINERS index bb77a3ed9d54..d53b3a6cdc67 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14827,6 +14827,7 @@ L: linux-nvme@lists.infradead.org S: Supported W: http://git.infradead.org/nvme.git T: git://git.infradead.org/nvme.git +F: Documentation/nvme/ F: drivers/nvme/host/ F: drivers/nvme/common/ F: include/linux/nvme* -- cgit v1.2.3-59-g8ed1b From 685e6311637e46f3212439ce2789f8a300e5050f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Dec 2022 10:30:45 +0100 Subject: nvme: fix the NVME_CMD_EFFECTS_CSE_MASK definition 3 << 16 does not generate the correct mask for bits 16, 17 and 18. Use the GENMASK macro to generate the correct mask instead. Fixes: 84fef62d135b ("nvme: check admin passthru command effects") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Kanchan Joshi --- include/linux/nvme.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d6be2a686100..d1cd53f2b6ab 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -7,6 +7,7 @@ #ifndef _LINUX_NVME_H #define _LINUX_NVME_H +#include #include #include @@ -639,7 +640,7 @@ enum { NVME_CMD_EFFECTS_NCC = 1 << 2, NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, - NVME_CMD_EFFECTS_CSE_MASK = 3 << 16, + NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, }; -- cgit v1.2.3-59-g8ed1b From 61f37154c599cf9f2f84dcbd9be842f8645a7099 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Dec 2022 15:20:04 +0100 Subject: nvmet: use NVME_CMD_EFFECTS_CSUPP instead of open coding it Use NVME_CMD_EFFECTS_CSUPP instead of open coding it and assign a single value to multiple array entries instead of repeated assignments. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/admin-cmd.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 53a004ea320c..111a5cb6403f 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -164,26 +164,29 @@ out: static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) { - log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0); - - log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0); + log->acs[nvme_admin_get_log_page] = + log->acs[nvme_admin_identify] = + log->acs[nvme_admin_abort_cmd] = + log->acs[nvme_admin_set_features] = + log->acs[nvme_admin_get_features] = + log->acs[nvme_admin_async_event] = + log->acs[nvme_admin_keep_alive] = + cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); + + log->iocs[nvme_cmd_read] = + log->iocs[nvme_cmd_write] = + log->iocs[nvme_cmd_flush] = + log->iocs[nvme_cmd_dsm] = + log->iocs[nvme_cmd_write_zeroes] = + cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); } static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log) { - log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_zone_append] = + log->iocs[nvme_cmd_zone_mgmt_send] = + log->iocs[nvme_cmd_zone_mgmt_recv] = + cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); } static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) -- cgit v1.2.3-59-g8ed1b From f2d1421391bba0b15684d2379a47a089f0e561d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Dec 2022 15:20:56 +0100 Subject: nvmet: set the LBCC bit for commands that modify data Write, Write Zeroes, Zone append and a Zone Reset through Zone Management Send modify the logical block content of a namespace, so make sure the LBCC bit is reported for them. Fixes: b5d0b38c0475 ("nvmet: add Command Set Identifier support") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/admin-cmd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 111a5cb6403f..6a54ed6fb121 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -174,17 +174,19 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); log->iocs[nvme_cmd_read] = - log->iocs[nvme_cmd_write] = log->iocs[nvme_cmd_flush] = log->iocs[nvme_cmd_dsm] = - log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); + log->iocs[nvme_cmd_write] = + log->iocs[nvme_cmd_write_zeroes] = + cpu_to_le32(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC); } static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log) { log->iocs[nvme_cmd_zone_append] = log->iocs[nvme_cmd_zone_mgmt_send] = + cpu_to_le32(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC); log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); } -- cgit v1.2.3-59-g8ed1b From 2a459f6933e1c459bffb7cc73fd6c900edc714bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Dec 2022 09:51:19 +0100 Subject: nvmet: don't defer passthrough commands with trivial effects to the workqueue Mask out the "Command Supported" and "Logical Block Content Change" bits and only defer execution of commands that have non-trivial effects to the workqueue for synchronous execution. This allows to execute admin commands asynchronously on controllers that provide a Command Supported and Effects log page, and will keep allowing to execute Write commands asynchronously once command effects on I/O commands are taken into account. Fixes: c1fef73f793b ("nvmet: add passthru code to process commands") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Kanchan Joshi --- drivers/nvme/target/passthru.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 79af5140af8b..adc0958755d6 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -334,14 +334,13 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) } /* - * If there are effects for the command we are about to execute, or - * an end_req function we need to use nvme_execute_passthru_rq() - * synchronously in a work item seeing the end_req function and - * nvme_passthru_end() can't be called in the request done callback - * which is typically in interrupt context. + * If a command needs post-execution fixups, or there are any + * non-trivial effects, make sure to execute the command synchronously + * in a workqueue so that nvme_passthru_end gets called. */ effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode); - if (req->p.use_workqueue || effects) { + if (req->p.use_workqueue || + (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) { INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work); req->p.rq = rq; queue_work(nvmet_wq, &req->p.work); -- cgit v1.2.3-59-g8ed1b From 831ed60c2aca2d7c517b2da22897a90224a97d27 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Dec 2022 10:12:17 +0100 Subject: nvme: also return I/O command effects from nvme_command_effects To be able to use the Commands Supported and Effects Log for allowing unprivileged passtrough, it needs to be corretly reported for I/O commands as well. Return the I/O command effects from nvme_command_effects, and also add a default list of effects for the NVM command set. For other command sets, the Commands Supported and Effects log is required to be present already. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Kanchan Joshi --- drivers/nvme/host/core.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cda1361e6d4f..d307ae4d8a57 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1074,6 +1074,18 @@ static u32 nvme_known_admin_effects(u8 opcode) return 0; } +static u32 nvme_known_nvm_effects(u8 opcode) +{ + switch (opcode) { + case nvme_cmd_write: + case nvme_cmd_write_zeroes: + case nvme_cmd_write_uncor: + return NVME_CMD_EFFECTS_LBCC; + default: + return 0; + } +} + u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) { u32 effects = 0; @@ -1081,16 +1093,24 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) if (ns) { if (ns->head->effects) effects = le32_to_cpu(ns->head->effects->iocs[opcode]); + if (ns->head->ids.csi == NVME_CAP_CSS_NVM) + effects |= nvme_known_nvm_effects(opcode); if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) dev_warn_once(ctrl->device, - "IO command:%02x has unhandled effects:%08x\n", + "IO command:%02x has unusual effects:%08x\n", opcode, effects); - return 0; - } - if (ctrl->effects) - effects = le32_to_cpu(ctrl->effects->acs[opcode]); - effects |= nvme_known_admin_effects(opcode); + /* + * NVME_CMD_EFFECTS_CSE_MASK causes a freeze all I/O queues, + * which would deadlock when done on an I/O command. Note that + * We already warn about an unusual effect above. + */ + effects &= ~NVME_CMD_EFFECTS_CSE_MASK; + } else { + if (ctrl->effects) + effects = le32_to_cpu(ctrl->effects->acs[opcode]); + effects |= nvme_known_admin_effects(opcode); + } return effects; } -- cgit v1.2.3-59-g8ed1b From 6f99ac04c469b5d0a180a4ccea99d25d5dc9d21c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Dec 2022 16:13:38 +0100 Subject: nvme: consult the CSE log page for unprivileged passthrough Commands like Write Zeros can change the contents of a namespaces without actually transferring data. To protect against this, check the Commands Supported and Effects log is supported by the controller for any unprivileg command passthrough and refuse unprivileged passthrough if the command has any effects that can change data or metadata. Note: While the Commands Support and Effects log page has only been mandatory since NVMe 2.0, it is widely supported because Windows requires it for any command passthrough from userspace. Fixes: e4fbcf32c860 ("nvme: identify-namespace without CAP_SYS_ADMIN") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Kanchan Joshi --- drivers/nvme/host/ioctl.c | 28 ++++++++++++++++++++++++---- include/linux/nvme.h | 1 + 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 9ddda571f046..a8639919237e 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -11,6 +11,8 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, fmode_t mode) { + u32 effects; + if (capable(CAP_SYS_ADMIN)) return true; @@ -43,11 +45,29 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, } /* - * Only allow I/O commands that transfer data to the controller if the - * special file is open for writing, but always allow I/O commands that - * transfer data from the controller. + * Check if the controller provides a Commands Supported and Effects log + * and marks this command as supported. If not reject unprivileged + * passthrough. + */ + effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); + if (!(effects & NVME_CMD_EFFECTS_CSUPP)) + return false; + + /* + * Don't allow passthrough for command that have intrusive (or unknown) + * effects. + */ + if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | + NVME_CMD_EFFECTS_UUID_SEL | + NVME_CMD_EFFECTS_SCOPE_MASK)) + return false; + + /* + * Only allow I/O commands that transfer data to the controller or that + * change the logical block contents if the file descriptor is open for + * writing. */ - if (nvme_is_write(c)) + if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) return mode & FMODE_WRITE; return true; } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d1cd53f2b6ab..4fad4aa245fb 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -642,6 +642,7 @@ enum { NVME_CMD_EFFECTS_CCC = 1 << 4, NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, + NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), }; struct nvme_effects_log { -- cgit v1.2.3-59-g8ed1b From 76807fcd73b818eb9f245ef1035aed34ecdd9813 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 25 Dec 2022 13:28:51 +0200 Subject: nvme-auth: fix smatch warning complaints When initializing auth context, there may be no secrets passed by the user. Make return code explicit when returning successfully. smatch warnings: drivers/nvme/host/auth.c:950 nvme_auth_init_ctrl() warn: missing error code? 'ret' Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index bb0abbe4491c..4424f53a8a0a 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -953,7 +953,7 @@ int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) goto err_free_dhchap_secret; if (!ctrl->opts->dhchap_secret && !ctrl->opts->dhchap_ctrl_secret) - return ret; + return 0; ctrl->dhchap_ctxs = kvcalloc(ctrl_max_dhchaps(ctrl), sizeof(*chap), GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From da8daff9405e55baa1f797b77a7c629a89f4d764 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Sat, 17 Dec 2022 11:21:48 +0530 Subject: kconfig: Add static text for search information in help menu Add few static text to explain how one can bring up the search dialog box by pressing the forward slash key anywhere on this interface. Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- scripts/kconfig/mconf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 9c549683c627..e67e0db50b2e 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -161,6 +161,12 @@ static const char mconf_readme[] = "(especially with a larger number of unrolled categories) than the\n" "default mode.\n" "\n" + +"Search\n" +"-------\n" +"Pressing the forward-slash (/) anywhere brings up a search dialog box.\n" +"\n" + "Different color themes available\n" "--------------------------------\n" "It is possible to select different color themes using the variable\n" -- cgit v1.2.3-59-g8ed1b From c5bc073668206c73c20798eb6d978b5e9db5b16f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 14 Dec 2022 08:54:39 +0100 Subject: drm/i915: fix TLB invalidation for Gen12.50 video and compute engines In case of Gen12.50 video and compute engines, TLB_INV registers are masked - to modify one bit, corresponding bit in upper half of the register must be enabled, otherwise nothing happens. Fixes: 77fa9efc16a9 ("drm/i915/xehp: Create separate reg definitions for new MCR registers") Signed-off-by: Andrzej Hajda Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221214075439.402485-1-andrzej.hajda@intel.com (cherry picked from commit 4d5cf7b1680a1e6db327e3c935ef58325cbedb2c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 767e329e1cc5..9c18b5f2e789 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1109,9 +1109,15 @@ static void mmio_invalidate_full(struct intel_gt *gt) continue; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + u32 val = BIT(engine->instance); + + if (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS) + val = _MASKED_BIT_ENABLE(val); intel_gt_mcr_multicast_write_fw(gt, xehp_regs[engine->class], - BIT(engine->instance)); + val); } else { rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); if (!i915_mmio_reg_offset(rb.reg)) -- cgit v1.2.3-59-g8ed1b From fff758698842fb6722be37498d8773e0fb47f000 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 14 Dec 2022 11:49:44 -0800 Subject: drm/i915: Remove __maybe_unused from mtl_info The attribute __maybe_unused should remain only until the respective info is not in the pciidlist. The info can't be added together with its definition because that would cause the driver to automatically probe for the device, while it's still not ready for that. However once pciidlist contains it, the attribute can be removed. Fixes: 7835303982d1 ("drm/i915/mtl: Add MeteorLake PCI IDs") Signed-off-by: Lucas De Marchi Reviewed-by: Radhakrishna Sripada Link: https://patchwork.freedesktop.org/patch/msgid/20221214194944.3670344-1-lucas.demarchi@intel.com (cherry picked from commit 50490ce05b7a50b0bd4108fa7d6db3ca2972fa83) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 6da9784fe4a2..ccd1f864aa19 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1129,7 +1129,6 @@ static const struct intel_gt_definition xelpmp_extra_gt[] = { {} }; -__maybe_unused static const struct intel_device_info mtl_info = { XE_HP_FEATURES, XE_LPDP_FEATURES, -- cgit v1.2.3-59-g8ed1b From 3f882f2d4f689627c1566c2c92087bc3ff734953 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 16 Dec 2022 11:34:56 +0000 Subject: drm/i915: improve the catch-all evict to handle lock contention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The catch-all evict can fail due to object lock contention, since it only goes as far as trylocking the object, due to us already holding the vm->mutex. Doing a full object lock here can deadlock, since the vm->mutex is always our inner lock. Add another execbuf pass which drops the vm->mutex and then tries to grab the object will the full lock, before then retrying the eviction. This should be good enough for now to fix the immediate regression with userspace seeing -ENOSPC from execbuf due to contended object locks during GTT eviction. v2 (Mani) - Also revamp the docs for the different passes. Testcase: igt@gem_ppgtt@shrink-vs-evict-* Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.") References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627 References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570 References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558 Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: Thomas Hellström Cc: Tvrtko Ursulin Cc: Andrzej Hajda Cc: Mani Milani Cc: # v5.18+ Reviewed-by: Mani Milani Tested-by: Mani Milani Link: https://patchwork.freedesktop.org/patch/msgid/20221216113456.414183-1-matthew.auld@intel.com (cherry picked from commit 801fa7a81f6da533cc5442fc40e32c72b76cd42a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 59 ++++++++++++++++++++----- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 37 +++++++++++----- drivers/gpu/drm/i915/i915_gem_evict.h | 4 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- 6 files changed, 82 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index da09767fda07..f266b68cf012 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -730,32 +730,69 @@ static int eb_reserve(struct i915_execbuffer *eb) bool unpinned; /* - * Attempt to pin all of the buffers into the GTT. - * This is done in 2 phases: + * We have one more buffers that we couldn't bind, which could be due to + * various reasons. To resolve this we have 4 passes, with every next + * level turning the screws tighter: * - * 1. Unbind all objects that do not match the GTT constraints for - * the execbuffer (fenceable, mappable, alignment etc). - * 2. Bind new objects. + * 0. Unbind all objects that do not match the GTT constraints for the + * execbuffer (fenceable, mappable, alignment etc). Bind all new + * objects. This avoids unnecessary unbinding of later objects in order + * to make room for the earlier objects *unless* we need to defragment. * - * This avoid unnecessary unbinding of later objects in order to make - * room for the earlier objects *unless* we need to defragment. + * 1. Reorder the buffers, where objects with the most restrictive + * placement requirements go first (ignoring fixed location buffers for + * now). For example, objects needing the mappable aperture (the first + * 256M of GTT), should go first vs objects that can be placed just + * about anywhere. Repeat the previous pass. * - * Defragmenting is skipped if all objects are pinned at a fixed location. + * 2. Consider buffers that are pinned at a fixed location. Also try to + * evict the entire VM this time, leaving only objects that we were + * unable to lock. Try again to bind the buffers. (still using the new + * buffer order). + * + * 3. We likely have object lock contention for one or more stubborn + * objects in the VM, for which we need to evict to make forward + * progress (perhaps we are fighting the shrinker?). When evicting the + * VM this time around, anything that we can't lock we now track using + * the busy_bo, using the full lock (after dropping the vm->mutex to + * prevent deadlocks), instead of trylock. We then continue to evict the + * VM, this time with the stubborn object locked, which we can now + * hopefully unbind (if still bound in the VM). Repeat until the VM is + * evicted. Finally we should be able bind everything. */ - for (pass = 0; pass <= 2; pass++) { + for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE; if (pass == 0) pin_flags |= PIN_NONBLOCK; if (pass >= 1) - unpinned = eb_unbind(eb, pass == 2); + unpinned = eb_unbind(eb, pass >= 2); if (pass == 2) { err = mutex_lock_interruptible(&eb->context->vm->mutex); if (!err) { - err = i915_gem_evict_vm(eb->context->vm, &eb->ww); + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); + mutex_unlock(&eb->context->vm->mutex); + } + if (err) + return err; + } + + if (pass == 3) { +retry: + err = mutex_lock_interruptible(&eb->context->vm->mutex); + if (!err) { + struct drm_i915_gem_object *busy_bo = NULL; + + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo); mutex_unlock(&eb->context->vm->mutex); + if (err && busy_bo) { + err = i915_gem_object_lock(busy_bo, &eb->ww); + i915_gem_object_put(busy_bo); + if (!err) + goto retry; + } } if (err) return err; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index c29efdef8313..0ad44f3868de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -369,7 +369,7 @@ retry: if (vma == ERR_PTR(-ENOSPC)) { ret = mutex_lock_interruptible(&ggtt->vm.mutex); if (!ret) { - ret = i915_gem_evict_vm(&ggtt->vm, &ww); + ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f025ee4fa526..a4b4d9b7d26c 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * @vm: Address space to cleanse * @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm * will be able to evict vma's locked by the ww as well. + * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then + * in the event i915_gem_evict_vm() is unable to trylock an object for eviction, + * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop + * the vm->mutex, before trying again to acquire the contended lock. The caller + * also owns a reference to the object. * * This function evicts all vmas from a vm. * @@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * To clarify: This is for freeing up virtual address space, not for freeing * memory in e.g. the shrinker. */ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo) { int ret = 0; @@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. */ - if (dying_vma(vma) || + if (!i915_gem_object_get_rcu(vma->obj) || (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; } - if (!i915_gem_object_trylock(vma->obj, ww)) + if (!i915_gem_object_trylock(vma->obj, ww)) { + if (busy_bo) { + *busy_bo = vma->obj; /* holds ref */ + ret = -EBUSY; + break; + } + i915_gem_object_put(vma->obj); continue; + } __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); @@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) if (list_empty(&eviction_list) && list_empty(&locked_eviction_list)) break; - ret = 0; /* Unbind locked objects first, before unlocking the eviction_list */ list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + } + if (!dying_vma(vma)) + i915_gem_object_put(vma->obj); } list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + } i915_gem_object_unlock(vma->obj); + i915_gem_object_put(vma->obj); } } while (ret == 0); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h index e593c530f9bd..bf0ee0e4fe60 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.h +++ b/drivers/gpu/drm/i915/i915_gem_evict.h @@ -11,6 +11,7 @@ struct drm_mm_node; struct i915_address_space; struct i915_gem_ww_ctx; +struct drm_i915_gem_object; int __must_check i915_gem_evict_something(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, @@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm, - struct i915_gem_ww_ctx *ww); + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo); #endif /* __I915_GEM_EVICT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 703fee6b5f75..3a33be5401ed 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1566,7 +1566,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * locked objects when called from execbuf when pinning * is removed. This would probably regress badly. */ - i915_gem_evict_vm(vm, NULL); + i915_gem_evict_vm(vm, NULL, NULL); mutex_unlock(&vm->mutex); } } while (1); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8c6517d29b8e..37068542aafe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg) /* Everything is pinned, nothing should happen */ mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, NULL); + err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL); mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", @@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg) for_i915_gem_ww(&ww, err, false) { mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, &ww); + err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } -- cgit v1.2.3-59-g8ed1b From 11ce8fd8fd8718247f17475802639cd7e2d3765c Mon Sep 17 00:00:00 2001 From: John Harrison Date: Wed, 21 Dec 2022 11:30:31 -0800 Subject: drm/i915/uc: Fix two issues with over-size firmware files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case where a firmware file is too large (e.g. someone downloaded a web page ASCII dump from github...), the firmware object is released but the pointer is not zerod. If no other firmware file was found then release would be called again leading to a double kfree. Also, the size check was only being applied to the initial firmware load not any of the subsequent attempts. So move the check into a wrapper that is used for all loads. Fixes: 016241168dc5 ("drm/i915/uc: use different ggtt pin offsets for uc loads") Signed-off-by: John Harrison Reviewed-by: Daniele Ceraolo Spurio Cc: Alan Previn Cc: Rodrigo Vivi Cc: Matt Roper Cc: Jani Nikula Cc: Matthew Auld Cc: "Thomas Hellström" Link: https://patchwork.freedesktop.org/patch/msgid/20221221193031.687266-4-John.C.Harrison@Intel.com (cherry picked from commit 4071d98b296a5bc5fd4b15ec651bd05800ec9510) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 42 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 0c80ba51a4bd..2bcdd192f814 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -545,6 +545,32 @@ static int check_ccs_header(struct intel_gt *gt, return 0; } +static int try_firmware_load(struct intel_uc_fw *uc_fw, const struct firmware **fw) +{ + struct intel_gt *gt = __uc_fw_to_gt(uc_fw); + struct device *dev = gt->i915->drm.dev; + int err; + + err = firmware_request_nowarn(fw, uc_fw->file_selected.path, dev); + + if (err) + return err; + + if ((*fw)->size > INTEL_UC_RSVD_GGTT_PER_FW) { + drm_err(>->i915->drm, + "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, + (*fw)->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K); + + /* try to find another blob to load */ + release_firmware(*fw); + *fw = NULL; + return -ENOENT; + } + + return 0; +} + /** * intel_uc_fw_fetch - fetch uC firmware * @uc_fw: uC firmware @@ -558,7 +584,6 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) struct intel_gt *gt = __uc_fw_to_gt(uc_fw); struct drm_i915_private *i915 = gt->i915; struct intel_uc_fw_file file_ideal; - struct device *dev = i915->drm.dev; struct drm_i915_gem_object *obj; const struct firmware *fw = NULL; bool old_ver = false; @@ -574,20 +599,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); + err = try_firmware_load(uc_fw, &fw); memcpy(&file_ideal, &uc_fw->file_wanted, sizeof(file_ideal)); - if (!err && fw->size > INTEL_UC_RSVD_GGTT_PER_FW) { - drm_err(&i915->drm, - "%s firmware %s: size (%zuKB) exceeds max supported size (%uKB)\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->file_selected.path, - fw->size / SZ_1K, INTEL_UC_RSVD_GGTT_PER_FW / SZ_1K); - - /* try to find another blob to load */ - release_firmware(fw); - err = -ENOENT; - } - /* Any error is terminal if overriding. Don't bother searching for older versions */ if (err && intel_uc_fw_is_overridden(uc_fw)) goto fail; @@ -608,7 +622,7 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) break; } - err = firmware_request_nowarn(&fw, uc_fw->file_selected.path, dev); + err = try_firmware_load(uc_fw, &fw); } if (err) -- cgit v1.2.3-59-g8ed1b From 99cb0d917ffa1ab628bb67364ca9b162c07699b1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 27 Dec 2022 03:45:37 +0900 Subject: arch: fix broken BuildID for arm64 and riscv Dennis Gilmore reports that the BuildID is missing in the arm64 vmlinux since commit 994b7ac1697b ("arm64: remove special treatment for the link order of head.o"). The issue is that the type of .notes section, which contains the BuildID, changed from NOTES to PROGBITS. Ard Biesheuvel figured out that whichever object gets linked first gets to decide the type of a section. The PROGBITS type is the result of the compiler emitting .note.GNU-stack as PROGBITS rather than NOTE. While Ard provided a fix for arm64, I want to fix this globally because the same issue is happening on riscv since commit 2348e6bf4421 ("riscv: remove special treatment for the link order of head.o"). This problem will happen in general for other architectures if they start to drop unneeded entries from scripts/head-object-list.txt. Discard .note.GNU-stack in include/asm-generic/vmlinux.lds.h. Link: https://lore.kernel.org/lkml/CAABkxwuQoz1CTbyb57n0ZX65eSYiTonFCU8-LCQc=74D=xE=rA@mail.gmail.com/ Fixes: 994b7ac1697b ("arm64: remove special treatment for the link order of head.o") Fixes: 2348e6bf4421 ("riscv: remove special treatment for the link order of head.o") Reported-by: Dennis Gilmore Suggested-by: Ard Biesheuvel Signed-off-by: Masahiro Yamada Acked-by: Palmer Dabbelt --- include/asm-generic/vmlinux.lds.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a94219e9916f..659bf3b31c91 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -891,7 +891,12 @@ #define PRINTK_INDEX #endif +/* + * Discard .note.GNU-stack, which is emitted as PROGBITS by the compiler. + * Otherwise, the type of .notes section would become PROGBITS instead of NOTES. + */ #define NOTES \ + /DISCARD/ : { *(.note.GNU-stack) } \ .notes : AT(ADDR(.notes) - LOAD_OFFSET) { \ BOUNDED_SECTION_BY(.note.*, _notes) \ } NOTES_HEADERS \ -- cgit v1.2.3-59-g8ed1b From 924d28b39e3b62ad5e97751585aed7c89f8c43ee Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 27 Dec 2022 03:54:44 +0900 Subject: .gitignore: ignore *.rpm Previously, *.rpm files were created under $HOME/rpmbuild/, but since commit 8818039f959b ("kbuild: add ability to make source rpm buildable using koji"), srcrpm-pkg creates the source rpm in the kernel tree because it sets '_srcrpmdir'. Signed-off-by: Masahiro Yamada --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3ec73ead6757..20dce5c3b9e0 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ *.o.* *.patch *.rmeta +*.rpm *.rsi *.s *.so -- cgit v1.2.3-59-g8ed1b From 9c9b55a59416a87fc73c479d78cb3218076dbc30 Mon Sep 17 00:00:00 2001 From: Jun ASAKA Date: Tue, 27 Dec 2022 17:21:57 +0800 Subject: kbuild: add a missing line for help message The help message line for building the source RPM package was missing. Added it. Signed-off-by: Jun ASAKA Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Makefile.package | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 539e9f765d64..525a2820976f 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -158,6 +158,7 @@ $(perf-tar-pkgs): PHONY += help help: @echo ' rpm-pkg - Build both source and binary RPM kernel packages' + @echo ' srcrpm-pkg - Build only the source kernel RPM package' @echo ' binrpm-pkg - Build only the binary kernel RPM package' @echo ' deb-pkg - Build both source and binary deb kernel packages' @echo ' bindeb-pkg - Build only the binary kernel deb package' -- cgit v1.2.3-59-g8ed1b From 63ffe00d8c939eda1a8fa87484ca4537e13a20b7 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 27 Dec 2022 15:48:21 -0600 Subject: kbuild: Fix running modpost with musl libc commit 3d57e1b7b1d4 ("kbuild: refactor the prerequisites of the modpost rule") moved 'vmlinux.o' inside modpost-args, possibly before some of the other options. However, getopt() in musl libc follows POSIX and stops looking for options upon reaching the first non-option argument. As a result, the '-T' option is misinterpreted as a positional argument, and the build fails: make -f ./scripts/Makefile.modpost scripts/mod/modpost -E -o Module.symvers vmlinux.o -T modules.order -T: No such file or directory make[1]: *** [scripts/Makefile.modpost:137: Module.symvers] Error 1 make: *** [Makefile:1960: modpost] Error 2 The fix is to move all options before 'vmlinux.o' in modpost-args. Fixes: 3d57e1b7b1d4 ("kbuild: refactor the prerequisites of the modpost rule") Signed-off-by: Samuel Holland Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Makefile.modpost | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 5eb5e8280379..0ee296cf520c 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -55,6 +55,17 @@ ifneq ($(findstring i,$(filter-out --%,$(MAKEFLAGS))),) modpost-args += -n endif +ifneq ($(KBUILD_MODPOST_WARN)$(missing-input),) +modpost-args += -w +endif + +# Read out modules.order to pass in modpost. +# Otherwise, allmodconfig would fail with "Argument list too long". +ifdef KBUILD_MODULES +modpost-args += -T $(MODORDER) +modpost-deps += $(MODORDER) +endif + ifeq ($(KBUILD_EXTMOD),) # Generate the list of in-tree objects in vmlinux @@ -113,17 +124,6 @@ modpost-args += -e $(addprefix -i , $(KBUILD_EXTRA_SYMBOLS)) endif # ($(KBUILD_EXTMOD),) -ifneq ($(KBUILD_MODPOST_WARN)$(missing-input),) -modpost-args += -w -endif - -ifdef KBUILD_MODULES -modpost-args += -T $(MODORDER) -modpost-deps += $(MODORDER) -endif - -# Read out modules.order to pass in modpost. -# Otherwise, allmodconfig would fail with "Argument list too long". quiet_cmd_modpost = MODPOST $@ cmd_modpost = \ $(if $(missing-input), \ -- cgit v1.2.3-59-g8ed1b From 02a893bc99757d75b7abb43b74f210dfa3df8c4b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 29 Dec 2022 04:10:14 +0900 Subject: kbuild: rpm-pkg: add libelf-devel as alternative for BuildRequires Guoqing Jiang reports that openSUSE cannot compile the kernel rpm due to "BuildRequires: elfutils-libelf-devel" added by commit 8818039f959b ("kbuild: add ability to make source rpm buildable using koji"). The relevant package name in openSUSE is libelf-devel. Add it as an alternative package. BTW, if it is impossible to solve the build requirement, the final resort would be: $ make RPMOPTS=--nodeps rpm-pkg This passes --nodeps to the rpmbuild command so it will not verify build dependencies. This is useful to test rpm builds on non-rpm system. On Debian/Ubuntu, for example, you can install rpmbuild by 'apt-get install rpm'. NOTE1: Likewise, it is possible to bypass the build dependency check for debian package builds: $ make DPKG_FLAGS=-d deb-pkg NOTE2: The 'or' operator is supported since RPM 4.13. So, old distros such as CentOS 7 will break. I suggest installing newer rpmbuild in such cases. Link: https://lore.kernel.org/linux-kbuild/ee227d24-9c94-bfa3-166a-4ee6b5dfea09@linux.dev/T/#u Fixes: 8818039f959b ("kbuild: add ability to make source rpm buildable using koji") Reported-by: Guoqing Jiang Signed-off-by: Masahiro Yamada Reviewed-by: Nathan Chancellor Tested-by: Guoqing Jiang Acked-by: Jonathan Toppins --- scripts/package/mkspec | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/package/mkspec b/scripts/package/mkspec index dda00a948a01..adab28fa7f89 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -51,7 +51,8 @@ sed -e '/^DEL/d' -e 's/^\t*//' < Date: Thu, 29 Dec 2022 21:16:42 +0900 Subject: kbuild: sort single-targets alphabetically again This was previously alphabetically sorted. Sort it again. Signed-off-by: Masahiro Yamada Reviewed-by: Miguel Ojeda Reviewed-by: Nathan Chancellor --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d4b6af8c09e9..a5133e422f69 100644 --- a/Makefile +++ b/Makefile @@ -297,7 +297,7 @@ no-compiler-targets := $(no-dot-config-targets) install dtbs_install \ headers_install modules_install kernelrelease image_name no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \ image_name -single-targets := %.a %.i %.rsi %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/ +single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.symtypes %/ config-build := mixed-build := -- cgit v1.2.3-59-g8ed1b From 6a5e25fc3e0b94301734e8abb1d311a1e02d360d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 30 Dec 2022 17:16:42 +0900 Subject: fixdep: remove unneeded inclusion This is unneeded since commit 69304379ff03 ("fixdep: use fflush() and ferror() to ensure successful write to files"). Signed-off-by: Masahiro Yamada --- scripts/basic/fixdep.c | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c index 2328f9a641da..f932aeaba71a 100644 --- a/scripts/basic/fixdep.c +++ b/scripts/basic/fixdep.c @@ -94,7 +94,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-59-g8ed1b From 963bbdb32b47cfa67a449e715e1dcc525fbd01fc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 19 Dec 2022 12:59:55 +0200 Subject: drm/i915/dsi: add support for ICL+ native MIPI GPIO sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from ICL, the default for MIPI GPIO sequences seems to be using native GPIOs i.e. GPIOs available in the GPU. These native GPIOs reuse many pins that quite frankly seem scary to poke based on the VBT sequences. We pretty much have to trust that the board is configured such that the relevant HPD, PP_CONTROL and GPIO bits aren't used for anything else. MIPI sequence v4 also adds a flag to fall back to non-native sequences. v5: - Wrap SHOTPLUG_CTL_DDI modification in spin_lock() in icp_irq_handler() too (Ville) - References instead of Closes issue 6131 because this does not fix everything v4: - Wrap SHOTPLUG_CTL_DDI modification in spin_lock_irq() (Ville) v3: - Fix -Wbitwise-conditional-parentheses (kernel test robot ) v2: - Fix HPD pin output set (impacts GPIOs 0 and 5) - Fix GPIO data output direction set (impacts GPIOs 4 and 9) - Reduce register accesses to single intel_de_rwm() References: https://gitlab.freedesktop.org/drm/intel/-/issues/6131 Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221219105955.4014451-1-jani.nikula@intel.com (cherry picked from commit f087cfe6fcff58044f7aa3b284965af47f472fb0) Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 94 +++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_irq.c | 3 + drivers/gpu/drm/i915/i915_reg.h | 1 + 3 files changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index fce69fa446d5..41f025f089d9 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -41,9 +41,11 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" #include "intel_dsi_vbt.h" +#include "intel_gmbus_regs.h" #include "vlv_dsi.h" #include "vlv_dsi_regs.h" #include "vlv_sideband.h" @@ -377,6 +379,85 @@ static void icl_exec_gpio(struct intel_connector *connector, drm_dbg_kms(&dev_priv->drm, "Skipping ICL GPIO element execution\n"); } +enum { + MIPI_RESET_1 = 0, + MIPI_AVDD_EN_1, + MIPI_BKLT_EN_1, + MIPI_AVEE_EN_1, + MIPI_VIO_EN_1, + MIPI_RESET_2, + MIPI_AVDD_EN_2, + MIPI_BKLT_EN_2, + MIPI_AVEE_EN_2, + MIPI_VIO_EN_2, +}; + +static void icl_native_gpio_set_value(struct drm_i915_private *dev_priv, + int gpio, bool value) +{ + int index; + + if (drm_WARN_ON(&dev_priv->drm, DISPLAY_VER(dev_priv) == 11 && gpio >= MIPI_RESET_2)) + return; + + switch (gpio) { + case MIPI_RESET_1: + case MIPI_RESET_2: + index = gpio == MIPI_RESET_1 ? HPD_PORT_A : HPD_PORT_B; + + /* + * Disable HPD to set the pin to output, and set output + * value. The HPD pin should not be enabled for DSI anyway, + * assuming the board design and VBT are sane, and the pin isn't + * used by a non-DSI encoder. + * + * The locking protects against concurrent SHOTPLUG_CTL_DDI + * modifications in irq setup and handling. + */ + spin_lock_irq(&dev_priv->irq_lock); + intel_de_rmw(dev_priv, SHOTPLUG_CTL_DDI, + SHOTPLUG_CTL_DDI_HPD_ENABLE(index) | + SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(index), + value ? SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(index) : 0); + spin_unlock_irq(&dev_priv->irq_lock); + break; + case MIPI_AVDD_EN_1: + case MIPI_AVDD_EN_2: + index = gpio == MIPI_AVDD_EN_1 ? 0 : 1; + + intel_de_rmw(dev_priv, PP_CONTROL(index), PANEL_POWER_ON, + value ? PANEL_POWER_ON : 0); + break; + case MIPI_BKLT_EN_1: + case MIPI_BKLT_EN_2: + index = gpio == MIPI_AVDD_EN_1 ? 0 : 1; + + intel_de_rmw(dev_priv, PP_CONTROL(index), EDP_BLC_ENABLE, + value ? EDP_BLC_ENABLE : 0); + break; + case MIPI_AVEE_EN_1: + case MIPI_AVEE_EN_2: + index = gpio == MIPI_AVEE_EN_1 ? 1 : 2; + + intel_de_rmw(dev_priv, GPIO(dev_priv, index), + GPIO_CLOCK_VAL_OUT, + GPIO_CLOCK_DIR_MASK | GPIO_CLOCK_DIR_OUT | + GPIO_CLOCK_VAL_MASK | (value ? GPIO_CLOCK_VAL_OUT : 0)); + break; + case MIPI_VIO_EN_1: + case MIPI_VIO_EN_2: + index = gpio == MIPI_VIO_EN_1 ? 1 : 2; + + intel_de_rmw(dev_priv, GPIO(dev_priv, index), + GPIO_DATA_VAL_OUT, + GPIO_DATA_DIR_MASK | GPIO_DATA_DIR_OUT | + GPIO_DATA_VAL_MASK | (value ? GPIO_DATA_VAL_OUT : 0)); + break; + default: + MISSING_CASE(gpio); + } +} + static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) { struct drm_device *dev = intel_dsi->base.base.dev; @@ -384,8 +465,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) struct intel_connector *connector = intel_dsi->attached_connector; u8 gpio_source, gpio_index = 0, gpio_number; bool value; - - drm_dbg_kms(&dev_priv->drm, "\n"); + bool native = DISPLAY_VER(dev_priv) >= 11; if (connector->panel.vbt.dsi.seq_version >= 3) gpio_index = *data++; @@ -398,10 +478,18 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) else gpio_source = 0; + if (connector->panel.vbt.dsi.seq_version >= 4 && *data & BIT(1)) + native = false; + /* pull up/down */ value = *data++ & 1; - if (DISPLAY_VER(dev_priv) >= 11) + drm_dbg_kms(&dev_priv->drm, "GPIO index %u, number %u, source %u, native %s, set to %s\n", + gpio_index, gpio_number, gpio_source, str_yes_no(native), str_on_off(value)); + + if (native) + icl_native_gpio_set_value(dev_priv, gpio_number, value); + else if (DISPLAY_VER(dev_priv) >= 11) icl_exec_gpio(connector, gpio_source, gpio_index, value); else if (IS_VALLEYVIEW(dev_priv)) vlv_exec_gpio(connector, gpio_source, gpio_number, value); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index edfe363af838..91c533986041 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1974,7 +1974,10 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) if (ddi_hotplug_trigger) { u32 dig_hotplug_reg; + /* Locking due to DSI native GPIO sequences */ + spin_lock(&dev_priv->irq_lock); dig_hotplug_reg = intel_uncore_rmw(&dev_priv->uncore, SHOTPLUG_CTL_DDI, 0, 0); + spin_unlock(&dev_priv->irq_lock); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, ddi_hotplug_trigger, dig_hotplug_reg, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8e1892d14774..916176872544 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5988,6 +5988,7 @@ #define SHOTPLUG_CTL_DDI _MMIO(0xc4030) #define SHOTPLUG_CTL_DDI_HPD_ENABLE(hpd_pin) (0x8 << (_HPD_PIN_DDI(hpd_pin) * 4)) +#define SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(hpd_pin) (0x4 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_STATUS_MASK(hpd_pin) (0x3 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_NO_DETECT(hpd_pin) (0x0 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_SHORT_DETECT(hpd_pin) (0x1 << (_HPD_PIN_DDI(hpd_pin) * 4)) -- cgit v1.2.3-59-g8ed1b From 6217e9f05a74df48c77ee68993d587cdfdb1feb7 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Dec 2022 16:01:05 +0200 Subject: drm/i915/dsi: fix MIPI_BKLT_EN_1 native GPIO index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to copy-paste fail, MIPI_BKLT_EN_1 would always use PPS index 1, never 0. Fix the sloppiest commit in recent memory. Fixes: 963bbdb32b47 ("drm/i915/dsi: add support for ICL+ native MIPI GPIO sequence") Reported-by: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221220140105.313333-1-jani.nikula@intel.com (cherry picked from commit a561933c571798868b5fa42198427a7e6df56c09) Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 41f025f089d9..2cbc1292ab38 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -430,7 +430,7 @@ static void icl_native_gpio_set_value(struct drm_i915_private *dev_priv, break; case MIPI_BKLT_EN_1: case MIPI_BKLT_EN_2: - index = gpio == MIPI_AVDD_EN_1 ? 0 : 1; + index = gpio == MIPI_BKLT_EN_1 ? 0 : 1; intel_de_rmw(dev_priv, PP_CONTROL(index), EDP_BLC_ENABLE, value ? EDP_BLC_ENABLE : 0); -- cgit v1.2.3-59-g8ed1b From 88603b6dc419445847923fcb7fe5080067a30f98 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Jan 2023 13:53:16 -0800 Subject: Linux 6.2-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a5133e422f69..c05b4fb7121e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* -- cgit v1.2.3-59-g8ed1b From 7adde5ac25fa50dbb1fb237042316685cafe976c Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 8 Dec 2022 11:36:20 +0000 Subject: mtd: parsers: Fix potential memory leak in mtd_parser_tplink_safeloader_parse() The parts needs to be freed with all its elements, otherwise it will be leaked. Fixes: 00a3588084be ("mtd: parsers: add TP-Link SafeLoader partitions table parser") Signed-off-by: Yuan Can Reviewed-by: Dan Carpenter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221208113620.78855-1-yuancan@huawei.com --- drivers/mtd/parsers/tplink_safeloader.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/tplink_safeloader.c b/drivers/mtd/parsers/tplink_safeloader.c index f601e7bd8627..1c689dafca2a 100644 --- a/drivers/mtd/parsers/tplink_safeloader.c +++ b/drivers/mtd/parsers/tplink_safeloader.c @@ -91,7 +91,7 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd, buf = mtd_parser_tplink_safeloader_read_table(mtd); if (!buf) { err = -ENOENT; - goto err_out; + goto err_free_parts; } for (idx = 0, offset = TPLINK_SAFELOADER_DATA_OFFSET; @@ -118,6 +118,8 @@ static int mtd_parser_tplink_safeloader_parse(struct mtd_info *mtd, err_free: for (idx -= 1; idx >= 0; idx--) kfree(parts[idx].name); +err_free_parts: + kfree(parts); err_out: return err; }; -- cgit v1.2.3-59-g8ed1b From 105c14b84d93168431abba5d55e6c26fa4b65abb Mon Sep 17 00:00:00 2001 From: Mikhail Zhilkin Date: Thu, 8 Dec 2022 23:28:29 +0300 Subject: mtd: parsers: scpart: fix __udivdi3 undefined on mips This fixes the following compile error on mips architecture with clang version 16.0.0 reported by the 0-DAY CI Kernel Test Service: ld.lld: error: undefined symbol: __udivdi3 referenced by scpart.c mtd/parsers/scpart.o:(scpart_parse) in archive drivers/built-in.a As a workaround this makes 'offs' a 32-bit type. This is enough, because the mtd containing partition table practically does not exceed 1 MB. We can revert this when the [Link] has been resolved. Link: https://github.com/ClangBuiltLinux/linux/issues/1635 Fixes: 9b78ef0c7997 ("mtd: parsers: add support for Sercomm partitions") Reported-by: kernel test robot Suggested-by: Arnd Bergmann Signed-off-by: Mikhail Zhilkin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/805fe58e-690f-6a3f-5ebf-2f6f6e6e4599@gmail.com --- drivers/mtd/parsers/scpart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/scpart.c b/drivers/mtd/parsers/scpart.c index 02601bb33de4..6e5e11c37078 100644 --- a/drivers/mtd/parsers/scpart.c +++ b/drivers/mtd/parsers/scpart.c @@ -50,7 +50,7 @@ static int scpart_scan_partmap(struct mtd_info *master, loff_t partmap_offs, int cnt = 0; int res = 0; int res2; - loff_t offs; + uint32_t offs; size_t retlen; struct sc_part_desc *pdesc = NULL; struct sc_part_desc *tmpdesc; -- cgit v1.2.3-59-g8ed1b From d19ab1f785d0b6b9f709799f0938658903821ba1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Dec 2022 15:13:34 +0100 Subject: mtd: cfi: allow building spi-intel standalone When MTD or MTD_CFI_GEOMETRY is disabled, the spi-intel driver fails to build, as it includes the shared CFI header: include/linux/mtd/cfi.h:62:2: error: #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. [-Werror=cpp] 62 | #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. linux/mtd/spi-nor.h does not actually need to include cfi.h, so remove the inclusion here to fix the warning. This uncovers a missing #include in spi-nor/core.c so add that there to prevent a different build issue. Fixes: e23e5a05d1fd ("mtd: spi-nor: intel-spi: Convert to SPI MEM") Signed-off-by: Arnd Bergmann Reviewed-by: Mika Westerberg Reviewed-by: Tokunori Ikegami Acked-by: Pratyush Yadav Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221220141352.1486360-1-arnd@kernel.org --- drivers/mtd/spi-nor/core.c | 1 + include/linux/mtd/spi-nor.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index d8703d7dfd0a..d67c926bca8b 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 25765556223a..a3f8cdca90c8 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -7,7 +7,6 @@ #define __LINUX_MTD_SPI_NOR_H #include -#include #include #include -- cgit v1.2.3-59-g8ed1b From c0f7ae27539fbac267384a7bfc58296ea7550d52 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 26 Dec 2022 16:40:43 +0200 Subject: MAINTAINERS: Update email of Tudor Ambarus My professional email will change and the microchip one will bounce after mid-november of 2022. Update the MAINTAINERS file, the YAML bindings, MODULE_AUTHOR entries and author mentions, and add an entry in the .mailmap file. Signed-off-by: Tudor Ambarus Acked-by: Rob Herring Acked-by: Pratyush Yadav Acked-by: Mark Brown Acked-by: Nicolas Ferre Acked-by: Herbert Xu Acked-by: Krzysztof Kozlowski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221226144043.367706-1-tudor.ambarus@linaro.org --- .mailmap | 1 + .../devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml | 2 +- .../devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml | 2 +- .../devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml | 2 +- .../devicetree/bindings/spi/atmel,at91rm9200-spi.yaml | 2 +- Documentation/devicetree/bindings/spi/atmel,quadspi.yaml | 2 +- MAINTAINERS | 10 +++++----- drivers/crypto/atmel-ecc.c | 4 ++-- drivers/crypto/atmel-i2c.c | 4 ++-- drivers/crypto/atmel-i2c.h | 2 +- 10 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.mailmap b/.mailmap index ccba4cf0d893..562f70d3b6a5 100644 --- a/.mailmap +++ b/.mailmap @@ -422,6 +422,7 @@ Tony Luck TripleX Chung TripleX Chung Tsuneo Yoshioka +Tudor Ambarus Tycho Andersen Tzung-Bi Shih Uwe Kleine-König diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml index 0ccaab16dc61..0b7383b3106b 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Advanced Encryption Standard (AES) HW cryptographic accelerator maintainers: - - Tudor Ambarus + - Tudor Ambarus properties: compatible: diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml index 5163c51b4547..ee2ffb034325 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Secure Hash Algorithm (SHA) HW cryptographic accelerator maintainers: - - Tudor Ambarus + - Tudor Ambarus properties: compatible: diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml index fcc5adf03cad..3d6ed24b1b00 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Triple Data Encryption Standard (TDES) HW cryptographic accelerator maintainers: - - Tudor Ambarus + - Tudor Ambarus properties: compatible: diff --git a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml index 4dd973e341e6..6c57dd6c3a36 100644 --- a/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml +++ b/Documentation/devicetree/bindings/spi/atmel,at91rm9200-spi.yaml @@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel SPI device maintainers: - - Tudor Ambarus + - Tudor Ambarus allOf: - $ref: spi-controller.yaml# diff --git a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml index 1d493add4053..b0d99bc10535 100644 --- a/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml +++ b/Documentation/devicetree/bindings/spi/atmel,quadspi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Atmel Quad Serial Peripheral Interface (QSPI) maintainers: - - Tudor Ambarus + - Tudor Ambarus allOf: - $ref: spi-controller.yaml# diff --git a/MAINTAINERS b/MAINTAINERS index 7f86d02cb427..36c500e34508 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13620,7 +13620,7 @@ F: arch/microblaze/ MICROCHIP AT91 DMA DRIVERS M: Ludovic Desroches -M: Tudor Ambarus +M: Tudor Ambarus L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: dmaengine@vger.kernel.org S: Supported @@ -13665,7 +13665,7 @@ F: Documentation/devicetree/bindings/media/microchip,csi2dc.yaml F: drivers/media/platform/microchip/microchip-csi2dc.c MICROCHIP ECC DRIVER -M: Tudor Ambarus +M: Tudor Ambarus L: linux-crypto@vger.kernel.org S: Maintained F: drivers/crypto/atmel-ecc.* @@ -13762,7 +13762,7 @@ S: Maintained F: drivers/mmc/host/atmel-mci.c MICROCHIP NAND DRIVER -M: Tudor Ambarus +M: Tudor Ambarus L: linux-mtd@lists.infradead.org S: Supported F: Documentation/devicetree/bindings/mtd/atmel-nand.txt @@ -13814,7 +13814,7 @@ S: Supported F: drivers/power/reset/at91-sama5d2_shdwc.c MICROCHIP SPI DRIVER -M: Tudor Ambarus +M: Tudor Ambarus S: Supported F: drivers/spi/spi-atmel.* @@ -19665,7 +19665,7 @@ F: drivers/clk/spear/ F: drivers/pinctrl/spear/ SPI NOR SUBSYSTEM -M: Tudor Ambarus +M: Tudor Ambarus M: Pratyush Yadav R: Michael Walle L: linux-mtd@lists.infradead.org diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 53100fb9b07b..12205e2b53b4 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -3,7 +3,7 @@ * Microchip / Atmel ECC (I2C) driver. * * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus + * Author: Tudor Ambarus */ #include @@ -411,6 +411,6 @@ static void __exit atmel_ecc_exit(void) module_init(atmel_ecc_init); module_exit(atmel_ecc_exit); -MODULE_AUTHOR("Tudor Ambarus "); +MODULE_AUTHOR("Tudor Ambarus"); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 81ce09bedda8..55bff1e13142 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -3,7 +3,7 @@ * Microchip / Atmel ECC (I2C) driver. * * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus + * Author: Tudor Ambarus */ #include @@ -390,6 +390,6 @@ static void __exit atmel_i2c_exit(void) module_init(atmel_i2c_init); module_exit(atmel_i2c_exit); -MODULE_AUTHOR("Tudor Ambarus "); +MODULE_AUTHOR("Tudor Ambarus"); MODULE_DESCRIPTION("Microchip / Atmel ECC (I2C) driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index 48929efe2a5b..35f7857a7f7c 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2017, Microchip Technology Inc. - * Author: Tudor Ambarus + * Author: Tudor Ambarus */ #ifndef __ATMEL_I2C_H__ -- cgit v1.2.3-59-g8ed1b From 25e3f30601a368642678744fc8a9b1dce183c7bc Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Fri, 23 Sep 2022 11:14:57 +0800 Subject: mtd: spi-nor: core: fix implicit declaration warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit spi-nor/core.c needs to include linux/delay.h, or it would raise below compile warning: drivers/mtd/spi-nor/core.c: In function ‘spi_nor_soft_reset’: drivers/mtd/spi-nor/core.c:2779:2: error: implicit declaration of function ‘usleep_range’ [-Werror=implicit-function-declaration] 2779 | usleep_range(SPI_NOR_SRST_SLEEP_MIN, SPI_NOR_SRST_SLEEP_MAX); | ^~~~~~~~~~~~ Fixes: d73ee7534cc5 ("mtd: spi-nor: core: perform a Soft Reset on shutdown") Signed-off-by: Zeng Heng Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20220923031457.56103-1-zengheng4@huawei.com --- drivers/mtd/spi-nor/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index d8703d7dfd0a..b500655f7937 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3-59-g8ed1b From 3f592a869f87723314f0cb1ac232bd3bf8245be8 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 10 Jan 2023 18:47:02 +0200 Subject: mtd: spi-nor: spansion: Consider reserved bits in CFR5 register CFR5[6] is reserved bit and must be always 1. Set it to comply with flash requirements. While fixing SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_{EN, DS} definition, stop using magic numbers and describe the missing bit fields in CFR5 register. This is useful for both readability and future possible addition of Octal STR mode support. Fixes: c3266af101f2 ("mtd: spi-nor: spansion: add support for Cypress Semper flash") Cc: stable@vger.kernel.org Reported-by: Takahiro Kuwano Signed-off-by: Tudor Ambarus Reviewed-by: Dhruva Gole Reviewed-by: Pratyush Yadav Tested-by: Dhruva Gole Link: https://lore.kernel.org/linux-mtd/20230110164703.83413-1-tudor.ambarus@linaro.org --- drivers/mtd/spi-nor/spansion.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/spi-nor/spansion.c b/drivers/mtd/spi-nor/spansion.c index b621cdfd506f..07fe0f6fdfe3 100644 --- a/drivers/mtd/spi-nor/spansion.c +++ b/drivers/mtd/spi-nor/spansion.c @@ -21,8 +21,13 @@ #define SPINOR_REG_CYPRESS_CFR3V 0x00800004 #define SPINOR_REG_CYPRESS_CFR3V_PGSZ BIT(4) /* Page size. */ #define SPINOR_REG_CYPRESS_CFR5V 0x00800006 -#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN 0x3 -#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS 0 +#define SPINOR_REG_CYPRESS_CFR5_BIT6 BIT(6) +#define SPINOR_REG_CYPRESS_CFR5_DDR BIT(1) +#define SPINOR_REG_CYPRESS_CFR5_OPI BIT(0) +#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN \ + (SPINOR_REG_CYPRESS_CFR5_BIT6 | SPINOR_REG_CYPRESS_CFR5_DDR | \ + SPINOR_REG_CYPRESS_CFR5_OPI) +#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS SPINOR_REG_CYPRESS_CFR5_BIT6 #define SPINOR_OP_CYPRESS_RD_FAST 0xee /* Cypress SPI NOR flash operations. */ -- cgit v1.2.3-59-g8ed1b From ca5a16db010018095da35c074397289a5bbcb543 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 10 Jan 2023 18:47:03 +0200 Subject: mtd: spi-nor: spansion: Make CFRx reg fields generic Cypress defines two flavors of configuration registers, volatile and non volatile, and both use the same bit fields. Rename the bitfields in the configuration registers so that they can be used for both flavors. Signed-off-by: Tudor Ambarus Reviewed-by: Dhruva Gole Reviewed-by: Takahiro Kuwano Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/linux-mtd/20230110164703.83413-2-tudor.ambarus@linaro.org --- drivers/mtd/spi-nor/spansion.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/mtd/spi-nor/spansion.c b/drivers/mtd/spi-nor/spansion.c index 07fe0f6fdfe3..12a256c0ef4c 100644 --- a/drivers/mtd/spi-nor/spansion.c +++ b/drivers/mtd/spi-nor/spansion.c @@ -15,19 +15,19 @@ #define SPINOR_OP_RD_ANY_REG 0x65 /* Read any register */ #define SPINOR_OP_WR_ANY_REG 0x71 /* Write any register */ #define SPINOR_REG_CYPRESS_CFR1V 0x00800002 -#define SPINOR_REG_CYPRESS_CFR1V_QUAD_EN BIT(1) /* Quad Enable */ +#define SPINOR_REG_CYPRESS_CFR1_QUAD_EN BIT(1) /* Quad Enable */ #define SPINOR_REG_CYPRESS_CFR2V 0x00800003 -#define SPINOR_REG_CYPRESS_CFR2V_MEMLAT_11_24 0xb +#define SPINOR_REG_CYPRESS_CFR2_MEMLAT_11_24 0xb #define SPINOR_REG_CYPRESS_CFR3V 0x00800004 -#define SPINOR_REG_CYPRESS_CFR3V_PGSZ BIT(4) /* Page size. */ +#define SPINOR_REG_CYPRESS_CFR3_PGSZ BIT(4) /* Page size. */ #define SPINOR_REG_CYPRESS_CFR5V 0x00800006 #define SPINOR_REG_CYPRESS_CFR5_BIT6 BIT(6) #define SPINOR_REG_CYPRESS_CFR5_DDR BIT(1) #define SPINOR_REG_CYPRESS_CFR5_OPI BIT(0) -#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN \ +#define SPINOR_REG_CYPRESS_CFR5_OCT_DTR_EN \ (SPINOR_REG_CYPRESS_CFR5_BIT6 | SPINOR_REG_CYPRESS_CFR5_DDR | \ SPINOR_REG_CYPRESS_CFR5_OPI) -#define SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS SPINOR_REG_CYPRESS_CFR5_BIT6 +#define SPINOR_REG_CYPRESS_CFR5_OCT_DTR_DS SPINOR_REG_CYPRESS_CFR5_BIT6 #define SPINOR_OP_CYPRESS_RD_FAST 0xee /* Cypress SPI NOR flash operations. */ @@ -57,7 +57,7 @@ static int cypress_nor_octal_dtr_en(struct spi_nor *nor) u8 addr_mode_nbytes = nor->params->addr_mode_nbytes; /* Use 24 dummy cycles for memory array reads. */ - *buf = SPINOR_REG_CYPRESS_CFR2V_MEMLAT_11_24; + *buf = SPINOR_REG_CYPRESS_CFR2_MEMLAT_11_24; op = (struct spi_mem_op) CYPRESS_NOR_WR_ANY_REG_OP(addr_mode_nbytes, SPINOR_REG_CYPRESS_CFR2V, 1, buf); @@ -69,7 +69,7 @@ static int cypress_nor_octal_dtr_en(struct spi_nor *nor) nor->read_dummy = 24; /* Set the octal and DTR enable bits. */ - buf[0] = SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_EN; + buf[0] = SPINOR_REG_CYPRESS_CFR5_OCT_DTR_EN; op = (struct spi_mem_op) CYPRESS_NOR_WR_ANY_REG_OP(addr_mode_nbytes, SPINOR_REG_CYPRESS_CFR5V, 1, buf); @@ -103,7 +103,7 @@ static int cypress_nor_octal_dtr_dis(struct spi_nor *nor) * in 8D-8D-8D mode. Since there is no register at the next location, * just initialize the value to 0 and let the transaction go on. */ - buf[0] = SPINOR_REG_CYPRESS_CFR5V_OCT_DTR_DS; + buf[0] = SPINOR_REG_CYPRESS_CFR5_OCT_DTR_DS; buf[1] = 0; op = (struct spi_mem_op) CYPRESS_NOR_WR_ANY_REG_OP(nor->addr_nbytes, @@ -155,11 +155,11 @@ static int cypress_nor_quad_enable_volatile(struct spi_nor *nor) if (ret) return ret; - if (nor->bouncebuf[0] & SPINOR_REG_CYPRESS_CFR1V_QUAD_EN) + if (nor->bouncebuf[0] & SPINOR_REG_CYPRESS_CFR1_QUAD_EN) return 0; /* Update the Quad Enable bit. */ - nor->bouncebuf[0] |= SPINOR_REG_CYPRESS_CFR1V_QUAD_EN; + nor->bouncebuf[0] |= SPINOR_REG_CYPRESS_CFR1_QUAD_EN; op = (struct spi_mem_op) CYPRESS_NOR_WR_ANY_REG_OP(addr_mode_nbytes, SPINOR_REG_CYPRESS_CFR1V, 1, @@ -210,7 +210,7 @@ static int cypress_nor_set_page_size(struct spi_nor *nor) if (ret) return ret; - if (nor->bouncebuf[0] & SPINOR_REG_CYPRESS_CFR3V_PGSZ) + if (nor->bouncebuf[0] & SPINOR_REG_CYPRESS_CFR3_PGSZ) nor->params->page_size = 512; else nor->params->page_size = 256; -- cgit v1.2.3-59-g8ed1b From 59273180299ad264d086135e8199c1b05ea51b69 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 15 Dec 2022 09:12:33 +0100 Subject: mtd: spi-nor: Create macros to define chip IDs and geometries The INFO() macro defines an ID array and a couple of geometry properties. Right now all its lines are duplicated twice because of the INFO6() macro (for extended IDs) and soon as well we will need to add a geometry parameter to include the number of banks. In order to limit the code duplication, let's create a number of intermediate macros which will facilitate defining high-level INFOX() macros. There is no functional change. Signed-off-by: Miquel Raynal Reviewed-by: Pratyush Yadav Link: https://lore.kernel.org/r/20221215081241.407098-2-miquel.raynal@bootlin.com Signed-off-by: Tudor Ambarus --- drivers/mtd/spi-nor/core.h | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h index f03b55cf7e6f..f6d012e1f681 100644 --- a/drivers/mtd/spi-nor/core.h +++ b/drivers/mtd/spi-nor/core.h @@ -529,33 +529,30 @@ struct flash_info { const struct spi_nor_fixups *fixups; }; +#define SPI_NOR_ID_2ITEMS(_id) ((_id) >> 8) & 0xff, (_id) & 0xff +#define SPI_NOR_ID_3ITEMS(_id) ((_id) >> 16) & 0xff, SPI_NOR_ID_2ITEMS(_id) + +#define SPI_NOR_ID(_jedec_id, _ext_id) \ + .id = { SPI_NOR_ID_3ITEMS(_jedec_id), SPI_NOR_ID_2ITEMS(_ext_id) }, \ + .id_len = !(_jedec_id) ? 0 : (3 + ((_ext_id) ? 2 : 0)) + +#define SPI_NOR_ID6(_jedec_id, _ext_id) \ + .id = { SPI_NOR_ID_3ITEMS(_jedec_id), SPI_NOR_ID_3ITEMS(_ext_id) }, \ + .id_len = 6 + +#define SPI_NOR_GEOMETRY(_sector_size, _n_sectors) \ + .sector_size = (_sector_size), \ + .n_sectors = (_n_sectors), \ + .page_size = 256 + /* Used when the "_ext_id" is two bytes at most */ #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors) \ - .id = { \ - ((_jedec_id) >> 16) & 0xff, \ - ((_jedec_id) >> 8) & 0xff, \ - (_jedec_id) & 0xff, \ - ((_ext_id) >> 8) & 0xff, \ - (_ext_id) & 0xff, \ - }, \ - .id_len = (!(_jedec_id) ? 0 : (3 + ((_ext_id) ? 2 : 0))), \ - .sector_size = (_sector_size), \ - .n_sectors = (_n_sectors), \ - .page_size = 256, \ + SPI_NOR_ID((_jedec_id), (_ext_id)), \ + SPI_NOR_GEOMETRY((_sector_size), (_n_sectors)), #define INFO6(_jedec_id, _ext_id, _sector_size, _n_sectors) \ - .id = { \ - ((_jedec_id) >> 16) & 0xff, \ - ((_jedec_id) >> 8) & 0xff, \ - (_jedec_id) & 0xff, \ - ((_ext_id) >> 16) & 0xff, \ - ((_ext_id) >> 8) & 0xff, \ - (_ext_id) & 0xff, \ - }, \ - .id_len = 6, \ - .sector_size = (_sector_size), \ - .n_sectors = (_n_sectors), \ - .page_size = 256, \ + SPI_NOR_ID6((_jedec_id), (_ext_id)), \ + SPI_NOR_GEOMETRY((_sector_size), (_n_sectors)), #define CAT25_INFO(_sector_size, _n_sectors, _page_size, _addr_nbytes) \ .sector_size = (_sector_size), \ -- cgit v1.2.3-59-g8ed1b From f0f0cfdc3a024e21161714f2e05f0df3b84d42ad Mon Sep 17 00:00:00 2001 From: Louis Rannou Date: Fri, 3 Feb 2023 09:07:54 +0200 Subject: mtd: spi-nor: Fix shift-out-of-bounds in spi_nor_set_erase_type spi_nor_set_erase_type() was used either to set or to mask out an erase type. When we used it to mask out an erase type a shift-out-of-bounds was hit: UBSAN: shift-out-of-bounds in drivers/mtd/spi-nor/core.c:2237:24 shift exponent 4294967295 is too large for 32-bit type 'int' The setting of the size_{shift, mask} and of the opcode are unnecessary when the erase size is zero, as throughout the code just the erase size is considered to determine whether an erase type is supported or not. Setting the opcode to 0xFF was wrong too as nobody guarantees that 0xFF is an unused opcode. Thus when masking out an erase type, just set the erase size to zero. This will fix the shift-out-of-bounds. Fixes: 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories") Cc: stable@vger.kernel.org Reported-by: Alexander Stein Signed-off-by: Louis Rannou Tested-by: Alexander Stein Link: https://lore.kernel.org/r/20230203070754.50677-1-tudor.ambarus@linaro.org [ta: refine changes, new commit message, fix compilation error] Signed-off-by: Tudor Ambarus --- drivers/mtd/spi-nor/core.c | 9 +++++++++ drivers/mtd/spi-nor/core.h | 1 + drivers/mtd/spi-nor/sfdp.c | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index b500655f7937..3012758bbafa 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -2026,6 +2026,15 @@ void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size, erase->size_mask = (1 << erase->size_shift) - 1; } +/** + * spi_nor_mask_erase_type() - mask out a SPI NOR erase type + * @erase: pointer to a structure that describes a SPI NOR erase type + */ +void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase) +{ + erase->size = 0; +} + /** * spi_nor_init_uniform_erase_map() - Initialize uniform erase map * @map: the erase map of the SPI NOR diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h index f6d012e1f681..25423225c29d 100644 --- a/drivers/mtd/spi-nor/core.h +++ b/drivers/mtd/spi-nor/core.h @@ -681,6 +681,7 @@ void spi_nor_set_pp_settings(struct spi_nor_pp_command *pp, u8 opcode, void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size, u8 opcode); +void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase); struct spi_nor_erase_region * spi_nor_region_next(struct spi_nor_erase_region *region); void spi_nor_init_uniform_erase_map(struct spi_nor_erase_map *map, diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c index 3acc01c3a900..526c5d57b905 100644 --- a/drivers/mtd/spi-nor/sfdp.c +++ b/drivers/mtd/spi-nor/sfdp.c @@ -875,7 +875,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor, */ for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) if (!(regions_erase_type & BIT(erase[i].idx))) - spi_nor_set_erase_type(&erase[i], 0, 0xFF); + spi_nor_mask_erase_type(&erase[i]); return 0; } @@ -1089,7 +1089,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor, erase_type[i].opcode = (dwords[SFDP_DWORD(2)] >> erase_type[i].idx * 8) & 0xFF; else - spi_nor_set_erase_type(&erase_type[i], 0u, 0xFF); + spi_nor_mask_erase_type(&erase_type[i]); } /* -- cgit v1.2.3-59-g8ed1b From 893fd950c89d516a7cf365700b2bd7bb3efc15a5 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 2 Feb 2023 16:46:28 +0200 Subject: mtd: spi-nor: Sort headers alphabetically Sort headers alphabetically - it helps locating duplicates, and makes it easier to figure out where to insert new headers. Alphabetic order should also prove that each header is self-contained, i.e. can be included without prerequisites. Link: https://lore.kernel.org/r/20230202144628.14443-1-tudor.ambarus@linaro.org Signed-off-by: Tudor Ambarus --- drivers/mtd/spi-nor/core.c | 13 ++++++------- drivers/mtd/spi-nor/debugfs.c | 2 +- drivers/mtd/spi-nor/sfdp.c | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 3012758bbafa..0a78045ca1d9 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -9,19 +9,18 @@ #include #include -#include +#include #include -#include #include -#include -#include -#include - +#include #include +#include +#include #include #include +#include +#include #include -#include #include "core.h" diff --git a/drivers/mtd/spi-nor/debugfs.c b/drivers/mtd/spi-nor/debugfs.c index ff895f6758ea..845b78c7ecc7 100644 --- a/drivers/mtd/spi-nor/debugfs.c +++ b/drivers/mtd/spi-nor/debugfs.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include -#include #include "core.h" diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c index 526c5d57b905..298ab5e53a8c 100644 --- a/drivers/mtd/spi-nor/sfdp.c +++ b/drivers/mtd/spi-nor/sfdp.c @@ -5,9 +5,9 @@ */ #include +#include #include #include -#include #include "core.h" -- cgit v1.2.3-59-g8ed1b