From bb90d4bc7b6a536b2e4db45f4763e467c2008251 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 9 Feb 2021 22:22:14 -0800 Subject: mm/highmem: Lift memcpy_[to|from]_page to core Working through a conversion to a call kmap_local_page() instead of kmap() revealed many places where the pattern kmap/memcpy/kunmap occurred. Eric Biggers, Matthew Wilcox, Christoph Hellwig, Dan Williams, and Al Viro all suggested putting this code into helper functions. Al Viro further pointed out that these functions already existed in the iov_iter code.[1] Various locations for the lifted functions were considered. Headers like mm.h or string.h seem ok but don't really portray the functionality well. pagemap.h made some sense but is for page cache functionality.[2] Another alternative would be to create a new header for the promoted memcpy functions, but it masks the fact that these are designed to copy to/from pages using the kernel direct mappings and complicates matters with a new header. Placing these functions in 'highmem.h' is suboptimal especially with the changes being proposed in the functionality of kmap. From a caller perspective including/using 'highmem.h' implies that the functions defined in that header are only required when highmem is in use which is increasingly not the case with modern processors. However, highmem.h is where all the current functions like this reside (zero_user(), clear_highpage(), clear_user_highpage(), copy_user_highpage(), and copy_highpage()). So it makes the most sense even though it is distasteful for some.[3] Lift memcpy_to_page() and memcpy_from_page() to pagemap.h. [1] https://lore.kernel.org/lkml/20201013200149.GI3576660@ZenIV.linux.org.uk/ https://lore.kernel.org/lkml/20201013112544.GA5249@infradead.org/ [2] https://lore.kernel.org/lkml/20201208122316.GH7338@casper.infradead.org/ [3] https://lore.kernel.org/lkml/20201013200149.GI3576660@ZenIV.linux.org.uk/#t https://lore.kernel.org/lkml/20201208163814.GN1563847@iweiny-DESK2.sc.intel.com/ Cc: Boris Pismenny Cc: Or Gerlitz Cc: Dave Hansen Suggested-by: Matthew Wilcox Suggested-by: Christoph Hellwig Suggested-by: Dan Williams Suggested-by: Al Viro Suggested-by: Eric Biggers Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Ira Weiny Signed-off-by: David Sterba --- include/linux/highmem.h | 18 ++++++++++++++++++ lib/iov_iter.c | 14 -------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d2c70d3772a3..736b6a9f144d 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -276,4 +276,22 @@ static inline void copy_highpage(struct page *to, struct page *from) #endif +static inline void memcpy_from_page(char *to, struct page *page, + size_t offset, size_t len) +{ + char *from = kmap_atomic(page); + + memcpy(to, from + offset, len); + kunmap_atomic(from); +} + +static inline void memcpy_to_page(struct page *page, size_t offset, + const char *from, size_t len) +{ + char *to = kmap_atomic(page); + + memcpy(to + offset, from, len); + kunmap_atomic(to); +} + #endif /* _LINUX_HIGHMEM_H */ diff --git a/lib/iov_iter.c b/lib/iov_iter.c index a21e6a5792c5..9889e9903cdf 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -466,20 +466,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_init); -static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) -{ - char *from = kmap_atomic(page); - memcpy(to, from + offset, len); - kunmap_atomic(from); -} - -static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) -{ - char *to = kmap_atomic(page); - memcpy(to + offset, from, len); - kunmap_atomic(to); -} - static void memzero_page(struct page *page, size_t offset, size_t len) { char *addr = kmap_atomic(page); -- cgit v1.2.3-59-g8ed1b From 61b205f579911a11f0b576f73275eca2aed0d108 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 9 Feb 2021 22:22:15 -0800 Subject: mm/highmem: Convert memcpy_[to|from]_page() to kmap_local_page() kmap_local_page() is more efficient and is well suited for these calls. Convert the kmap() to kmap_local_page() Cc: Andrew Morton Cc: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Ira Weiny Signed-off-by: David Sterba --- include/linux/highmem.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 736b6a9f144d..c17a175fe5fe 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -279,19 +279,19 @@ static inline void copy_highpage(struct page *to, struct page *from) static inline void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) { - char *from = kmap_atomic(page); + char *from = kmap_local_page(page); memcpy(to, from + offset, len); - kunmap_atomic(from); + kunmap_local(from); } static inline void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) { - char *to = kmap_atomic(page); + char *to = kmap_local_page(page); memcpy(to + offset, from, len); - kunmap_atomic(to); + kunmap_local(to); } #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3-59-g8ed1b From 6a0996db6879cf09f989c5f44f9edd38240cb346 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 9 Feb 2021 22:22:16 -0800 Subject: mm/highmem: Introduce memcpy_page(), memmove_page(), and memset_page() 3 more common kmap patterns are kmap/memcpy/kunmap, kmap/memmove/kunmap. and kmap/memset/kunmap. Add helper functions for those patterns which use kmap_local_page(). Cc: Andrew Morton Cc: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Ira Weiny Signed-off-by: David Sterba --- include/linux/highmem.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index c17a175fe5fe..0b5d89621cb9 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -276,6 +276,39 @@ static inline void copy_highpage(struct page *to, struct page *from) #endif +static inline void memcpy_page(struct page *dst_page, size_t dst_off, + struct page *src_page, size_t src_off, + size_t len) +{ + char *dst = kmap_local_page(dst_page); + char *src = kmap_local_page(src_page); + + memcpy(dst + dst_off, src + src_off, len); + kunmap_local(src); + kunmap_local(dst); +} + +static inline void memmove_page(struct page *dst_page, size_t dst_off, + struct page *src_page, size_t src_off, + size_t len) +{ + char *dst = kmap_local_page(dst_page); + char *src = kmap_local_page(src_page); + + memmove(dst + dst_off, src + src_off, len); + kunmap_local(src); + kunmap_local(dst); +} + +static inline void memset_page(struct page *page, size_t offset, int val, + size_t len) +{ + char *addr = kmap_local_page(page); + + memset(addr + offset, val, len); + kunmap_local(addr); +} + static inline void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) { -- cgit v1.2.3-59-g8ed1b From ca18f6ea012bf30236b76c3480ac2c97131b6f8f Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 10 Feb 2021 09:49:28 -0800 Subject: mm/highmem: Add VM_BUG_ON() to mem*_page() calls Add VM_BUG_ON bounds checks to ensure the newly lifted and created page memory operations do not result in corrupted data in neighbor pages.[1][2] [1] https://lore.kernel.org/lkml/20201210053502.GS1563847@iweiny-DESK2.sc.intel.com/ [2] https://lore.kernel.org/lkml/20210209110931.00f00e47d9a0529fcee2ff01@linux-foundation.org/ Suggested-by: Matthew Wilcox Suggested-by: Andrew Morton Reviewed-by: Christoph Hellwig Signed-off-by: Ira Weiny Signed-off-by: David Sterba --- include/linux/highmem.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 0b5d89621cb9..44170f312ae7 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -283,6 +283,7 @@ static inline void memcpy_page(struct page *dst_page, size_t dst_off, char *dst = kmap_local_page(dst_page); char *src = kmap_local_page(src_page); + VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); memcpy(dst + dst_off, src + src_off, len); kunmap_local(src); kunmap_local(dst); @@ -295,6 +296,7 @@ static inline void memmove_page(struct page *dst_page, size_t dst_off, char *dst = kmap_local_page(dst_page); char *src = kmap_local_page(src_page); + VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE); memmove(dst + dst_off, src + src_off, len); kunmap_local(src); kunmap_local(dst); @@ -305,6 +307,7 @@ static inline void memset_page(struct page *page, size_t offset, int val, { char *addr = kmap_local_page(page); + VM_BUG_ON(offset + len > PAGE_SIZE); memset(addr + offset, val, len); kunmap_local(addr); } @@ -314,6 +317,7 @@ static inline void memcpy_from_page(char *to, struct page *page, { char *from = kmap_local_page(page); + VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to, from + offset, len); kunmap_local(from); } @@ -323,6 +327,7 @@ static inline void memcpy_to_page(struct page *page, size_t offset, { char *to = kmap_local_page(page); + VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); kunmap_local(to); } -- cgit v1.2.3-59-g8ed1b From d70cef0d46729808dc53f145372c02b145c92604 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 27 Jan 2021 22:15:03 -0800 Subject: btrfs: fix raid6 qstripe kmap When a qstripe is required an extra page is allocated and mapped. There were 3 problems: 1) There is no corresponding call of kunmap() for the qstripe page. 2) There is no reason to map the qstripe page more than once if the number of bits set in rbio->dbitmap is greater than one. 3) There is no reason to map the parity page and unmap it each time through the loop. The page memory can continue to be reused with a single mapping on each iteration by raid6_call.gen_syndrome() without remapping. So map the page for the duration of the loop. Similarly, improve the algorithm by mapping the parity page just 1 time. Fixes: 5a6ac9eacb49 ("Btrfs, raid56: support parity scrub on raid56") CC: stable@vger.kernel.org # 4.4.x: c17af96554a8: btrfs: raid56: simplify tracking of Q stripe presence CC: stable@vger.kernel.org # 4.4.x Signed-off-by: Ira Weiny Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 5394641541f7..abf17fae0912 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2362,16 +2362,21 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, SetPageUptodate(p_page); if (has_qstripe) { + /* RAID6, allocate and map temp space for the Q stripe */ q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!q_page) { __free_page(p_page); goto cleanup; } SetPageUptodate(q_page); + pointers[rbio->real_stripes - 1] = kmap(q_page); } atomic_set(&rbio->error, 0); + /* Map the parity stripe just once */ + pointers[nr_data] = kmap(p_page); + for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { struct page *p; void *parity; @@ -2381,16 +2386,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, pointers[stripe] = kmap(p); } - /* then add the parity stripe */ - pointers[stripe++] = kmap(p_page); - if (has_qstripe) { - /* - * raid6, add the qstripe and call the - * library function to fill in our p/q - */ - pointers[stripe++] = kmap(q_page); - + /* RAID6, call the library function to fill in our P/Q */ raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, pointers); } else { @@ -2411,12 +2408,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, for (stripe = 0; stripe < nr_data; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); - kunmap(p_page); } + kunmap(p_page); __free_page(p_page); - if (q_page) + if (q_page) { + kunmap(q_page); __free_page(q_page); + } writeback: /* -- cgit v1.2.3-59-g8ed1b From be6a13613fd35602ea9e65d6634cf7af79f0a93d Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Feb 2021 15:03:23 +0800 Subject: btrfs: make btrfs_submit_compressed_read() subpage compatible For compressed read, we always submit page read using page size. This doesn't work well with subpage, as for subpage one page can contain several sectors. Such submission will read range out of what we want, and cause problems. Thankfully to make it subpage compatible, we only need to change how the last page of the compressed extent is read. Instead of always adding a full page to the compressed read bio, if we're at the last page, calculate the size using compressed length, so that we only add part of the range into the compressed read bio. Since we are here, also change the PAGE_SIZE used in lookup_extent_mapping() to sectorsize. This modification won't cause any functional change, as lookup_extent_mapping() can handle the case where the search range is larger than found extent range. Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 6d203acfdeb3..5bad3a0b8a88 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -640,7 +640,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, page_offset(bio_first_page_all(bio)), - PAGE_SIZE); + fs_info->sectorsize); read_unlock(&em_tree->lock); if (!em) return BLK_STS_IOERR; @@ -698,19 +698,30 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, refcount_set(&cb->pending_bios, 1); for (pg_index = 0; pg_index < nr_pages; pg_index++) { + u32 pg_len = PAGE_SIZE; int submit = 0; + /* + * To handle subpage case, we need to make sure the bio only + * covers the range we need. + * + * If we're at the last page, truncate the length to only cover + * the remaining part. + */ + if (pg_index == nr_pages - 1) + pg_len = min_t(u32, PAGE_SIZE, + compressed_len - pg_index * PAGE_SIZE); + page = cb->compressed_pages[pg_index]; page->mapping = inode->i_mapping; page->index = em_start >> PAGE_SHIFT; if (comp_bio->bi_iter.bi_size) - submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, + submit = btrfs_bio_fits_in_stripe(page, pg_len, comp_bio, 0); page->mapping = NULL; - if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) < - PAGE_SIZE) { + if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) { unsigned int nr_sectors; ret = btrfs_bio_wq_end_io(fs_info, comp_bio, @@ -743,9 +754,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; - bio_add_page(comp_bio, page, PAGE_SIZE, 0); + bio_add_page(comp_bio, page, pg_len, 0); } - cur_disk_byte += PAGE_SIZE; + cur_disk_byte += pg_len; } ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA); -- cgit v1.2.3-59-g8ed1b From 04d4ba4c90759844fb4ffa735214c1c41508d2f7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 4 Feb 2021 15:03:24 +0800 Subject: btrfs: make check_compressed_csum() to be subpage compatible Currently check_compressed_csum() completely relies on sectorsize == PAGE_SIZE to do checksum verification for compressed extents. To make it subpage compatible, this patch will: - Do extra calculation for the csum range Since we have multiple sectors inside a page, we need to only hash the range we want, not the full page anymore. - Do sector-by-sector hash inside the page With this patch and previous conversion on btrfs_submit_compressed_read(), now we can read subpage compressed extents properly, and do proper csum verification. Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/compression.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 5bad3a0b8a88..375dee691a37 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -141,6 +141,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, struct btrfs_fs_info *fs_info = inode->root->fs_info; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); const u32 csum_size = fs_info->csum_size; + const u32 sectorsize = fs_info->sectorsize; struct page *page; unsigned long i; char *kaddr; @@ -154,22 +155,34 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio, shash->tfm = fs_info->csum_shash; for (i = 0; i < cb->nr_pages; i++) { + u32 pg_offset; + u32 bytes_left = PAGE_SIZE; page = cb->compressed_pages[i]; - kaddr = kmap_atomic(page); - crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum); - kunmap_atomic(kaddr); - - if (memcmp(&csum, cb_sum, csum_size)) { - btrfs_print_data_csum_error(inode, disk_start, - csum, cb_sum, cb->mirror_num); - if (btrfs_io_bio(bio)->device) - btrfs_dev_stat_inc_and_print( - btrfs_io_bio(bio)->device, - BTRFS_DEV_STAT_CORRUPTION_ERRS); - return -EIO; + /* Determine the remaining bytes inside the page first */ + if (i == cb->nr_pages - 1) + bytes_left = cb->compressed_len - i * PAGE_SIZE; + + /* Hash through the page sector by sector */ + for (pg_offset = 0; pg_offset < bytes_left; + pg_offset += sectorsize) { + kaddr = kmap_atomic(page); + crypto_shash_digest(shash, kaddr + pg_offset, + sectorsize, csum); + kunmap_atomic(kaddr); + + if (memcmp(&csum, cb_sum, csum_size) != 0) { + btrfs_print_data_csum_error(inode, disk_start, + csum, cb_sum, cb->mirror_num); + if (btrfs_io_bio(bio)->device) + btrfs_dev_stat_inc_and_print( + btrfs_io_bio(bio)->device, + BTRFS_DEV_STAT_CORRUPTION_ERRS); + return -EIO; + } + cb_sum += csum_size; + disk_start += sectorsize; } - cb_sum += csum_size; } return 0; } -- cgit v1.2.3-59-g8ed1b From 3c17916510428dbccdf657de050c34e208347089 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Feb 2021 10:26:54 +0200 Subject: btrfs: fix race between extent freeing/allocation when using bitmaps During allocation the allocator will try to allocate an extent using cluster policy. Once the current cluster is exhausted it will remove the entry under btrfs_free_cluster::lock and subsequently acquire btrfs_free_space_ctl::tree_lock to dispose of the already-deleted entry and adjust btrfs_free_space_ctl::total_bitmap. This poses a problem because there exists a race condition between removing the entry under one lock and doing the necessary accounting holding a different lock since extent freeing only uses the 2nd lock. This can result in the following situation: T1: T2: btrfs_alloc_from_cluster insert_into_bitmap if (entry->bytes == 0) if (block_group && !list_empty(&block_group->cluster_list)) { rb_erase(entry) spin_unlock(&cluster->lock); (total_bitmaps is still 4) spin_lock(&cluster->lock); root> spin_lock(&ctl->tree_lock); recalculate_thresholds To fix this ensure that once depleted, the cluster entry is deleted when both cluster lock and tree locks are held in the allocator (T1), this ensures that even if there is a race with a concurrent insert_into_bitmap call it will correctly find the entry in the cluster and add the new space to it. CC: # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5400294bd271..abcf951e6b44 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3125,8 +3125,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, entry->bytes -= bytes; } - if (entry->bytes == 0) - rb_erase(&entry->offset_index, &cluster->root); break; } out: @@ -3143,7 +3141,10 @@ out: ctl->free_space -= bytes; if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; + + spin_lock(&cluster->lock); if (entry->bytes == 0) { + rb_erase(&entry->offset_index, &cluster->root); ctl->free_extents--; if (entry->bitmap) { kmem_cache_free(btrfs_free_space_bitmap_cachep, @@ -3156,6 +3157,7 @@ out: kmem_cache_free(btrfs_free_space_cachep, entry); } + spin_unlock(&cluster->lock); spin_unlock(&ctl->tree_lock); return ret; -- cgit v1.2.3-59-g8ed1b From 20903032cd9f0260b99aeab92e6540f0350e4a23 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 5 Feb 2021 12:55:36 +0000 Subject: btrfs: avoid checking for RO block group twice during nocow writeback During the nocow writeback path, we currently iterate the rbtree of block groups twice: once for checking if the target block group is RO with the call to btrfs_extent_readonly()), and once again for getting a nocow reference on the block group with a call to btrfs_inc_nocow_writers(). Since btrfs_inc_nocow_writers() already returns false when the target block group is RO, remove the call to btrfs_extent_readonly(). Not only we avoid searching the blocks group rbtree twice, it also helps reduce contention on the lock that protects it (specially since it is a spin lock and not a read-write lock). That may make a noticeable difference on very large filesystems, with thousands of allocated block groups. Reviewed-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 535abf898225..d6f0e1ad3711 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1674,9 +1674,6 @@ next_slot: */ btrfs_release_path(path); - /* If extent is RO, we must COW it */ - if (btrfs_extent_readonly(fs_info, disk_bytenr)) - goto out_check; ret = btrfs_cross_ref_exist(root, ino, found_key.offset - extent_offset, disk_bytenr, false); @@ -1723,6 +1720,7 @@ next_slot: WARN_ON_ONCE(freespace_inode); goto out_check; } + /* If the extent's block group is RO, we must COW */ if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) goto out_check; nocow = true; -- cgit v1.2.3-59-g8ed1b From 195a49eaf655eb914896c92cecd96bc863c9feb3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 5 Feb 2021 12:55:37 +0000 Subject: btrfs: fix race between writes to swap files and scrub When we active a swap file, at btrfs_swap_activate(), we acquire the exclusive operation lock to prevent the physical location of the swap file extents to be changed by operations such as balance and device replace/resize/remove. We also call there can_nocow_extent() which, among other things, checks if the block group of a swap file extent is currently RO, and if it is we can not use the extent, since a write into it would result in COWing the extent. However we have no protection against a scrub operation running after we activate the swap file, which can result in the swap file extents to be COWed while the scrub is running and operating on the respective block group, because scrub turns a block group into RO before it processes it and then back again to RW mode after processing it. That means an attempt to write into a swap file extent while scrub is processing the respective block group, will result in COWing the extent, changing its physical location on disk. Fix this by making sure that block groups that have extents that are used by active swap files can not be turned into RO mode, therefore making it not possible for a scrub to turn them into RO mode. When a scrub finds a block group that can not be turned to RO due to the existence of extents used by swap files, it proceeds to the next block group and logs a warning message that mentions the block group was skipped due to active swap files - this is the same approach we currently use for balance. Fixes: ed46ff3d42378 ("Btrfs: support swap files") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 33 ++++++++++++++++++++++++++++++++- fs/btrfs/block-group.h | 9 +++++++++ fs/btrfs/ctree.h | 5 +++++ fs/btrfs/inode.c | 19 ++++++++++++++++++- fs/btrfs/scrub.c | 9 ++++++++- 5 files changed, 72 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 5064be59dac5..744b99ddc28c 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1162,6 +1162,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) spin_lock(&sinfo->lock); spin_lock(&cache->lock); + if (cache->swap_extents) { + ret = -ETXTBSY; + goto out; + } + if (cache->ro) { cache->ro++; ret = 0; @@ -2307,7 +2312,7 @@ again: } ret = inc_block_group_ro(cache, 0); - if (!do_chunk_alloc) + if (!do_chunk_alloc || ret == -ETXTBSY) goto unlock_out; if (!ret) goto out; @@ -2316,6 +2321,8 @@ again: if (ret < 0) goto out; ret = inc_block_group_ro(cache, 0); + if (ret == -ETXTBSY) + goto unlock_out; out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); @@ -3406,6 +3413,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); ASSERT(refcount_read(&block_group->refs) == 1); + ASSERT(block_group->swap_extents == 0); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); @@ -3472,3 +3480,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group) __btrfs_remove_free_space_cache(block_group->free_space_ctl); } } + +bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg) +{ + bool ret = true; + + spin_lock(&bg->lock); + if (bg->ro) + ret = false; + else + bg->swap_extents++; + spin_unlock(&bg->lock); + + return ret; +} + +void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount) +{ + spin_lock(&bg->lock); + ASSERT(!bg->ro); + ASSERT(bg->swap_extents >= amount); + bg->swap_extents -= amount; + spin_unlock(&bg->lock); +} diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 29678426247d..3ecc3372a5ce 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -186,6 +186,12 @@ struct btrfs_block_group { /* Flag indicating this block group is placed on a sequential zone */ bool seq_zone; + /* + * Number of extents in this block group used for swap files. + * All accesses protected by the spinlock 'lock'. + */ + int swap_extents; + /* Record locked full stripes for RAID5/6 block group */ struct btrfs_full_stripe_locks_tree full_stripe_locks_root; @@ -312,4 +318,7 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache) void btrfs_freeze_block_group(struct btrfs_block_group *cache); void btrfs_unfreeze_block_group(struct btrfs_block_group *cache); +bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg); +void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount); + #endif /* BTRFS_BLOCK_GROUP_H */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3bc00aed13b2..40ec3393d2a1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -524,6 +524,11 @@ struct btrfs_swapfile_pin { * points to a struct btrfs_device. */ bool is_block_group; + /* + * Only used when 'is_block_group' is true and it is the number of + * extents used by a swapfile for this block group ('ptr' field). + */ + int bg_extent_count; }; bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6f0e1ad3711..30358a2e2bc0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10192,6 +10192,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, sp->ptr = ptr; sp->inode = inode; sp->is_block_group = is_block_group; + sp->bg_extent_count = 1; spin_lock(&fs_info->swapfile_pins_lock); p = &fs_info->swapfile_pins.rb_node; @@ -10205,6 +10206,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, (sp->ptr == entry->ptr && sp->inode > entry->inode)) { p = &(*p)->rb_right; } else { + if (is_block_group) + entry->bg_extent_count++; spin_unlock(&fs_info->swapfile_pins_lock); kfree(sp); return 1; @@ -10230,8 +10233,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode) sp = rb_entry(node, struct btrfs_swapfile_pin, node); if (sp->inode == inode) { rb_erase(&sp->node, &fs_info->swapfile_pins); - if (sp->is_block_group) + if (sp->is_block_group) { + btrfs_dec_block_group_swap_extents(sp->ptr, + sp->bg_extent_count); btrfs_put_block_group(sp->ptr); + } kfree(sp); } node = next; @@ -10446,6 +10452,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, goto out; } + if (!btrfs_inc_block_group_swap_extents(bg)) { + btrfs_warn(fs_info, + "block group for swapfile at %llu is read-only%s", + bg->start, + atomic_read(&fs_info->scrubs_running) ? + " (scrub running)" : ""); + btrfs_put_block_group(bg); + ret = -EINVAL; + goto out; + } + ret = btrfs_add_swapfile_pin(inode, bg, true); if (ret) { btrfs_put_block_group(bg); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 310fce00fcda..70a90ca2c8da 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3767,6 +3767,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, * commit_transactions. */ ro_set = 0; + } else if (ret == -ETXTBSY) { + btrfs_warn(fs_info, + "skipping scrub of block group %llu due to active swapfile", + cache->start); + scrub_pause_off(fs_info); + ret = 0; + goto skip_unfreeze; } else { btrfs_warn(fs_info, "failed setting block group ro: %d", ret); @@ -3862,7 +3869,7 @@ done: } else { spin_unlock(&cache->lock); } - +skip_unfreeze: btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); if (ret) -- cgit v1.2.3-59-g8ed1b From dd0734f2a866f9d619d4abf97c3d71bcdee40ea9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 5 Feb 2021 12:55:38 +0000 Subject: btrfs: fix race between swap file activation and snapshot creation When creating a snapshot we check if the current number of swap files, in the root, is non-zero, and if it is, we error out and warn that we can not create the snapshot because there are active swap files. However this is racy because when a task started activation of a swap file, another task might have started already snapshot creation and might have seen the counter for the number of swap files as zero. This means that after the swap file is activated we may end up with a snapshot of the same root successfully created, and therefore when the first write to the swap file happens it has to fall back into COW mode, which should never happen for active swap files. Basically what can happen is: 1) Task A starts snapshot creation and enters ioctl.c:create_snapshot(). There it sees that root->nr_swapfiles has a value of 0 so it continues; 2) Task B enters btrfs_swap_activate(). It is not aware that another task started snapshot creation but it did not finish yet. It increments root->nr_swapfiles from 0 to 1; 3) Task B checks that the file meets all requirements to be an active swap file - it has NOCOW set, there are no snapshots for the inode's root at the moment, no file holes, no reflinked extents, etc; 4) Task B returns success and now the file is an active swap file; 5) Task A commits the transaction to create the snapshot and finishes. The swap file's extents are now shared between the original root and the snapshot; 6) A write into an extent of the swap file is attempted - there is a snapshot of the file's root, so we fall back to COW mode and therefore the physical location of the extent changes on disk. So fix this by taking the snapshot lock during swap file activation before locking the extent range, as that is the order in which we lock these during buffered writes. Fixes: ed46ff3d42378 ("Btrfs: support swap files") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 30358a2e2bc0..4f2f1e932751 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10298,7 +10298,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; struct extent_map *em = NULL; @@ -10349,13 +10350,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, "cannot activate swapfile while exclusive operation is running"); return -EBUSY; } + + /* + * Prevent snapshot creation while we are activating the swap file. + * We do not want to race with snapshot creation. If snapshot creation + * already started before we bumped nr_swapfiles from 0 to 1 and + * completes before the first write into the swap file after it is + * activated, than that write would fallback to COW. + */ + if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) { + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because snapshot creation is in progress"); + return -EINVAL; + } /* * Snapshots can create extents which require COW even if NODATACOW is * set. We use this counter to prevent snapshots. We must increment it * before walking the extents because we don't want a concurrent * snapshot to run after we've already checked the extents. */ - atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles); + atomic_inc(&root->nr_swapfiles); isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); @@ -10501,6 +10516,8 @@ out: if (ret) btrfs_swap_deactivate(file); + btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_exclop_finish(fs_info); if (ret) -- cgit v1.2.3-59-g8ed1b From 1119a72e223f3073a604f8fccb3a470ccd8a4416 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 16 Feb 2021 15:43:22 -0500 Subject: btrfs: tree-checker: do not error out if extent ref hash doesn't match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tree checker checks the extent ref hash at read and write time to make sure we do not corrupt the file system. Generally extent references go inline, but if we have enough of them we need to make an item, which looks like key.objectid = key.type = key.offset = hash(tree, owner, offset) However if key.offset collide with an unrelated extent reference we'll simply key.offset++ until we get something that doesn't collide. Obviously this doesn't match at tree checker time, and thus we error while writing out the transaction. This is relatively easy to reproduce, simply do something like the following xfs_io -f -c "pwrite 0 1M" file offset=2 for i in {0..10000} do xfs_io -c "reflink file 0 ${offset}M 1M" file offset=$(( offset + 2 )) done xfs_io -c "reflink file 0 17999258914816 1M" file xfs_io -c "reflink file 0 35998517829632 1M" file xfs_io -c "reflink file 0 53752752058368 1M" file btrfs filesystem sync And the sync will error out because we'll abort the transaction. The magic values above are used because they generate hash collisions with the first file in the main subvol. The fix for this is to remove the hash value check from tree checker, as we have no idea which offset ours should belong to. Reported-by: Tuomas Lähdekorpi Fixes: 0785a9aacf9d ("btrfs: tree-checker: Add EXTENT_DATA_REF check") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ add comment] Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 582061c7b547..f4ade821307d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1453,22 +1453,14 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { - u64 root_objectid; - u64 owner; u64 offset; - u64 hash; + /* + * We cannot check the extent_data_ref hash due to possible + * overflow from the leaf due to hash collisions. + */ dref = (struct btrfs_extent_data_ref *)ptr; - root_objectid = btrfs_extent_data_ref_root(leaf, dref); - owner = btrfs_extent_data_ref_objectid(leaf, dref); offset = btrfs_extent_data_ref_offset(leaf, dref); - hash = hash_extent_data_ref(root_objectid, owner, offset); - if (unlikely(hash != key->offset)) { - extent_err(leaf, slot, - "invalid extent data ref hash, item has 0x%016llx key has 0x%016llx", - hash, key->offset); - return -EUCLEAN; - } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", -- cgit v1.2.3-59-g8ed1b From 3660d0bcdb82807d434da9d2e57d88b37331182d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 16 Feb 2021 11:09:25 +0000 Subject: btrfs: fix stale data exposure after cloning a hole with NO_HOLES enabled When using the NO_HOLES feature, if we clone a file range that spans only a hole into a range that is at or beyond the current i_size of the destination file, we end up not setting the full sync runtime flag on the inode. As a result, if we then fsync the destination file and have a power failure, after log replay we can end up exposing stale data instead of having a hole for that range. The conditions for this to happen are the following: 1) We have a file with a size of, for example, 1280K; 2) There is a written (non-prealloc) extent for the file range from 1024K to 1280K with a length of 256K; 3) This particular file extent layout is durably persisted, so that the existing superblock persisted on disk points to a subvolume root where the file has that exact file extent layout and state; 4) The file is truncated to a smaller size, to an offset lower than the start offset of its last extent, for example to 800K. The truncate sets the full sync runtime flag on the inode; 6) Fsync the file to log it and clear the full sync runtime flag; 7) Clone a region that covers only a hole (implicit hole due to NO_HOLES) into the file with a destination offset that starts at or beyond the 256K file extent item we had - for example to offset 1024K; 8) Since the clone operation does not find extents in the source range, we end up in the if branch at the bottom of btrfs_clone() where we punch a hole for the file range starting at offset 1024K by calling btrfs_replace_file_extents(). There we end up not setting the full sync flag on the inode, because we don't know we are being called in a clone context (and not fallocate's punch hole operation), and neither do we create an extent map to represent a hole because the requested range is beyond eof; 9) A further fsync to the file will be a fast fsync, since the clone operation did not set the full sync flag, and therefore it relies on modified extent maps to correctly log the file layout. But since it does not find any extent map marking the range from 1024K (the previous eof) to the new eof, it does not log a file extent item for that range representing the hole; 10) After a power failure no hole for the range starting at 1024K is punched and we end up exposing stale data from the old 256K extent. Turning this into exact steps: $ mkfs.btrfs -f -O no-holes /dev/sdi $ mount /dev/sdi /mnt # Create our test file with 3 extents of 256K and a 256K hole at offset # 256K. The file has a size of 1280K. $ xfs_io -f -s \ -c "pwrite -S 0xab -b 256K 0 256K" \ -c "pwrite -S 0xcd -b 256K 512K 256K" \ -c "pwrite -S 0xef -b 256K 768K 256K" \ -c "pwrite -S 0x73 -b 256K 1024K 256K" \ /mnt/sdi/foobar # Make sure it's durably persisted. We want the last committed super # block to point to this particular file extent layout. sync # Now truncate our file to a smaller size, falling within a position of # the second extent. This sets the full sync runtime flag on the inode. # Then fsync the file to log it and clear the full sync flag from the # inode. The third extent is no longer part of the file and therefore # it is not logged. $ xfs_io -c "truncate 800K" -c "fsync" /mnt/foobar # Now do a clone operation that only clones the hole and sets back the # file size to match the size it had before the truncate operation # (1280K). $ xfs_io \ -c "reflink /mnt/foobar 256K 1024K 256K" \ -c "fsync" \ /mnt/foobar # File data before power failure: $ od -A d -t x1 /mnt/foobar 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 0262144 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0524288 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd * 0786432 ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef * 0819200 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 1310720 # Mount the fs again to replay the log tree. $ mount /dev/sdi /mnt # File data after power failure: $ od -A d -t x1 /mnt/foobar 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 0262144 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0524288 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd * 0786432 ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef * 0819200 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 1048576 73 73 73 73 73 73 73 73 73 73 73 73 73 73 73 73 * 1310720 The range from 1024K to 1280K should correspond to a hole but instead it points to stale data, to the 256K extent that should not exist after the truncate operation. The issue does not exists when not using NO_HOLES, because for that case we use file extent items to represent holes, these are found and copied during the loop that iterates over extents at btrfs_clone(), and that causes btrfs_replace_file_extents() to be called with a non-NULL extent_info argument and therefore set the full sync runtime flag on the inode. So fix this by making the code that deals with a trailing hole during cloning, at btrfs_clone(), to set the full sync flag on the inode, if the range starts at or beyond the current i_size. A test case for fstests will follow soon. Backporting notes: for kernel 5.4 the change goes to ioctl.c into btrfs_clone before the last call to btrfs_punch_hole_range. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/reflink.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index b24396cf2f99..5413578d2c32 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -553,6 +553,24 @@ process_slot: */ btrfs_release_path(path); + /* + * When using NO_HOLES and we are cloning a range that covers + * only a hole (no extents) into a range beyond the current + * i_size, punching a hole in the target range will not create + * an extent map defining a hole, because the range starts at or + * beyond current i_size. If the file previously had an i_size + * greater than the new i_size set by this clone operation, we + * need to make sure the next fsync is a full fsync, so that it + * detects and logs a hole covering a range from the current + * i_size to the new i_size. If the clone range covers extents, + * besides a hole, then we know the full sync flag was already + * set by previous calls to btrfs_replace_file_extents() that + * replaced file extent items. + */ + if (last_dest_end >= i_size_read(inode)) + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags); + ret = btrfs_replace_file_extents(inode, path, last_dest_end, destoff + len - 1, NULL, &trans); if (ret) -- cgit v1.2.3-59-g8ed1b From 95c85fba1f64c3249c67f0078a29f8a125078189 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Jan 2021 16:42:35 -0500 Subject: btrfs: avoid double put of block group when emptying cluster It's wrong calling btrfs_put_block_group in __btrfs_return_cluster_to_free_space if the block group passed is different than the block group the cluster represents. As this means the cluster doesn't have a reference to the passed block group. This results in double put and a use-after-free bug. Fix this by simply bailing if the block group we passed in does not match the block group on the cluster. Fixes: fa9c0d795f7b ("Btrfs: rework allocation clustering") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index abcf951e6b44..711a6a751ae9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2801,8 +2801,10 @@ static void __btrfs_return_cluster_to_free_space( struct rb_node *node; spin_lock(&cluster->lock); - if (cluster->block_group != block_group) - goto out; + if (cluster->block_group != block_group) { + spin_unlock(&cluster->lock); + return; + } cluster->block_group = NULL; cluster->window_start = 0; @@ -2840,8 +2842,6 @@ static void __btrfs_return_cluster_to_free_space( entry->offset, &entry->offset_index, bitmap); } cluster->root = RB_ROOT; - -out: spin_unlock(&cluster->lock); btrfs_put_block_group(block_group); } -- cgit v1.2.3-59-g8ed1b From 6e37d245994189ba757df7dc2950a44d31421ac6 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 17 Feb 2021 16:06:18 +0900 Subject: btrfs: zoned: fix deadlock on log sync Lockdep with fstests test case btrfs/041 detected a unsafe locking scenario when we allocate the log node on a zoned filesystem. btrfs/041 ============================================ WARNING: possible recursive locking detected 5.11.0-rc7+ #939 Not tainted -------------------------------------------- xfs_io/698 is trying to acquire lock: ffff88810cd673a0 (&root->log_mutex){+.+.}-{3:3}, at: btrfs_sync_log+0x3d1/0xee0 [btrfs] but task is already holding lock: ffff88810b0fc3a0 (&root->log_mutex){+.+.}-{3:3}, at: btrfs_sync_log+0x313/0xee0 [btrfs] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&root->log_mutex); lock(&root->log_mutex); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by xfs_io/698: #0: ffff88810cd66620 (sb_internal){.+.+}-{0:0}, at: btrfs_sync_file+0x2c3/0x570 [btrfs] #1: ffff88810b0fc3a0 (&root->log_mutex){+.+.}-{3:3}, at: btrfs_sync_log+0x313/0xee0 [btrfs] stack backtrace: CPU: 0 PID: 698 Comm: xfs_io Not tainted 5.11.0-rc7+ #939 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack+0x77/0x97 __lock_acquire.cold+0xb9/0x32a lock_acquire+0xb5/0x400 ? btrfs_sync_log+0x3d1/0xee0 [btrfs] __mutex_lock+0x7b/0x8d0 ? btrfs_sync_log+0x3d1/0xee0 [btrfs] ? btrfs_sync_log+0x3d1/0xee0 [btrfs] ? find_first_extent_bit+0x9f/0x100 [btrfs] ? __mutex_unlock_slowpath+0x35/0x270 btrfs_sync_log+0x3d1/0xee0 [btrfs] btrfs_sync_file+0x3a8/0x570 [btrfs] __x64_sys_fsync+0x34/0x60 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This happens, because we are taking the ->log_mutex albeit it has already been locked. Also while at it, fix the bogus unlock of the tree_log_mutex in the error handling. Fixes: 3ddebf27fcd3 ("btrfs: zoned: reorder log node allocation on zoned filesystem") Reviewed-by: Filipe Manana Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d90695c1ab6c..2f1acc9aea9e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3174,16 +3174,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, root_log_ctx.log_transid = log_root_tree->log_transid; if (btrfs_is_zoned(fs_info)) { - mutex_lock(&fs_info->tree_root->log_mutex); if (!log_root_tree->node) { ret = btrfs_alloc_log_tree_node(trans, log_root_tree); if (ret) { - mutex_unlock(&fs_info->tree_log_mutex); mutex_unlock(&log_root_tree->log_mutex); goto out; } } - mutex_unlock(&fs_info->tree_root->log_mutex); } /* -- cgit v1.2.3-59-g8ed1b From 9c7d83ae6ba67d6c6199cce24573983db3b56332 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 14 Feb 2021 12:13:07 +0900 Subject: pstore: Fix warning in pstore_kill_sb() syzbot is hitting WARN_ON(pstore_sb != sb) at pstore_kill_sb() [1], for the assumption that pstore_sb != NULL is wrong because pstore_fill_super() will not assign pstore_sb = sb when new_inode() for d_make_root() returned NULL (due to memory allocation fault injection). Since mount_single() calls pstore_kill_sb() when pstore_fill_super() failed, pstore_kill_sb() needs to be aware of such failure path. [1] https://syzkaller.appspot.com/bug?id=6abacb8da5137cb47a416f2bef95719ed60508a0 Reported-by: syzbot Signed-off-by: Tetsuo Handa Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210214031307.57903-1-penguin-kernel@I-love.SAKURA.ne.jp --- fs/pstore/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 93a217e4f563..14658b009f1b 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -467,7 +467,7 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type, static void pstore_kill_sb(struct super_block *sb) { mutex_lock(&pstore_sb_lock); - WARN_ON(pstore_sb != sb); + WARN_ON(pstore_sb && pstore_sb != sb); kill_litter_super(sb); pstore_sb = NULL; -- cgit v1.2.3-59-g8ed1b From a3cb15cda1b8213387f258caad6b13afcc378fd5 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 18 Feb 2021 16:28:37 +0100 Subject: dt-bindings: bcm2711-hdmi: Fix broken schema For some reason, unevaluatedProperties doesn't work and additionalProperties is required. Fix it by switching to additionalProperties. Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210218152837.1080819-1-maxime@cerno.tech Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml index 57324a5f0271..a1d5a32660e0 100644 --- a/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml @@ -109,7 +109,7 @@ required: - resets - ddc -unevaluatedProperties: false +additionalProperties: false examples: - | -- cgit v1.2.3-59-g8ed1b From 497a4dc8276d25130ef0034363c68c106447d9f5 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Tue, 23 Feb 2021 21:41:14 +0100 Subject: dts: drop dangling c6x symlink With c6x architecture removal, scripts/dtc/include-prefixes/c6x symlink lost its target. Drop the dangling symlink which triggers some distribution check scripts. Fixes: a579fcfa8e49 ("c6x: remove architecture") Signed-off-by: Michal Kubecek Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210223204114.E7F55E0155@unicorn.suse.cz --- scripts/dtc/include-prefixes/c6x | 1 - 1 file changed, 1 deletion(-) delete mode 120000 scripts/dtc/include-prefixes/c6x diff --git a/scripts/dtc/include-prefixes/c6x b/scripts/dtc/include-prefixes/c6x deleted file mode 120000 index 49ded4cae2be..000000000000 --- a/scripts/dtc/include-prefixes/c6x +++ /dev/null @@ -1 +0,0 @@ -../../../arch/c6x/boot/dts \ No newline at end of file -- cgit v1.2.3-59-g8ed1b From a5665ec2affdba21bff3b0d4d3aed83b3951e8ff Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sat, 20 Feb 2021 00:55:59 +0200 Subject: tpm, tpm_tis: Decorate tpm_get_timeouts() with request_locality() This is shown with Samsung Chromebook Pro (Caroline) with TPM 1.2 (SLB 9670): [ 4.324298] TPM returned invalid status [ 4.324806] WARNING: CPU: 2 PID: 1 at drivers/char/tpm/tpm_tis_core.c:275 tpm_tis_status+0x86/0x8f Background ========== TCG PC Client Platform TPM Profile (PTP) Specification, paragraph 6.1 FIFO Interface Locality Usage per Register, Table 39 Register Behavior Based on Locality Setting for FIFO - a read attempt to TPM_STS_x Registers returns 0xFF in case of lack of locality. The fix ======= Decorate tpm_get_timeouts() with request_locality() and release_locality(). Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") Cc: James Bottomley Cc: Guenter Roeck Cc: Laurent Bigonville Cc: stable@vger.kernel.org Reported-by: Lukasz Majczak Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_tis_core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 431919d5f48a..30843954aa36 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -1019,11 +1019,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, init_waitqueue_head(&priv->read_queue); init_waitqueue_head(&priv->int_queue); if (irq != -1) { - /* Before doing irq testing issue a command to the TPM in polling mode + /* + * Before doing irq testing issue a command to the TPM in polling mode * to make sure it works. May as well use that command to set the * proper timeouts for the driver. */ - if (tpm_get_timeouts(chip)) { + + rc = request_locality(chip, 0); + if (rc < 0) + goto out_err; + + rc = tpm_get_timeouts(chip); + + release_locality(chip, 0); + + if (rc) { dev_err(dev, "Could not get TPM timeouts and durations\n"); rc = -ENODEV; goto out_err; -- cgit v1.2.3-59-g8ed1b From d53a6adfb553969809eb2b736a976ebb5146cd95 Mon Sep 17 00:00:00 2001 From: Lukasz Majczak Date: Tue, 16 Feb 2021 10:17:49 +0200 Subject: tpm, tpm_tis: Decorate tpm_tis_gen_interrupt() with request_locality() This is shown with Samsung Chromebook Pro (Caroline) with TPM 1.2 (SLB 9670): [ 4.324298] TPM returned invalid status [ 4.324806] WARNING: CPU: 2 PID: 1 at drivers/char/tpm/tpm_tis_core.c:275 tpm_tis_status+0x86/0x8f Background ========== TCG PC Client Platform TPM Profile (PTP) Specification, paragraph 6.1 FIFO Interface Locality Usage per Register, Table 39 Register Behavior Based on Locality Setting for FIFO - a read attempt to TPM_STS_x Registers returns 0xFF in case of lack of locality. The fix ======= Decorate tpm_tis_gen_interrupt() with request_locality() and release_locality(). Cc: Laurent Bigonville Cc: James Bottomley Cc: Guenter Roeck Cc: stable@vger.kernel.org Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") Signed-off-by: Lukasz Majczak Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_tis_core.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 30843954aa36..a2e0395cbe61 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -707,12 +707,22 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip) const char *desc = "attempting to generate an interrupt"; u32 cap2; cap_t cap; + int ret; + /* TPM 2.0 */ if (chip->flags & TPM_CHIP_FLAG_TPM2) return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc); - else - return tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, - 0); + + /* TPM 1.2 */ + ret = request_locality(chip, 0); + if (ret < 0) + return ret; + + ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0); + + release_locality(chip, 0); + + return ret; } /* Register the IRQ and issue a command that will cause an interrupt. If an -- cgit v1.2.3-59-g8ed1b From e2a0fcac6b1dac0fd15bb449a20233f8f91a8a85 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 25 Feb 2021 08:37:01 -0500 Subject: Documentation: kvm: fix messy conversion from .txt to .rst Building the documentation gives a warning that the KVM_PPC_RESIZE_HPT_PREPARE label is defined twice. The root cause is that the KVM_PPC_RESIZE_HPT_PREPARE API is present twice, the second being a mix of the prepare and commit APIs. Fix it. Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 69 ++++++++++-------------------------------- 1 file changed, 16 insertions(+), 53 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index aed52b0fc16e..e5f341e89c39 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3856,49 +3856,20 @@ base 2 of the page size in the bottom 6 bits. -EFAULT if struct kvm_reinject_control cannot be read, -EINVAL if the supplied shift or flags are invalid, -ENOMEM if unable to allocate the new HPT, - -ENOSPC if there was a hash collision - -:: - - struct kvm_ppc_rmmu_info { - struct kvm_ppc_radix_geom { - __u8 page_shift; - __u8 level_bits[4]; - __u8 pad[3]; - } geometries[8]; - __u32 ap_encodings[8]; - }; - -The geometries[] field gives up to 8 supported geometries for the -radix page table, in terms of the log base 2 of the smallest page -size, and the number of bits indexed at each level of the tree, from -the PTE level up to the PGD level in that order. Any unused entries -will have 0 in the page_shift field. - -The ap_encodings gives the supported page sizes and their AP field -encodings, encoded with the AP value in the top 3 bits and the log -base 2 of the page size in the bottom 6 bits. - -4.102 KVM_PPC_RESIZE_HPT_PREPARE --------------------------------- - -:Capability: KVM_CAP_SPAPR_RESIZE_HPT -:Architectures: powerpc -:Type: vm ioctl -:Parameters: struct kvm_ppc_resize_hpt (in) -:Returns: 0 on successful completion, - >0 if a new HPT is being prepared, the value is an estimated - number of milliseconds until preparation is complete, - -EFAULT if struct kvm_reinject_control cannot be read, - -EINVAL if the supplied shift or flags are invalid,when moving existing - HPT entries to the new HPT, - -EIO on other error conditions Used to implement the PAPR extension for runtime resizing of a guest's Hashed Page Table (HPT). Specifically this starts, stops or monitors the preparation of a new potential HPT for the guest, essentially implementing the H_RESIZE_HPT_PREPARE hypercall. +:: + + struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; + }; + If called with shift > 0 when there is no pending HPT for the guest, this begins preparation of a new pending HPT of size 2^(shift) bytes. It then returns a positive integer with the estimated number of @@ -3926,14 +3897,6 @@ Normally this will be called repeatedly with the same parameters until it returns <= 0. The first call will initiate preparation, subsequent ones will monitor preparation until it completes or fails. -:: - - struct kvm_ppc_resize_hpt { - __u64 flags; - __u32 shift; - __u32 pad; - }; - 4.103 KVM_PPC_RESIZE_HPT_COMMIT ------------------------------- @@ -3956,6 +3919,14 @@ Hashed Page Table (HPT). Specifically this requests that the guest be transferred to working with the new HPT, essentially implementing the H_RESIZE_HPT_COMMIT hypercall. +:: + + struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; + }; + This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has returned 0 with the same parameters. In other cases KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or @@ -3971,14 +3942,6 @@ HPT and the previous HPT will be discarded. On failure, the guest will still be operating on its previous HPT. -:: - - struct kvm_ppc_resize_hpt { - __u64 flags; - __u32 shift; - __u32 pad; - }; - 4.104 KVM_X86_GET_MCE_CAP_SUPPORTED ----------------------------------- -- cgit v1.2.3-59-g8ed1b From 96564d777366417cca0f5b39fc13202a1d7d9881 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Fri, 26 Feb 2021 15:55:41 +0800 Subject: KVM: Documentation: rectify rst markup in kvm_run->flags Commit c32b1b896d2a ("KVM: X86: Add the Document for KVM_CAP_X86_BUS_LOCK_EXIT") added a new flag in kvm_run->flags documentation, and caused warning in make htmldocs: Documentation/virt/kvm/api.rst:5004: WARNING: Unexpected indentation Documentation/virt/kvm/api.rst:5004: WARNING: Inline emphasis start-string without end-string Fix this rst markup issue. Signed-off-by: Chenyi Qiang Message-Id: <20210226075541.27179-1-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e5f341e89c39..80237dee7a96 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4963,7 +4963,8 @@ local APIC is not used. __u16 flags; More architecture-specific flags detailing state of the VCPU that may -affect the device's behavior. Current defined flags: +affect the device's behavior. Current defined flags:: + /* x86, set if the VCPU is in system management mode */ #define KVM_RUN_X86_SMM (1 << 0) /* x86, set if bus lock detected in VM */ -- cgit v1.2.3-59-g8ed1b From ffe76c24c5c1851e5ef949d8726d57e78cd0cf34 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Thu, 25 Feb 2021 22:19:45 -0800 Subject: KVM: x86: remove misplaced comment on active_mmu_pages The 'mmu_page_hash' is used as hash table while 'active_mmu_pages' is a list. Remove the misplaced comment as it's mostly stating the obvious anyways. Signed-off-by: Dongli Zhang Reviewed-by: Sean Christopherson Message-Id: <20210226061945.1222-1-dongli.zhang@oracle.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0cf71ff2b2e5..85ccbd4b7c52 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -939,9 +939,6 @@ struct kvm_arch { unsigned int indirect_shadow_pages; u8 mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; - /* - * Hash table of struct kvm_mmu_page. - */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; struct list_head lpage_disallowed_mmu_pages; -- cgit v1.2.3-59-g8ed1b From 919f4ebc598701670e80e31573a58f1f2d2bf918 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 26 Feb 2021 15:59:59 +0800 Subject: KVM: x86: hyper-v: Fix Hyper-V context null-ptr-deref Reported by syzkaller: KASAN: null-ptr-deref in range [0x0000000000000140-0x0000000000000147] CPU: 1 PID: 8370 Comm: syz-executor859 Not tainted 5.11.0-syzkaller #0 RIP: 0010:synic_get arch/x86/kvm/hyperv.c:165 [inline] RIP: 0010:kvm_hv_set_sint_gsi arch/x86/kvm/hyperv.c:475 [inline] RIP: 0010:kvm_hv_irq_routing_update+0x230/0x460 arch/x86/kvm/hyperv.c:498 Call Trace: kvm_set_irq_routing+0x69b/0x940 arch/x86/kvm/../../../virt/kvm/irqchip.c:223 kvm_vm_ioctl+0x12d0/0x2800 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3959 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Hyper-V context is lazily allocated until Hyper-V specific MSRs are accessed or SynIC is enabled. However, the syzkaller testcase sets irq routing table directly w/o enabling SynIC. This results in null-ptr-deref when accessing SynIC Hyper-V context. This patch fixes it. syzkaller source: https://syzkaller.appspot.com/x/repro.c?x=163342ccd00000 Reported-by: syzbot+6987f3b2dbd9eda95f12@syzkaller.appspotmail.com Fixes: 8f014550dfb1 ("KVM: x86: hyper-v: Make Hyper-V emulation enablement conditional") Signed-off-by: Wanpeng Li Message-Id: <1614326399-5762-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 7d2dae92d638..58fa8c029867 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -159,7 +159,7 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx) struct kvm_vcpu_hv_synic *synic; vcpu = get_vcpu_by_vpidx(kvm, vpidx); - if (!vcpu) + if (!vcpu || !to_hv_vcpu(vcpu)) return NULL; synic = to_hv_synic(vcpu); return (synic->active) ? synic : NULL; -- cgit v1.2.3-59-g8ed1b From 44ac5958a6c1fd91ac8810fbb37194e377d78db5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 25 Feb 2021 12:47:26 -0800 Subject: KVM: x86/mmu: Set SPTE_AD_WRPROT_ONLY_MASK if and only if PML is enabled Check that PML is actually enabled before setting the mask to force a SPTE to be write-protected. The bits used for the !AD_ENABLED case are in the upper half of the SPTE. With 64-bit paging and EPT, these bits are ignored, but with 32-bit PAE paging they are reserved. Setting them for L2 SPTEs without checking PML breaks NPT on 32-bit KVM. Fixes: 1f4e5fc83a42 ("KVM: x86: fix nested guest live migration with PML") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210225204749.1512652-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu_internal.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 72b0928f2b2d..ec4fc28b325a 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -81,15 +81,15 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) { /* - * When using the EPT page-modification log, the GPAs in the log - * would come from L2 rather than L1. Therefore, we need to rely - * on write protection to record dirty pages. This also bypasses - * PML, since writes now result in a vmexit. Note, this helper will - * tag SPTEs as needing write-protection even if PML is disabled or - * unsupported, but that's ok because the tag is consumed if and only - * if PML is enabled. Omit the PML check to save a few uops. + * When using the EPT page-modification log, the GPAs in the CPU dirty + * log would come from L2 rather than L1. Therefore, we need to rely + * on write protection to record dirty pages, which bypasses PML, since + * writes now result in a vmexit. Note, the check on CPU dirty logging + * being enabled is mandatory as the bits used to denote WP-only SPTEs + * are reserved for NPT w/ PAE (32-bit KVM). */ - return vcpu->arch.mmu == &vcpu->arch.guest_mmu; + return vcpu->arch.mmu == &vcpu->arch.guest_mmu && + kvm_x86_ops.cpu_dirty_log_size; } bool is_nx_huge_page_enabled(void); -- cgit v1.2.3-59-g8ed1b From c462f859f895840e7cd4f20f822ad4c6df2db489 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 26 Feb 2021 04:49:06 -0500 Subject: KVM: xen: flush deferred static key before checking it A missing flush would cause the static branch to trigger incorrectly. Cc: David Woodhouse Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1d2bc89431a2..bfc928495bd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8039,6 +8039,7 @@ void kvm_arch_exit(void) kvm_mmu_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_fpu_cache); + static_key_deferred_flush(&kvm_xen_enabled); WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key)); } -- cgit v1.2.3-59-g8ed1b From 3590ec58991bcf0f3512c4353a786079a6619758 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 9 Feb 2021 22:22:19 -0800 Subject: btrfs: use memcpy_[to|from]_page() and kmap_local_page() There are many places where the pattern kmap/memcpy/kunmap occurs. This pattern was lifted to the core common functions memcpy_[to|from]_page(). Use these new functions to reduce the code, eliminate direct uses of kmap, and leverage the new core functions use of kmap_local_page(). Also, there is 1 place where a kmap/memcpy is followed by an optional memset. Here we leave the kmap open coded to avoid remapping the page but use kmap_local_page() directly. Development of this patch was aided by the coccinelle script: // // SPDX-License-Identifier: GPL-2.0-only // Find kmap/memcpy/kunmap pattern and replace with memcpy*page calls // // NOTE: Offsets and other expressions may be more complex than what the script // will automatically generate. Therefore a catchall rule is provided to find // the pattern which then must be evaluated by hand. // // Confidence: Low // Copyright: (C) 2021 Intel Corporation // URL: http://coccinelle.lip6.fr/ // Comments: // Options: // // simple memcpy version // @ memcpy_rule1 @ expression page, T, F, B, Off; identifier ptr; type VP; @@ ( -VP ptr = kmap(page); | -ptr = kmap(page); | -VP ptr = kmap_atomic(page); | -ptr = kmap_atomic(page); ) <+... ( -memcpy(ptr + Off, F, B); +memcpy_to_page(page, Off, F, B); | -memcpy(ptr, F, B); +memcpy_to_page(page, 0, F, B); | -memcpy(T, ptr + Off, B); +memcpy_from_page(T, page, Off, B); | -memcpy(T, ptr, B); +memcpy_from_page(T, page, 0, B); ) ...+> ( -kunmap(page); | -kunmap_atomic(ptr); ) // Remove any pointers left unused @ depends on memcpy_rule1 @ identifier memcpy_rule1.ptr; type VP, VP1; @@ -VP ptr; ... when != ptr; ? VP1 ptr; // // Some callers kmap without a temp pointer // @ memcpy_rule2 @ expression page, T, Off, F, B; @@ <+... ( -memcpy(kmap(page) + Off, F, B); +memcpy_to_page(page, Off, F, B); | -memcpy(kmap(page), F, B); +memcpy_to_page(page, 0, F, B); | -memcpy(T, kmap(page) + Off, B); +memcpy_from_page(T, page, Off, B); | -memcpy(T, kmap(page), B); +memcpy_from_page(T, page, 0, B); ) ...+> -kunmap(page); // No need for the ptr variable removal // // Catch all // @ memcpy_rule3 @ expression page; expression GenTo, GenFrom, GenSize; identifier ptr; type VP; @@ ( -VP ptr = kmap(page); | -ptr = kmap(page); | -VP ptr = kmap_atomic(page); | -ptr = kmap_atomic(page); ) <+... ( // // Some call sites have complex expressions within the memcpy // match a catch all to be evaluated by hand. // -memcpy(GenTo, GenFrom, GenSize); +memcpy_to_pageExtra(page, GenTo, GenFrom, GenSize); +memcpy_from_pageExtra(GenTo, page, GenFrom, GenSize); ) ...+> ( -kunmap(page); | -kunmap_atomic(ptr); ) // Remove any pointers left unused @ depends on memcpy_rule3 @ identifier memcpy_rule3.ptr; type VP, VP1; @@ -VP ptr; ... when != ptr; ? VP1 ptr; // Reviewed-by: Christoph Hellwig Signed-off-by: Ira Weiny Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 6 ++---- fs/btrfs/lzo.c | 4 ++-- fs/btrfs/reflink.c | 6 +----- fs/btrfs/send.c | 7 ++----- fs/btrfs/zlib.c | 5 ++--- fs/btrfs/zstd.c | 6 ++---- 6 files changed, 11 insertions(+), 23 deletions(-) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 5ae3fa0386b7..047b632b4139 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1231,7 +1231,6 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, unsigned long prev_start_byte; unsigned long working_bytes = total_out - buf_start; unsigned long bytes; - char *kaddr; struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter); /* @@ -1262,9 +1261,8 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, PAGE_SIZE - (buf_offset % PAGE_SIZE)); bytes = min(bytes, working_bytes); - kaddr = kmap_atomic(bvec.bv_page); - memcpy(kaddr + bvec.bv_offset, buf + buf_offset, bytes); - kunmap_atomic(kaddr); + memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + buf_offset, + bytes); flush_dcache_page(bvec.bv_page); buf_offset += bytes; diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index aa9cd11f4b78..9084a950dc09 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -467,7 +467,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in, destlen = min_t(unsigned long, destlen, PAGE_SIZE); bytes = min_t(unsigned long, destlen, out_len - start_byte); - kaddr = kmap_atomic(dest_page); + kaddr = kmap_local_page(dest_page); memcpy(kaddr, workspace->buf + start_byte, bytes); /* @@ -477,7 +477,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in, */ if (bytes < destlen) memset(kaddr+bytes, 0, destlen-bytes); - kunmap_atomic(kaddr); + kunmap_local(kaddr); out: return ret; } diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index b03e7891394e..74c62e49c0c9 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -103,12 +103,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode, set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags); if (comp_type == BTRFS_COMPRESS_NONE) { - char *map; - - map = kmap(page); - memcpy(map, data_start, datal); + memcpy_to_page(page, 0, data_start, datal); flush_dcache_page(page); - kunmap(page); } else { ret = btrfs_decompress(comp_type, data_start, page, 0, inline_size, datal); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 78a35374d492..83982b3b7057 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4948,7 +4948,6 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) struct btrfs_fs_info *fs_info = root->fs_info; struct inode *inode; struct page *page; - char *addr; pgoff_t index = offset >> PAGE_SHIFT; pgoff_t last_index; unsigned pg_offset = offset_in_page(offset); @@ -5001,10 +5000,8 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) } } - addr = kmap(page); - memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset, - cur_len); - kunmap(page); + memcpy_from_page(sctx->send_buf + sctx->send_size, page, + pg_offset, cur_len); unlock_page(page); put_page(page); index++; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 05615a1099db..d524acf7b3e5 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -432,9 +432,8 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in, PAGE_SIZE - (buf_offset % PAGE_SIZE)); bytes = min(bytes, bytes_left); - kaddr = kmap_atomic(dest_page); - memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); - kunmap_atomic(kaddr); + memcpy_to_page(dest_page, pg_offset, + workspace->buf + buf_offset, bytes); pg_offset += bytes; bytes_left -= bytes; diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 9a4871636c6c..8e9626d63976 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -688,10 +688,8 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in, bytes = min_t(unsigned long, destlen - pg_offset, workspace->out_buf.size - buf_offset); - kaddr = kmap_atomic(dest_page); - memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset, - bytes); - kunmap_atomic(kaddr); + memcpy_to_page(dest_page, pg_offset, + workspace->out_buf.dst + buf_offset, bytes); pg_offset += bytes; } -- cgit v1.2.3-59-g8ed1b From 80cc83842394e5ad3e93487359106aab3420bcb7 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 9 Feb 2021 22:22:20 -0800 Subject: btrfs: use copy_highpage() instead of 2 kmaps() There are many places where kmap/memove/kunmap patterns occur. This pattern exists in the core common function copy_highpage(). Use copy_highpage to avoid open coding the use of kmap and leverages the core functions use of kmap_local_page(). Development of this patch was aided by the following coccinelle script: // // SPDX-License-Identifier: GPL-2.0-only // Find kmap/copypage/kunmap pattern and replace with copy_highpage calls // // NOTE: The expressions in the copy page version of this kmap pattern are // overly complex and so these all need individual attention. // // Confidence: Low // Copyright: (C) 2021 Intel Corporation // URL: http://coccinelle.lip6.fr/ // Comments: // Options: // // Then a copy_page where we have 2 pages involved. // @ copy_page_rule @ expression page, page2, To, From, Size; identifier ptr, ptr2; type VP, VP2; @@ /* kmap */ ( -VP ptr = kmap(page); ... -VP2 ptr2 = kmap(page2); | -VP ptr = kmap_atomic(page); ... -VP2 ptr2 = kmap_atomic(page2); | -ptr = kmap(page); ... -ptr2 = kmap(page2); | -ptr = kmap_atomic(page); ... -ptr2 = kmap_atomic(page2); ) // 1 or more copy versions of the entire page <+... ( -copy_page(To, From); +copy_highpage(To, From); | -memmove(To, From, Size); +memmoveExtra(To, From, Size); ) ...+> /* kunmap */ ( -kunmap(page2); ... -kunmap(page); | -kunmap(page); ... -kunmap(page2); | -kmap_atomic(ptr2); ... -kmap_atomic(ptr); ) // Remove any pointers left unused @ depends on copy_page_rule @ identifier copy_page_rule.ptr; identifier copy_page_rule.ptr2; type VP, VP1; type VP2, VP21; @@ -VP ptr; ... when != ptr; ? VP1 ptr; -VP2 ptr2; ... when != ptr2; ? VP21 ptr2; // Reviewed-by: Christoph Hellwig Signed-off-by: Ira Weiny Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 93fbf87bdc8d..c86aff9c7daa 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -250,8 +250,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) static void cache_rbio_pages(struct btrfs_raid_bio *rbio) { int i; - char *s; - char *d; int ret; ret = alloc_rbio_pages(rbio); @@ -262,13 +260,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) if (!rbio->bio_pages[i]) continue; - s = kmap(rbio->bio_pages[i]); - d = kmap(rbio->stripe_pages[i]); - - copy_page(d, s); - - kunmap(rbio->bio_pages[i]); - kunmap(rbio->stripe_pages[i]); + copy_highpage(rbio->stripe_pages[i], rbio->bio_pages[i]); SetPageUptodate(rbio->stripe_pages[i]); } set_bit(RBIO_CACHE_READY_BIT, &rbio->flags); -- cgit v1.2.3-59-g8ed1b From 0f47227705d88382d9a8f98013d56442066d90ca Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 1 Mar 2021 12:04:02 +0900 Subject: block: revert "block: fix bd_size_lock use" With the removal of the skd driver, using IRQ safe locking of a bdev bd_size_lock spinlock to protect the bdev inode size is not necessary anymore as there is no other known driver using this lock under an IRQ disabled context (e.g. calling set_capacity() with IRQ disabled). Revert commit 0fe37724f8e7 ("block: fix bd_size_lock use") which introduced the IRQ safe change. Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/genhd.c | 5 ++--- block/partitions/core.c | 6 ++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index fcc530164b5a..c55e8f0fced1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -45,11 +45,10 @@ static void disk_release_events(struct gendisk *disk); void set_capacity(struct gendisk *disk, sector_t sectors) { struct block_device *bdev = disk->part0; - unsigned long flags; - spin_lock_irqsave(&bdev->bd_size_lock, flags); + spin_lock(&bdev->bd_size_lock); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock_irqrestore(&bdev->bd_size_lock, flags); + spin_unlock(&bdev->bd_size_lock); } EXPORT_SYMBOL(set_capacity); diff --git a/block/partitions/core.c b/block/partitions/core.c index f3d9ff2cafb6..1a7558917c47 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -88,11 +88,9 @@ static int (*check_part[])(struct parsed_partitions *) = { static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) { - unsigned long flags; - - spin_lock_irqsave(&bdev->bd_size_lock, flags); + spin_lock(&bdev->bd_size_lock); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock_irqrestore(&bdev->bd_size_lock, flags); + spin_unlock(&bdev->bd_size_lock); } static struct parsed_partitions *allocate_partitions(struct gendisk *hd) -- cgit v1.2.3-59-g8ed1b From 44cc89f764646b2f1f2ea5d1a08b230131707851 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Feb 2021 19:23:27 +0100 Subject: PM: runtime: Update device status before letting suppliers suspend Because the PM-runtime status of the device is not updated in __rpm_callback(), attempts to suspend the suppliers of the given device triggered by rpm_put_suppliers() called by it may fail. Fix this by making __rpm_callback() update the device's status to RPM_SUSPENDED before calling rpm_put_suppliers() if the current status of the device is RPM_SUSPENDING and the callback just invoked by it has returned 0 (success). While at it, modify the code in __rpm_callback() to always check the device's PM-runtime status under its PM lock. Link: https://lore.kernel.org/linux-pm/CAPDyKFqm06KDw_p8WXsM4dijDbho4bb6T4k50UqqvR1_COsp8g@mail.gmail.com/ Fixes: 21d5c57b3726 ("PM / runtime: Use device links") Reported-by: Elaine Zhang Diagnosed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Tested-by: Elaine Zhang Reviewed-by: Ulf Hansson Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 62 ++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index a46a7e30881b..18b82427d0cb 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev) static int __rpm_callback(int (*cb)(struct device *), struct device *dev) __releases(&dev->power.lock) __acquires(&dev->power.lock) { - int retval, idx; bool use_links = dev->power.links_count > 0; + bool get = false; + int retval, idx; + bool put; if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); + } else if (!use_links) { + spin_unlock_irq(&dev->power.lock); } else { + get = dev->power.runtime_status == RPM_RESUMING; + spin_unlock_irq(&dev->power.lock); - /* - * Resume suppliers if necessary. - * - * The device's runtime PM status cannot change until this - * routine returns, so it is safe to read the status outside of - * the lock. - */ - if (use_links && dev->power.runtime_status == RPM_RESUMING) { + /* Resume suppliers if necessary. */ + if (get) { idx = device_links_read_lock(); retval = rpm_get_suppliers(dev); @@ -355,24 +355,36 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) if (dev->power.irq_safe) { spin_lock(&dev->power.lock); - } else { - /* - * If the device is suspending and the callback has returned - * success, drop the usage counters of the suppliers that have - * been reference counted on its resume. - * - * Do that if resume fails too. - */ - if (use_links - && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) - || (dev->power.runtime_status == RPM_RESUMING && retval))) { - idx = device_links_read_lock(); + return retval; + } - fail: - rpm_put_suppliers(dev); + spin_lock_irq(&dev->power.lock); - device_links_read_unlock(idx); - } + if (!use_links) + return retval; + + /* + * If the device is suspending and the callback has returned success, + * drop the usage counters of the suppliers that have been reference + * counted on its resume. + * + * Do that if the resume fails too. + */ + put = dev->power.runtime_status == RPM_SUSPENDING && !retval; + if (put) + __update_runtime_status(dev, RPM_SUSPENDED); + else + put = get && retval; + + if (put) { + spin_unlock_irq(&dev->power.lock); + + idx = device_links_read_lock(); + +fail: + rpm_put_suppliers(dev); + + device_links_read_unlock(idx); spin_lock_irq(&dev->power.lock); } -- cgit v1.2.3-59-g8ed1b From f3c141057976120148ca32d9d030a2256a5ffb7b Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 24 Feb 2021 19:30:21 +0100 Subject: powercap/drivers/dtpm: Fix root node initialization The root node is not set to NULL when the dtpm root node is removed. Consequently, it is not possible to create a new root as it is already set. Set the root node to NULL when the last node is removed. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/powercap/dtpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index 5a51cd34a7e8..c2185ec5f887 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -207,6 +207,9 @@ int dtpm_release_zone(struct powercap_zone *pcz) if (dtpm->ops) dtpm->ops->release(dtpm); + if (root == dtpm) + root = NULL; + kfree(dtpm); return 0; -- cgit v1.2.3-59-g8ed1b From 9e2be308f023a741a0d4024bef508ef88dfb3a43 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 24 Feb 2021 19:30:22 +0100 Subject: powercap/drivers/dtpm: Add the experimental label to the option description The DTPM framework will evolve in the next cycles. Let's add a temporary EXPERIMENTAL tag to the option so users will be aware the API may change over time. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/powercap/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index 20b4325c6161..8242e8c5ed77 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -45,7 +45,7 @@ config IDLE_INJECT on a per CPU basis. config DTPM - bool "Power capping for Dynamic Thermal Power Management" + bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)" help This enables support for the power capping for the dynamic thermal power management userspace engine. -- cgit v1.2.3-59-g8ed1b From 5218e12e9f3a324f41c05da4874d76d7ea3677cb Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 1 Mar 2021 17:23:25 +0100 Subject: block: Drop leftover references to RQF_SORTED Commit a1ce35fa49852db60fc6e268038530be533c5b15 ("block: remove dead elevator code") removed all users of RQF_SORTED. However it is still defined, and there is one reference left to it (which in effect is dead code). Clear it all up. Signed-off-by: Jean Delvare Cc: Jens Axboe Cc: Ming Lei Cc: Omar Sandoval Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 - block/blk-mq-sched.c | 6 +----- include/linux/blkdev.h | 2 -- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 4de03da9a624..9ebb344e2585 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -292,7 +292,6 @@ static const char *const cmd_flag_name[] = { #define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name static const char *const rqf_name[] = { - RQF_NAME(SORTED), RQF_NAME(STARTED), RQF_NAME(SOFTBARRIER), RQF_NAME(FLUSH_SEQ), diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index ddb65e9e6fd9..e1e997af89a0 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -385,7 +385,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, - bool has_sched, struct request *rq) { /* @@ -402,9 +401,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) return true; - if (has_sched) - rq->rq_flags |= RQF_SORTED; - return false; } @@ -418,7 +414,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG)); - if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) { + if (blk_mq_sched_bypass_insert(hctx, rq)) { /* * Firstly normal IO request is inserted to scheduler queue or * sw queue, meantime we add flush request to dispatch queue( diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c032cfe133c7..bc6bc8383b43 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -65,8 +65,6 @@ typedef void (rq_end_io_fn)(struct request *, blk_status_t); * request flags */ typedef __u32 __bitwise req_flags_t; -/* elevator knows about this request */ -#define RQF_SORTED ((__force req_flags_t)(1 << 0)) /* drive already may have started this one */ #define RQF_STARTED ((__force req_flags_t)(1 << 1)) /* may not be passed by ioscheduler */ -- cgit v1.2.3-59-g8ed1b From 65d43023171edc0d27208f6ac7a1a73732950cf7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 09:50:55 -0700 Subject: io-wq: wait for worker startup when forking a new one We need to have our worker count updated before continuing, to avoid cases where we repeatedly think we need a new worker, but a fork is already in progress. Signed-off-by: Jens Axboe --- fs/io-wq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/io-wq.c b/fs/io-wq.c index 44e20248805a..965022fe9961 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -56,6 +56,7 @@ struct io_worker { const struct cred *saved_creds; struct completion ref_done; + struct completion started; struct rcu_head rcu; }; @@ -267,6 +268,7 @@ static void io_worker_start(struct io_worker *worker) { worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); io_wqe_inc_running(worker); + complete(&worker->started); } /* @@ -644,6 +646,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) worker->wqe = wqe; spin_lock_init(&worker->lock); init_completion(&worker->ref_done); + init_completion(&worker->started); refcount_inc(&wq->refs); @@ -656,6 +659,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) kfree(worker); return false; } + wait_for_completion(&worker->started); return true; } -- cgit v1.2.3-59-g8ed1b From b5a95bb1883e2bac1009cc88e65c71cff6f931e6 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 23 Feb 2021 15:01:27 -0600 Subject: dt-bindings: media: Use graph and video-interfaces schemas, round 2 A couple of media schemas got applied without using or incorrectly using the video-interfaces.yaml and graph.yaml schemas. Fix them up before we have more copy-n-paste errors. Fixes: 41b3e23376e9 ("media: dt-bindings: media: Add bindings for imx334") Fixes: d899e5f1db7a ("media: dt-bindings: media: imx258: add bindings for IMX258 sensor") Fixes: 918b866edfec ("media: dt-bindings: Remove old ov5647.yaml file, update ovti,ov5647.yaml") Fixes: 22f2b47517a6 ("media: dt-bindings: media: i2c: Add OV8865 bindings documentation") Fixes: 29a202fa7acc ("media: dt-bindings: media: i2c: Add OV5648 bindings documentation") Cc: Sakari Ailus Cc: Mauro Carvalho Chehab Cc: Dave Stevenson Cc: Jacopo Mondi Cc: "Paul J. Murphy" Cc: Daniele Alessandrelli Cc: Krzysztof Kozlowski Cc: Paul Kocialkowski Cc: linux-media@vger.kernel.org Signed-off-by: Rob Herring Acked-by: Sakari Ailus Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20210223210127.55455-1-robh@kernel.org --- Documentation/devicetree/bindings/media/i2c/imx258.yaml | 14 +++++++------- .../devicetree/bindings/media/i2c/ovti,ov5647.yaml | 5 ++--- .../devicetree/bindings/media/i2c/ovti,ov5648.yaml | 16 +++++----------- .../devicetree/bindings/media/i2c/ovti,ov8865.yaml | 16 +++++----------- .../devicetree/bindings/media/i2c/sony,imx334.yaml | 11 +++++------ 5 files changed, 24 insertions(+), 38 deletions(-) diff --git a/Documentation/devicetree/bindings/media/i2c/imx258.yaml b/Documentation/devicetree/bindings/media/i2c/imx258.yaml index eaf77866ed9b..515317eff41a 100644 --- a/Documentation/devicetree/bindings/media/i2c/imx258.yaml +++ b/Documentation/devicetree/bindings/media/i2c/imx258.yaml @@ -49,10 +49,14 @@ properties: # See ../video-interfaces.txt for more details port: - type: object + $ref: /schemas/graph.yaml#/properties/port + additionalProperties: false + properties: endpoint: - type: object + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false + properties: data-lanes: oneOf: @@ -65,11 +69,7 @@ properties: - const: 1 - const: 2 - link-frequencies: - allOf: - - $ref: /schemas/types.yaml#/definitions/uint64-array - description: - Allowed data bus frequencies. + link-frequencies: true required: - data-lanes diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml index 1ab22e75d3c6..3e5d82df90a2 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml @@ -31,7 +31,8 @@ properties: maxItems: 1 port: - $ref: /schemas/graph.yaml#/$defs/port-base + $ref: /schemas/graph.yaml#/properties/port + additionalProperties: false properties: endpoint: @@ -41,8 +42,6 @@ properties: properties: clock-noncontinuous: true - additionalProperties: false - required: - compatible - reg diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml index f8783f77cc54..9149f5685688 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml @@ -44,19 +44,17 @@ properties: description: Reset Pin GPIO Control (active low) port: - type: object description: MIPI CSI-2 transmitter port + $ref: /schemas/graph.yaml#/properties/port + additionalProperties: false properties: endpoint: - type: object + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false properties: - remote-endpoint: true - - link-frequencies: - $ref: /schemas/types.yaml#/definitions/uint64-array - description: Allowed MIPI CSI-2 link frequencies + link-frequencies: true data-lanes: minItems: 1 @@ -65,10 +63,6 @@ properties: required: - data-lanes - link-frequencies - - remote-endpoint - - required: - - endpoint required: - compatible diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml index c0ba28aa30c4..0699c7e4fdeb 100644 --- a/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml +++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml @@ -44,19 +44,17 @@ properties: description: Reset Pin GPIO Control (active low) port: - type: object description: MIPI CSI-2 transmitter port + $ref: /schemas/graph.yaml#/properties/port + additionalProperties: false properties: endpoint: - type: object + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false properties: - remote-endpoint: true - - link-frequencies: - $ref: /schemas/types.yaml#/definitions/uint64-array - description: Allowed MIPI CSI-2 link frequencies + link-frequencies: true data-lanes: minItems: 1 @@ -65,10 +63,6 @@ properties: required: - data-lanes - link-frequencies - - remote-endpoint - - required: - - endpoint required: - compatible diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml index 24e689314bde..27cc5b7ff613 100644 --- a/Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml @@ -36,18 +36,17 @@ properties: description: Reference to the GPIO connected to the XCLR pin, if any. port: - type: object additionalProperties: false $ref: /schemas/graph.yaml#/properties/port properties: endpoint: - type: object + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false + properties: - data-lanes: - $ref: ../video-interfaces.yaml#/properties/data-lanes - link-frequencies: - $ref: ../video-interfaces.yaml#/properties/link-frequencies + data-lanes: true + link-frequencies: true required: - data-lanes -- cgit v1.2.3-59-g8ed1b From 221384df6123747d2a75517dd06cc01752f81518 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 1 Mar 2021 10:18:44 +0200 Subject: RDMA/cm: Fix IRQ restore in ib_send_cm_sidr_rep ib_send_cm_sidr_rep() { spin_lock_irqsave() cm_send_sidr_rep_locked() { ... spin_lock_irq() .... spin_unlock_irq() <--- this will enable interrupts } spin_unlock_irqrestore() } spin_unlock_irqrestore() expects interrupts to be disabled but the internal spin_unlock_irq() will always enable hard interrupts. Fix this by replacing the internal spin_{lock,unlock}_irq() with irqsave/restore variants. It fixes the following kernel trace: raw_local_irq_restore() called with IRQs enabled WARNING: CPU: 2 PID: 20001 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x1d/0x20 Call Trace: _raw_spin_unlock_irqrestore+0x4e/0x50 ib_send_cm_sidr_rep+0x3a/0x50 [ib_cm] cma_send_sidr_rep+0xa1/0x160 [rdma_cm] rdma_accept+0x25e/0x350 [rdma_cm] ucma_accept+0x132/0x1cc [rdma_ucm] ucma_write+0xbf/0x140 [rdma_ucm] vfs_write+0xc1/0x340 ksys_write+0xb3/0xe0 do_syscall_64+0x2d/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 87c4c774cbef ("RDMA/cm: Protect access to remote_sidr_table") Link: https://lore.kernel.org/r/20210301081844.445823-1-leon@kernel.org Signed-off-by: Saeed Mahameed Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index be996dba040c..3d194bb60840 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3651,6 +3651,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param) { struct ib_mad_send_buf *msg; + unsigned long flags; int ret; lockdep_assert_held(&cm_id_priv->lock); @@ -3676,12 +3677,12 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, return ret; } cm_id_priv->id.state = IB_CM_IDLE; - spin_lock_irq(&cm.lock); + spin_lock_irqsave(&cm.lock, flags); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); } - spin_unlock_irq(&cm.lock); + spin_unlock_irqrestore(&cm.lock, flags); return 0; } -- cgit v1.2.3-59-g8ed1b From 475f23b8c66d2892ad6acbf90ed757cafab13de7 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Fri, 19 Feb 2021 18:32:26 -0500 Subject: RDMA/rxe: Fix missing kconfig dependency on CRYPTO When RDMA_RXE is enabled and CRYPTO is disabled, Kbuild gives the following warning: WARNING: unmet direct dependencies detected for CRYPTO_CRC32 Depends on [n]: CRYPTO [=n] Selected by [y]: - RDMA_RXE [=y] && (INFINIBAND_USER_ACCESS [=y] || !INFINIBAND_USER_ACCESS [=y]) && INET [=y] && PCI [=y] && INFINIBAND [=y] && INFINIBAND_VIRT_DMA [=y] This is because RDMA_RXE selects CRYPTO_CRC32, without depending on or selecting CRYPTO, despite that config option being subordinate to CRYPTO. Fixes: cee2688e3cd6 ("IB/rxe: Offload CRC calculation when possible") Signed-off-by: Julian Braha Link: https://lore.kernel.org/r/21525878.NYvzQUHefP@ubuntu-mate-laptop Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 452149066792..06b8dc5093f7 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -4,6 +4,7 @@ config RDMA_RXE depends on INET && PCI && INFINIBAND depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL + select CRYPTO select CRYPTO_CRC32 help This driver implements the InfiniBand RDMA transport over -- cgit v1.2.3-59-g8ed1b From 3a9b3d4536e0c25bd3906a28c1f584177e49dd0f Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 22 Feb 2021 20:23:43 +0800 Subject: IB/mlx5: Add missing error code Set err to -ENOMEM if kzalloc fails instead of 0. Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX") Link: https://lore.kernel.org/r/20210222122343.19720-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ebc2a4355fa5..de3c2fc6f361 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2073,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( num_alloc_xa_entries++; event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL); - if (!event_sub) + if (!event_sub) { + err = -ENOMEM; goto err; + } list_add_tail(&event_sub->event_list, &sub_list); uverbs_uobject_get(&ev_file->uobj); -- cgit v1.2.3-59-g8ed1b From b924a8197ac7660eb358ed0277bd5b12f9b40fe2 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 15:05:05 +0800 Subject: gcc-plugins: structleak: remove unneeded variable 'ret' Fix the following coccicheck warning: scripts/gcc-plugins/structleak_plugin.c:177:14-17: Unneeded variable: "ret". Return "0" on line 207 Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20200418070505.10715-1-yanaijie@huawei.com --- scripts/gcc-plugins/structleak_plugin.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/gcc-plugins/structleak_plugin.c b/scripts/gcc-plugins/structleak_plugin.c index 29b480c33a8d..d7190e443a14 100644 --- a/scripts/gcc-plugins/structleak_plugin.c +++ b/scripts/gcc-plugins/structleak_plugin.c @@ -170,7 +170,6 @@ static void initialize(tree var) static unsigned int structleak_execute(void) { basic_block bb; - unsigned int ret = 0; tree var; unsigned int i; @@ -200,7 +199,7 @@ static unsigned int structleak_execute(void) initialize(var); } - return ret; + return 0; } #define PASS_NAME structleak -- cgit v1.2.3-59-g8ed1b From 5477edcacaacb8af8169450180a1d3bd0dfb9c99 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 18 Apr 2020 15:05:21 +0800 Subject: gcc-plugins: latent_entropy: remove unneeded semicolon Fix the following coccicheck warning: scripts/gcc-plugins/latent_entropy_plugin.c:539:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20200418070521.10931-1-yanaijie@huawei.com --- scripts/gcc-plugins/latent_entropy_plugin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 9dced66d158e..589454bce930 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -524,7 +524,7 @@ static unsigned int latent_entropy_execute(void) while (bb != EXIT_BLOCK_PTR_FOR_FN(cfun)) { perturb_local_entropy(bb, local_entropy); bb = bb->next_bb; - }; + } /* 4. mix local entropy into the global entropy variable */ perturb_latent_entropy(local_entropy); -- cgit v1.2.3-59-g8ed1b From 3531ba21f5520d0865004e7d75b6f505d08589f2 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 11:46:17 -0600 Subject: ALSA: hda: fix kernel-doc warnings v5.12-rc1 flags new warnings with make W=1, fix missing or broken function descriptors. sound/pci/hda/hda_codec.c:3492: warning: expecting prototype for snd_hda_input_mux_info_info(). Prototype was for snd_hda_input_mux_info() instead sound/pci/hda/hda_codec.c:3521: warning: expecting prototype for snd_hda_input_mux_info_put(). Prototype was for snd_hda_input_mux_put() instead sound/pci/hda/hda_codec.c:3958: warning: expecting prototype for _snd_hda_pin_ctl(). Prototype was for _snd_hda_set_pin_ctl() instead sound/pci/hda/hda_jack.c:223: warning: expecting prototype for snd_hda_set_dirty_all(). Prototype was for snd_hda_jack_set_dirty_all() instead sound/pci/hda/hda_jack.c:309: warning: expecting prototype for snd_hda_jack_detect_enable_mst(). Prototype was for snd_hda_jack_detect_enable_callback_mst() instead sound/pci/hda/hda_generic.c:3933: warning: expecting prototype for snd_dha_gen_add_mute_led_cdev(). Prototype was for snd_hda_gen_add_mute_led_cdev() instead sound/pci/hda/hda_generic.c:4093: warning: expecting prototype for snd_dha_gen_add_micmute_led_cdev(). Prototype was for snd_hda_gen_add_micmute_led_cdev() instead sound/pci/hda/patch_ca0132.c:2357: warning: expecting prototype for Prepare and send the SCP message to DSP(). Prototype was for dspio_scp() instead sound/pci/hda/patch_ca0132.c:2883: warning: expecting prototype for Allocate router ports(). Prototype was for dsp_allocate_router_ports() instead sound/pci/hda/patch_ca0132.c:3202: warning: expecting prototype for Write a block of data into DSP code or data RAM using pre(). Prototype was for dspxfr_one_seg() instead sound/pci/hda/patch_ca0132.c:3397: warning: expecting prototype for data overlay to DSP memories(). Prototype was for dspxfr_image() instead sound/hda/hdac_regmap.c:393: warning: expecting prototype for snd_hdac_regmap_init(). Prototype was for snd_hdac_regmap_exit() instead sound/hda/ext/hdac_ext_controller.c:142: warning: expecting prototype for snd_hdac_ext_bus_get_link_index(). Prototype was for snd_hdac_ext_bus_get_link() instead sound/hda/ext/hdac_ext_stream.c:140: warning: expecting prototype for snd_hdac_ext_linkstream_start(). Prototype was for snd_hdac_ext_link_stream_start() instead Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210301174617.116960-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/ext/hdac_ext_controller.c | 2 +- sound/hda/ext/hdac_ext_stream.c | 2 +- sound/hda/hdac_regmap.c | 2 +- sound/pci/hda/hda_codec.c | 6 +++--- sound/pci/hda/hda_generic.c | 4 ++-- sound/pci/hda/hda_jack.c | 4 ++-- sound/pci/hda/patch_ca0132.c | 9 ++++----- 7 files changed, 14 insertions(+), 15 deletions(-) diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index a9bd39b93697..b2df7b4f9227 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -133,7 +133,7 @@ void snd_hdac_link_free_all(struct hdac_bus *bus) EXPORT_SYMBOL_GPL(snd_hdac_link_free_all); /** - * snd_hdac_ext_bus_get_link_index - get link based on codec name + * snd_hdac_ext_bus_get_link - get link based on codec name * @bus: the pointer to HDAC bus object * @codec_name: codec name */ diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c index c4d54a838773..0c005d67fa89 100644 --- a/sound/hda/ext/hdac_ext_stream.c +++ b/sound/hda/ext/hdac_ext_stream.c @@ -133,7 +133,7 @@ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple); /** - * snd_hdac_ext_linkstream_start - start a stream + * snd_hdac_ext_link_stream_start - start a stream * @stream: HD-audio ext core stream to start */ void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *stream) diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c index d75f31eb9d78..fe3587547cfe 100644 --- a/sound/hda/hdac_regmap.c +++ b/sound/hda/hdac_regmap.c @@ -386,7 +386,7 @@ int snd_hdac_regmap_init(struct hdac_device *codec) EXPORT_SYMBOL_GPL(snd_hdac_regmap_init); /** - * snd_hdac_regmap_init - Release the regmap from HDA codec + * snd_hdac_regmap_exit - Release the regmap from HDA codec * @codec: the codec object */ void snd_hdac_regmap_exit(struct hdac_device *codec) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 9b755062d841..2026f1ccaf5a 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3483,7 +3483,7 @@ EXPORT_SYMBOL_GPL(snd_hda_check_amp_list_power); */ /** - * snd_hda_input_mux_info_info - Info callback helper for the input-mux enum + * snd_hda_input_mux_info - Info callback helper for the input-mux enum * @imux: imux helper object * @uinfo: pointer to get/store the data */ @@ -3506,7 +3506,7 @@ int snd_hda_input_mux_info(const struct hda_input_mux *imux, EXPORT_SYMBOL_GPL(snd_hda_input_mux_info); /** - * snd_hda_input_mux_info_put - Put callback helper for the input-mux enum + * snd_hda_input_mux_put - Put callback helper for the input-mux enum * @codec: the HDA codec * @imux: imux helper object * @ucontrol: pointer to get/store the data @@ -3941,7 +3941,7 @@ unsigned int snd_hda_correct_pin_ctl(struct hda_codec *codec, EXPORT_SYMBOL_GPL(snd_hda_correct_pin_ctl); /** - * _snd_hda_pin_ctl - Helper to set pin ctl value + * _snd_hda_set_pin_ctl - Helper to set pin ctl value * @codec: the HDA codec * @pin: referred pin NID * @val: pin control value to set diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 5e40944e7342..8b7c5508f368 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3923,7 +3923,7 @@ static void vmaster_update_mute_led(void *private_data, int enabled) } /** - * snd_dha_gen_add_mute_led_cdev - Create a LED classdev and enable as vmaster mute LED + * snd_hda_gen_add_mute_led_cdev - Create a LED classdev and enable as vmaster mute LED * @codec: the HDA codec * @callback: the callback for LED classdev brightness_set_blocking */ @@ -4074,7 +4074,7 @@ static int add_micmute_led_hook(struct hda_codec *codec) } /** - * snd_dha_gen_add_micmute_led_cdev - Create a LED classdev and enable as mic-mute LED + * snd_hda_gen_add_micmute_led_cdev - Create a LED classdev and enable as mic-mute LED * @codec: the HDA codec * @callback: the callback for LED classdev brightness_set_blocking * diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c index b8b568046592..ac00866d8032 100644 --- a/sound/pci/hda/hda_jack.c +++ b/sound/pci/hda/hda_jack.c @@ -213,7 +213,7 @@ static void jack_detect_update(struct hda_codec *codec, } /** - * snd_hda_set_dirty_all - Mark all the cached as dirty + * snd_hda_jack_set_dirty_all - Mark all the cached as dirty * @codec: the HDA codec * * This function sets the dirty flag to all entries of jack table. @@ -293,7 +293,7 @@ find_callback_from_list(struct hda_jack_tbl *jack, } /** - * snd_hda_jack_detect_enable_mst - enable the jack-detection + * snd_hda_jack_detect_enable_callback_mst - enable the jack-detection * @codec: the HDA codec * @nid: pin NID to enable * @func: callback function to register diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 7e62aed172a9..c966f49fa942 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -2338,7 +2338,7 @@ static int dspio_send_scp_message(struct hda_codec *codec, } /** - * Prepare and send the SCP message to DSP + * dspio_scp - Prepare and send the SCP message to DSP * @codec: the HDA codec * @mod_id: ID of the DSP module to send the command * @src_id: ID of the source @@ -2865,7 +2865,7 @@ static int dsp_dma_stop(struct hda_codec *codec, } /** - * Allocate router ports + * dsp_allocate_router_ports - Allocate router ports * * @codec: the HDA codec * @num_chans: number of channels in the stream @@ -3178,8 +3178,7 @@ static int dspxfr_hci_write(struct hda_codec *codec, } /** - * Write a block of data into DSP code or data RAM using pre-allocated - * DMA engine. + * dspxfr_one_seg - Write a block of data into DSP code or data RAM using pre-allocated DMA engine. * * @codec: the HDA codec * @fls: pointer to a fast load image @@ -3376,7 +3375,7 @@ static int dspxfr_one_seg(struct hda_codec *codec, } /** - * Write the entire DSP image of a DSP code/data overlay to DSP memories + * dspxfr_image - Write the entire DSP image of a DSP code/data overlay to DSP memories * * @codec: the HDA codec * @fls_data: pointer to a fast load image -- cgit v1.2.3-59-g8ed1b From a864e8f159b13babf552aff14a5fbe11abc017e4 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:01:46 -0600 Subject: ALSA: hda: intel-nhlt: verify config type Multiple bug reports report issues with the SOF and SST drivers when dealing with single microphone cases. We currently read the DMIC array information unconditionally but we don't check that the configuration type is actually a mic array. When the DMIC link does not rely on a mic array configuration, the recommendation is to check the format information to infer the maximum number of channels, and map this to the number of microphones. This leaves a potential for a mismatch between actual microphones available in hardware and what the ACPI table contains, but we have no other source of information. Note that single microphone configurations can alternatively be handled with a 'mic array' configuration along with a 'vendor-defined' geometry. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201251 BugLink: https://github.com/thesofproject/linux/issues/2725 Fixes: 7a33ea70e1868 ('ALSA: hda: intel-nhlt: handle NHLT VENDOR_DEFINED DMIC geometry') Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Reviewed-by: Rander Wang Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20210302000146.1177770-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- include/sound/intel-nhlt.h | 5 +++++ sound/hda/intel-nhlt.c | 54 ++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 743c2f442280..d0574805865f 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -112,6 +112,11 @@ struct nhlt_vendor_dmic_array_config { /* TODO add vendor mic config */ } __packed; +enum { + NHLT_CONFIG_TYPE_GENERIC = 0, + NHLT_CONFIG_TYPE_MIC_ARRAY = 1 +}; + enum { NHLT_MIC_ARRAY_2CH_SMALL = 0xa, NHLT_MIC_ARRAY_2CH_BIG = 0xb, diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 059aaf04f536..d053beccfaec 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -31,18 +31,44 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) struct nhlt_endpoint *epnt; struct nhlt_dmic_array_config *cfg; struct nhlt_vendor_dmic_array_config *cfg_vendor; + struct nhlt_fmt *fmt_configs; unsigned int dmic_geo = 0; - u8 j; + u16 max_ch = 0; + u8 i, j; if (!nhlt) return 0; - epnt = (struct nhlt_endpoint *)nhlt->desc; + for (j = 0, epnt = nhlt->desc; j < nhlt->endpoint_count; j++, + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length)) { - for (j = 0; j < nhlt->endpoint_count; j++) { - if (epnt->linktype == NHLT_LINK_DMIC) { - cfg = (struct nhlt_dmic_array_config *) - (epnt->config.caps); + if (epnt->linktype != NHLT_LINK_DMIC) + continue; + + cfg = (struct nhlt_dmic_array_config *)(epnt->config.caps); + fmt_configs = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size); + + /* find max number of channels based on format_configuration */ + if (fmt_configs->fmt_count) { + dev_dbg(dev, "%s: found %d format definitions\n", + __func__, fmt_configs->fmt_count); + + for (i = 0; i < fmt_configs->fmt_count; i++) { + struct wav_fmt_ext *fmt_ext; + + fmt_ext = &fmt_configs->fmt_config[i].fmt_ext; + + if (fmt_ext->fmt.channels > max_ch) + max_ch = fmt_ext->fmt.channels; + } + dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch); + } else { + dev_dbg(dev, "%s: No format information found\n", __func__); + } + + if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) { + dmic_geo = max_ch; + } else { switch (cfg->array_type) { case NHLT_MIC_ARRAY_2CH_SMALL: case NHLT_MIC_ARRAY_2CH_BIG: @@ -59,13 +85,23 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) dmic_geo = cfg_vendor->nb_mics; break; default: - dev_warn(dev, "undefined DMIC array_type 0x%0x\n", - cfg->array_type); + dev_warn(dev, "%s: undefined DMIC array_type 0x%0x\n", + __func__, cfg->array_type); + } + + if (dmic_geo > 0) { + dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo); + } + if (max_ch > dmic_geo) { + dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n", + __func__, max_ch, dmic_geo); } } - epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); } + dev_dbg(dev, "%s: dmic number %d max_ch %d\n", + __func__, dmic_geo, max_ch); + return dmic_geo; } EXPORT_SYMBOL_GPL(intel_nhlt_get_dmic_geo); -- cgit v1.2.3-59-g8ed1b From 78652ff69be439f7e925067c6a61b1839e531c01 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 2 Mar 2021 21:36:14 +1000 Subject: drm/nouveau/fifo/gk104-gp1xx: fix creation of sw class Fixes: 496162037cd24191 ("drm/nouveau/fifo: add id_engine hook") Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c index 69da601f1754..e771bd519ee2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -261,6 +261,9 @@ gk104_fifo_pbdma = { struct nvkm_engine * gk104_fifo_id_engine(struct nvkm_fifo *base, int engi) { + if (engi == GK104_FIFO_ENGN_SW) + return nvkm_device_engine(base->engine.subdev.device, NVKM_ENGINE_SW, 0); + return gk104_fifo(base)->engine[engi].engine; } -- cgit v1.2.3-59-g8ed1b From c7929b15b6e926c7150d9ec64844aceecf8a7a4a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:31:19 -0600 Subject: ASoC: soc-acpi: allow for partial match in parent name To change the module dependencies and simplify Kconfigs, we need to introduce new driver names (sof-audio-acpi-intel-byt and sof-audio-acpi-intel-bdw), and move from an exact string match to a partial one. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Bard Liao Acked-by: Mark Brown Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20210302003125.1178419-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- include/sound/soc-acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h index 9a43c44dcbbb..c45075024c30 100644 --- a/include/sound/soc-acpi.h +++ b/include/sound/soc-acpi.h @@ -174,7 +174,7 @@ struct snd_soc_acpi_codecs { static inline bool snd_soc_acpi_sof_parent(struct device *dev) { return dev->parent && dev->parent->driver && dev->parent->driver->name && - !strcmp(dev->parent->driver->name, "sof-audio-acpi"); + !strncmp(dev->parent->driver->name, "sof-audio-acpi", strlen("sof-audio-acpi")); } #endif -- cgit v1.2.3-59-g8ed1b From 8a49cd11e68ed0e6a687de04d25c06553bf96b0c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 1 Mar 2021 18:31:20 -0600 Subject: ASoC: SOF: ACPI: avoid reverse module dependency The SOF-ACPI driver is backwards from the normal Linux model, it has a generic driver that knows about all the specific drivers, as opposed to having hardware specific drivers that link against a common framework. This requires ugly Kconfig magic and leads to missed dependencies as seen in this link error: arm-linux-gnueabi-ld: sound/soc/sof/sof-pci-dev.o: in function `sof_acpi_probe': sof-pci-dev.c:(.text+0x1c): undefined reference to `snd_intel_dsp_driver_probe' Change it to use the normal probe order of starting with a specific device in a driver, turning the sof-acpi-dev.c driver into a library (exported symbols are name-spaced to avoid symbol pollution). For backwards-compatibility with previous Kconfigs, the default values for platform drivers uses the top-level ACPI configurations. The modules were also renamed to allow for gradual transitions in test scripts. Co-developed-by: Pierre-Louis Bossart Signed-off-by: Pierre-Louis Bossart Signed-off-by: Arnd Bergmann Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Bard Liao Acked-by: Mark Brown Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20210302003125.1178419-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/soc/sof/Kconfig | 8 ++- sound/soc/sof/Makefile | 2 +- sound/soc/sof/intel/Kconfig | 48 ++++++---------- sound/soc/sof/intel/Makefile | 8 +-- sound/soc/sof/intel/bdw.c | 67 ++++++++++++++++++++-- sound/soc/sof/intel/byt.c | 106 ++++++++++++++++++++++++++++++++--- sound/soc/sof/intel/shim.h | 6 -- sound/soc/sof/sof-acpi-dev.c | 130 ++++--------------------------------------- sound/soc/sof/sof-acpi-dev.h | 16 ++++++ 9 files changed, 215 insertions(+), 176 deletions(-) create mode 100644 sound/soc/sof/sof-acpi-dev.h diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig index 3e8b6c035ce3..95f55a0daefb 100644 --- a/sound/soc/sof/Kconfig +++ b/sound/soc/sof/Kconfig @@ -23,15 +23,17 @@ config SND_SOC_SOF_PCI config SND_SOC_SOF_ACPI tristate "SOF ACPI enumeration support" depends on ACPI || COMPILE_TEST - select SND_SOC_SOF - select SND_SOC_ACPI if ACPI - select IOSF_MBI if X86 && PCI help This adds support for ACPI enumeration. This option is required to enable Intel Broadwell/Baytrail/Cherrytrail devices. + For backwards-compatibility with previous configurations the selection will + be used as default for platform-specific drivers. Say Y if you need this option. If unsure select "N". +config SND_SOC_SOF_ACPI_DEV + tristate + config SND_SOC_SOF_OF tristate "SOF OF enumeration support" depends on OF || COMPILE_TEST diff --git a/sound/soc/sof/Makefile b/sound/soc/sof/Makefile index 05718dfe6cd2..f88fce23bbc7 100644 --- a/sound/soc/sof/Makefile +++ b/sound/soc/sof/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_SND_SOC_SOF) += snd-sof.o obj-$(CONFIG_SND_SOC_SOF_NOCODEC) += snd-sof-nocodec.o -obj-$(CONFIG_SND_SOC_SOF_ACPI) += snd-sof-acpi.o +obj-$(CONFIG_SND_SOC_SOF_ACPI_DEV) += snd-sof-acpi.o obj-$(CONFIG_SND_SOC_SOF_OF) += snd-sof-of.o obj-$(CONFIG_SND_SOC_SOF_PCI) += snd-sof-pci.o diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 4797a1cf8c80..2b062a66f1e7 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -9,14 +9,6 @@ config SND_SOC_SOF_INTEL_TOPLEVEL if SND_SOC_SOF_INTEL_TOPLEVEL -config SND_SOC_SOF_INTEL_ACPI - def_tristate SND_SOC_SOF_ACPI - select SND_SOC_SOF_BAYTRAIL if SND_SOC_SOF_BAYTRAIL_SUPPORT - select SND_SOC_SOF_BROADWELL if SND_SOC_SOF_BROADWELL_SUPPORT - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - config SND_SOC_SOF_INTEL_PCI def_tristate SND_SOC_SOF_PCI select SND_SOC_SOF_MERRIFIELD if SND_SOC_SOF_MERRIFIELD_SUPPORT @@ -50,18 +42,25 @@ config SND_SOC_SOF_INTEL_ATOM_HIFI_EP config SND_SOC_SOF_INTEL_COMMON tristate + select SND_SOC_SOF select SND_SOC_ACPI_INTEL_MATCH select SND_SOC_SOF_XTENSA select SND_SOC_INTEL_MACH select SND_SOC_ACPI if ACPI + select SND_INTEL_DSP_CONFIG help This option is not user-selectable but automagically handled by 'select' statements at a higher level. -if SND_SOC_SOF_INTEL_ACPI +if SND_SOC_SOF_ACPI -config SND_SOC_SOF_BAYTRAIL_SUPPORT - bool "SOF support for Baytrail, Braswell and Cherrytrail" +config SND_SOC_SOF_BAYTRAIL + tristate "SOF support for Baytrail, Braswell and Cherrytrail" + default SND_SOC_SOF_ACPI + select SND_SOC_SOF_INTEL_COMMON + select SND_SOC_SOF_INTEL_ATOM_HIFI_EP + select SND_SOC_SOF_ACPI_DEV + select IOSF_MBI if X86 && PCI help This adds support for Sound Open Firmware for Intel(R) platforms using the Baytrail, Braswell or Cherrytrail processors. @@ -75,17 +74,12 @@ config SND_SOC_SOF_BAYTRAIL_SUPPORT Say Y if you want to enable SOF on Baytrail/Cherrytrail. If unsure select "N". -config SND_SOC_SOF_BAYTRAIL - tristate - select SND_SOC_SOF_INTEL_ATOM_HIFI_EP - select SND_INTEL_DSP_CONFIG - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - -config SND_SOC_SOF_BROADWELL_SUPPORT - bool "SOF support for Broadwell" - select SND_INTEL_DSP_CONFIG +config SND_SOC_SOF_BROADWELL + tristate "SOF support for Broadwell" + default SND_SOC_SOF_ACPI + select SND_SOC_SOF_INTEL_COMMON + select SND_SOC_SOF_INTEL_HIFI_EP_IPC + select SND_SOC_SOF_ACPI_DEV help This adds support for Sound Open Firmware for Intel(R) platforms using the Broadwell processors. @@ -100,15 +94,7 @@ config SND_SOC_SOF_BROADWELL_SUPPORT Say Y if you want to enable SOF on Broadwell. If unsure select "N". -config SND_SOC_SOF_BROADWELL - tristate - select SND_SOC_SOF_INTEL_COMMON - select SND_SOC_SOF_INTEL_HIFI_EP_IPC - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - -endif ## SND_SOC_SOF_INTEL_ACPI +endif ## SND_SOC_SOF_ACPI if SND_SOC_SOF_INTEL_PCI diff --git a/sound/soc/sof/intel/Makefile b/sound/soc/sof/intel/Makefile index 2589111c2fae..f6640fa73636 100644 --- a/sound/soc/sof/intel/Makefile +++ b/sound/soc/sof/intel/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) -snd-sof-intel-byt-objs := byt.o -snd-sof-intel-bdw-objs := bdw.o +snd-sof-acpi-intel-byt-objs := byt.o +snd-sof-acpi-intel-bdw-objs := bdw.o snd-sof-intel-ipc-objs := intel-ipc.o @@ -13,8 +13,8 @@ snd-sof-intel-hda-common-$(CONFIG_SND_SOC_SOF_HDA_PROBES) += hda-compress.o snd-sof-intel-hda-objs := hda-codec.o -obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += snd-sof-intel-byt.o -obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-intel-bdw.o +obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += snd-sof-acpi-intel-byt.o +obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-acpi-intel-bdw.o obj-$(CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC) += snd-sof-intel-ipc.o obj-$(CONFIG_SND_SOC_SOF_HDA_COMMON) += snd-sof-intel-hda-common.o obj-$(CONFIG_SND_SOC_SOF_HDA) += snd-sof-intel-hda.o diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c index 50a4a73e6b9f..fd5ae628732d 100644 --- a/sound/soc/sof/intel/bdw.c +++ b/sound/soc/sof/intel/bdw.c @@ -15,8 +15,12 @@ #include #include #include +#include +#include +#include #include "../ops.h" #include "shim.h" +#include "../sof-acpi-dev.h" #include "../sof-audio.h" /* BARs */ @@ -590,7 +594,7 @@ static struct snd_soc_dai_driver bdw_dai[] = { }; /* broadwell ops */ -const struct snd_sof_dsp_ops sof_bdw_ops = { +static const struct snd_sof_dsp_ops sof_bdw_ops = { /*Device init */ .probe = bdw_probe, @@ -651,14 +655,69 @@ const struct snd_sof_dsp_ops sof_bdw_ops = { .arch_ops = &sof_xtensa_arch_ops, }; -EXPORT_SYMBOL_NS(sof_bdw_ops, SND_SOC_SOF_BROADWELL); -const struct sof_intel_dsp_desc bdw_chip_info = { +static const struct sof_intel_dsp_desc bdw_chip_info = { .cores_num = 1, .host_managed_cores_mask = 1, }; -EXPORT_SYMBOL_NS(bdw_chip_info, SND_SOC_SOF_BROADWELL); + +static const struct sof_dev_desc sof_acpi_broadwell_desc = { + .machines = snd_soc_acpi_intel_broadwell_machines, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = 1, + .resindex_imr_base = -1, + .irqindex_host_ipc = 0, + .chip_info = &bdw_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-bdw.ri", + .nocodec_tplg_filename = "sof-bdw-nocodec.tplg", + .ops = &sof_bdw_ops, +}; + +static const struct acpi_device_id sof_broadwell_match[] = { + { "INT3438", (unsigned long)&sof_acpi_broadwell_desc }, + { } +}; +MODULE_DEVICE_TABLE(acpi, sof_broadwell_match); + +static int sof_broadwell_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct acpi_device_id *id; + const struct sof_dev_desc *desc; + int ret; + + id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (!id) + return -ENODEV; + + ret = snd_intel_acpi_dsp_driver_probe(dev, id->id); + if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { + dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n"); + return -ENODEV; + } + + desc = device_get_match_data(dev); + if (!desc) + return -ENODEV; + + return sof_acpi_probe(pdev, device_get_match_data(dev)); +} + +/* acpi_driver definition */ +static struct platform_driver snd_sof_acpi_intel_bdw_driver = { + .probe = sof_broadwell_probe, + .remove = sof_acpi_remove, + .driver = { + .name = "sof-audio-acpi-intel-bdw", + .pm = &sof_acpi_pm, + .acpi_match_table = sof_broadwell_match, + }, +}; +module_platform_driver(snd_sof_acpi_intel_bdw_driver); MODULE_LICENSE("Dual BSD/GPL"); MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC); MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA); +MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV); diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c index 19260dbecac5..2846fdec9d95 100644 --- a/sound/soc/sof/intel/byt.c +++ b/sound/soc/sof/intel/byt.c @@ -15,8 +15,12 @@ #include #include #include +#include +#include +#include #include "../ops.h" #include "shim.h" +#include "../sof-acpi-dev.h" #include "../sof-audio.h" #include "../../intel/common/soc-intel-quirks.h" @@ -822,7 +826,7 @@ irq: } /* baytrail ops */ -const struct snd_sof_dsp_ops sof_byt_ops = { +static const struct snd_sof_dsp_ops sof_byt_ops = { /* device init */ .probe = byt_acpi_probe, .remove = byt_remove, @@ -892,16 +896,14 @@ const struct snd_sof_dsp_ops sof_byt_ops = { .arch_ops = &sof_xtensa_arch_ops, }; -EXPORT_SYMBOL_NS(sof_byt_ops, SND_SOC_SOF_BAYTRAIL); -const struct sof_intel_dsp_desc byt_chip_info = { +static const struct sof_intel_dsp_desc byt_chip_info = { .cores_num = 1, .host_managed_cores_mask = 1, }; -EXPORT_SYMBOL_NS(byt_chip_info, SND_SOC_SOF_BAYTRAIL); /* cherrytrail and braswell ops */ -const struct snd_sof_dsp_ops sof_cht_ops = { +static const struct snd_sof_dsp_ops sof_cht_ops = { /* device init */ .probe = byt_acpi_probe, .remove = byt_remove, @@ -972,16 +974,104 @@ const struct snd_sof_dsp_ops sof_cht_ops = { .arch_ops = &sof_xtensa_arch_ops, }; -EXPORT_SYMBOL_NS(sof_cht_ops, SND_SOC_SOF_BAYTRAIL); -const struct sof_intel_dsp_desc cht_chip_info = { +static const struct sof_intel_dsp_desc cht_chip_info = { .cores_num = 1, .host_managed_cores_mask = 1, }; -EXPORT_SYMBOL_NS(cht_chip_info, SND_SOC_SOF_BAYTRAIL); + +/* BYTCR uses different IRQ index */ +static const struct sof_dev_desc sof_acpi_baytrailcr_desc = { + .machines = snd_soc_acpi_intel_baytrail_machines, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = 1, + .resindex_imr_base = 2, + .irqindex_host_ipc = 0, + .chip_info = &byt_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-byt.ri", + .nocodec_tplg_filename = "sof-byt-nocodec.tplg", + .ops = &sof_byt_ops, +}; + +static const struct sof_dev_desc sof_acpi_baytrail_desc = { + .machines = snd_soc_acpi_intel_baytrail_machines, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = 1, + .resindex_imr_base = 2, + .irqindex_host_ipc = 5, + .chip_info = &byt_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-byt.ri", + .nocodec_tplg_filename = "sof-byt-nocodec.tplg", + .ops = &sof_byt_ops, +}; + +static const struct sof_dev_desc sof_acpi_cherrytrail_desc = { + .machines = snd_soc_acpi_intel_cherrytrail_machines, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = 1, + .resindex_imr_base = 2, + .irqindex_host_ipc = 5, + .chip_info = &cht_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-cht.ri", + .nocodec_tplg_filename = "sof-cht-nocodec.tplg", + .ops = &sof_cht_ops, +}; + +static const struct acpi_device_id sof_baytrail_match[] = { + { "80860F28", (unsigned long)&sof_acpi_baytrail_desc }, + { "808622A8", (unsigned long)&sof_acpi_cherrytrail_desc }, + { } +}; +MODULE_DEVICE_TABLE(acpi, sof_baytrail_match); + +static int sof_baytrail_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct sof_dev_desc *desc; + const struct acpi_device_id *id; + int ret; + + id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (!id) + return -ENODEV; + + ret = snd_intel_acpi_dsp_driver_probe(dev, id->id); + if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { + dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n"); + return -ENODEV; + } + + desc = device_get_match_data(&pdev->dev); + if (!desc) + return -ENODEV; + + if (desc == &sof_acpi_baytrail_desc && soc_intel_is_byt_cr(pdev)) + desc = &sof_acpi_baytrailcr_desc; + + return sof_acpi_probe(pdev, desc); +} + +/* acpi_driver definition */ +static struct platform_driver snd_sof_acpi_intel_byt_driver = { + .probe = sof_baytrail_probe, + .remove = sof_acpi_remove, + .driver = { + .name = "sof-audio-acpi-intel-byt", + .pm = &sof_acpi_pm, + .acpi_match_table = sof_baytrail_match, + }, +}; +module_platform_driver(snd_sof_acpi_intel_byt_driver); #endif /* CONFIG_SND_SOC_SOF_BAYTRAIL */ MODULE_LICENSE("Dual BSD/GPL"); MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC); MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA); +MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV); diff --git a/sound/soc/sof/intel/shim.h b/sound/soc/sof/intel/shim.h index 1e0afb5c8720..529f68d0ca47 100644 --- a/sound/soc/sof/intel/shim.h +++ b/sound/soc/sof/intel/shim.h @@ -167,13 +167,7 @@ struct sof_intel_dsp_desc { }; extern const struct snd_sof_dsp_ops sof_tng_ops; -extern const struct snd_sof_dsp_ops sof_byt_ops; -extern const struct snd_sof_dsp_ops sof_cht_ops; -extern const struct snd_sof_dsp_ops sof_bdw_ops; -extern const struct sof_intel_dsp_desc byt_chip_info; -extern const struct sof_intel_dsp_desc cht_chip_info; -extern const struct sof_intel_dsp_desc bdw_chip_info; extern const struct sof_intel_dsp_desc tng_chip_info; struct sof_intel_stream { diff --git a/sound/soc/sof/sof-acpi-dev.c b/sound/soc/sof/sof-acpi-dev.c index cc2e257087e4..1fec0420f662 100644 --- a/sound/soc/sof/sof-acpi-dev.c +++ b/sound/soc/sof/sof-acpi-dev.c @@ -12,12 +12,12 @@ #include #include #include -#include #include #include #include #include "../intel/common/soc-intel-quirks.h" #include "ops.h" +#include "sof-acpi-dev.h" /* platform specific devices */ #include "intel/shim.h" @@ -36,74 +36,12 @@ MODULE_PARM_DESC(sof_acpi_debug, "SOF ACPI debug options (0x0 all off)"); #define SOF_ACPI_DISABLE_PM_RUNTIME BIT(0) -#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BROADWELL) -static const struct sof_dev_desc sof_acpi_broadwell_desc = { - .machines = snd_soc_acpi_intel_broadwell_machines, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = 1, - .resindex_imr_base = -1, - .irqindex_host_ipc = 0, - .chip_info = &bdw_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-bdw.ri", - .nocodec_tplg_filename = "sof-bdw-nocodec.tplg", - .ops = &sof_bdw_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL) - -/* BYTCR uses different IRQ index */ -static const struct sof_dev_desc sof_acpi_baytrailcr_desc = { - .machines = snd_soc_acpi_intel_baytrail_machines, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = 1, - .resindex_imr_base = 2, - .irqindex_host_ipc = 0, - .chip_info = &byt_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-byt.ri", - .nocodec_tplg_filename = "sof-byt-nocodec.tplg", - .ops = &sof_byt_ops, -}; - -static const struct sof_dev_desc sof_acpi_baytrail_desc = { - .machines = snd_soc_acpi_intel_baytrail_machines, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = 1, - .resindex_imr_base = 2, - .irqindex_host_ipc = 5, - .chip_info = &byt_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-byt.ri", - .nocodec_tplg_filename = "sof-byt-nocodec.tplg", - .ops = &sof_byt_ops, -}; - -static const struct sof_dev_desc sof_acpi_cherrytrail_desc = { - .machines = snd_soc_acpi_intel_cherrytrail_machines, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = 1, - .resindex_imr_base = 2, - .irqindex_host_ipc = 5, - .chip_info = &cht_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-cht.ri", - .nocodec_tplg_filename = "sof-cht-nocodec.tplg", - .ops = &sof_cht_ops, -}; - -#endif - -static const struct dev_pm_ops sof_acpi_pm = { +const struct dev_pm_ops sof_acpi_pm = { SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume) SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume, snd_sof_runtime_idle) }; +EXPORT_SYMBOL_NS(sof_acpi_pm, SND_SOC_SOF_ACPI_DEV); static void sof_acpi_probe_complete(struct device *dev) { @@ -118,41 +56,19 @@ static void sof_acpi_probe_complete(struct device *dev) pm_runtime_enable(dev); } -static int sof_acpi_probe(struct platform_device *pdev) +int sof_acpi_probe(struct platform_device *pdev, const struct sof_dev_desc *desc) { struct device *dev = &pdev->dev; - const struct acpi_device_id *id; - const struct sof_dev_desc *desc; struct snd_sof_pdata *sof_pdata; const struct snd_sof_dsp_ops *ops; int ret; - id = acpi_match_device(dev->driver->acpi_match_table, dev); - if (!id) - return -ENODEV; - - if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) { - ret = snd_intel_acpi_dsp_driver_probe(dev, id->id); - if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { - dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n"); - return -ENODEV; - } - } dev_dbg(dev, "ACPI DSP detected"); sof_pdata = devm_kzalloc(dev, sizeof(*sof_pdata), GFP_KERNEL); if (!sof_pdata) return -ENOMEM; - desc = device_get_match_data(dev); - if (!desc) - return -ENODEV; - -#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL) - if (desc == &sof_acpi_baytrail_desc && soc_intel_is_byt_cr(pdev)) - desc = &sof_acpi_baytrailcr_desc; -#endif - /* get ops for platform */ ops = desc->ops; if (!ops) { @@ -194,44 +110,20 @@ static int sof_acpi_probe(struct platform_device *pdev) return ret; } +EXPORT_SYMBOL_NS(sof_acpi_probe, SND_SOC_SOF_ACPI_DEV); -static int sof_acpi_remove(struct platform_device *pdev) +int sof_acpi_remove(struct platform_device *pdev) { + struct device *dev = &pdev->dev; + if (!(sof_acpi_debug & SOF_ACPI_DISABLE_PM_RUNTIME)) - pm_runtime_disable(&pdev->dev); + pm_runtime_disable(dev); /* call sof helper for DSP hardware remove */ - snd_sof_device_remove(&pdev->dev); + snd_sof_device_remove(dev); return 0; } - -#ifdef CONFIG_ACPI -static const struct acpi_device_id sof_acpi_match[] = { -#if IS_ENABLED(CONFIG_SND_SOC_SOF_BROADWELL) - { "INT3438", (unsigned long)&sof_acpi_broadwell_desc }, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL) - { "80860F28", (unsigned long)&sof_acpi_baytrail_desc }, - { "808622A8", (unsigned long)&sof_acpi_cherrytrail_desc }, -#endif - { } -}; -MODULE_DEVICE_TABLE(acpi, sof_acpi_match); -#endif - -/* acpi_driver definition */ -static struct platform_driver snd_sof_acpi_driver = { - .probe = sof_acpi_probe, - .remove = sof_acpi_remove, - .driver = { - .name = "sof-audio-acpi", - .pm = &sof_acpi_pm, - .acpi_match_table = ACPI_PTR(sof_acpi_match), - }, -}; -module_platform_driver(snd_sof_acpi_driver); +EXPORT_SYMBOL_NS(sof_acpi_remove, SND_SOC_SOF_ACPI_DEV); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_IMPORT_NS(SND_SOC_SOF_BAYTRAIL); -MODULE_IMPORT_NS(SND_SOC_SOF_BROADWELL); diff --git a/sound/soc/sof/sof-acpi-dev.h b/sound/soc/sof/sof-acpi-dev.h new file mode 100644 index 000000000000..5c2b558d2ace --- /dev/null +++ b/sound/soc/sof/sof-acpi-dev.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * Copyright(c) 2021 Intel Corporation. All rights reserved. + */ + +#ifndef __SOUND_SOC_SOF_ACPI_H +#define __SOUND_SOC_SOF_ACPI_H + +extern const struct dev_pm_ops sof_acpi_pm; +int sof_acpi_probe(struct platform_device *pdev, const struct sof_dev_desc *desc); +int sof_acpi_remove(struct platform_device *pdev); + +#endif -- cgit v1.2.3-59-g8ed1b From 8d4ba1be3d2257606e04aff412829d8972670750 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:31:21 -0600 Subject: ASoC: SOF: pci: split PCI into different drivers Move PCI IDs and device-specific definitions out of common code. No functionality change for now, just code split and removal of IF_ENABLED() which made the configurations too complicated in case of reuse of IP across generations. Additional changes to address the DSP_CONFIG case and SoundWire depends/select confusions are provided in follow-up patches. Suggested-by: Arnd Bergmann Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Bard Liao Acked-by: Mark Brown Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20210302003125.1178419-4-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/soc/sof/Kconfig | 7 +- sound/soc/sof/Makefile | 2 +- sound/soc/sof/intel/Kconfig | 179 ++++++++--------------- sound/soc/sof/intel/Makefile | 12 ++ sound/soc/sof/intel/pci-apl.c | 81 +++++++++++ sound/soc/sof/intel/pci-cnl.c | 104 +++++++++++++ sound/soc/sof/intel/pci-icl.c | 84 +++++++++++ sound/soc/sof/intel/pci-tgl.c | 121 +++++++++++++++ sound/soc/sof/intel/pci-tng.c | 70 +++++++++ sound/soc/sof/sof-pci-dev.c | 332 ++---------------------------------------- sound/soc/sof/sof-pci-dev.h | 17 +++ 11 files changed, 562 insertions(+), 447 deletions(-) create mode 100644 sound/soc/sof/intel/pci-apl.c create mode 100644 sound/soc/sof/intel/pci-cnl.c create mode 100644 sound/soc/sof/intel/pci-icl.c create mode 100644 sound/soc/sof/intel/pci-tgl.c create mode 100644 sound/soc/sof/intel/pci-tng.c create mode 100644 sound/soc/sof/sof-pci-dev.h diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig index 95f55a0daefb..8dfc165c3690 100644 --- a/sound/soc/sof/Kconfig +++ b/sound/soc/sof/Kconfig @@ -9,14 +9,17 @@ config SND_SOC_SOF_TOPLEVEL if SND_SOC_SOF_TOPLEVEL +config SND_SOC_SOF_PCI_DEV + tristate + config SND_SOC_SOF_PCI tristate "SOF PCI enumeration support" depends on PCI - select SND_SOC_SOF - select SND_SOC_ACPI if ACPI help This adds support for PCI enumeration. This option is required to enable Intel Skylake+ devices. + For backwards-compatibility with previous configurations the selection will + be used as default for platform-specific drivers. Say Y if you need this option. If unsure select "N". diff --git a/sound/soc/sof/Makefile b/sound/soc/sof/Makefile index f88fce23bbc7..606d8137cd98 100644 --- a/sound/soc/sof/Makefile +++ b/sound/soc/sof/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_SND_SOC_SOF_NOCODEC) += snd-sof-nocodec.o obj-$(CONFIG_SND_SOC_SOF_ACPI_DEV) += snd-sof-acpi.o obj-$(CONFIG_SND_SOC_SOF_OF) += snd-sof-of.o -obj-$(CONFIG_SND_SOC_SOF_PCI) += snd-sof-pci.o +obj-$(CONFIG_SND_SOC_SOF_PCI_DEV) += snd-sof-pci.o obj-$(CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL) += intel/ obj-$(CONFIG_SND_SOC_SOF_IMX_TOPLEVEL) += imx/ diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 2b062a66f1e7..7dcb61ba5763 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -9,23 +9,6 @@ config SND_SOC_SOF_INTEL_TOPLEVEL if SND_SOC_SOF_INTEL_TOPLEVEL -config SND_SOC_SOF_INTEL_PCI - def_tristate SND_SOC_SOF_PCI - select SND_SOC_SOF_MERRIFIELD if SND_SOC_SOF_MERRIFIELD_SUPPORT - select SND_SOC_SOF_APOLLOLAKE if SND_SOC_SOF_APOLLOLAKE_SUPPORT - select SND_SOC_SOF_GEMINILAKE if SND_SOC_SOF_GEMINILAKE_SUPPORT - select SND_SOC_SOF_CANNONLAKE if SND_SOC_SOF_CANNONLAKE_SUPPORT - select SND_SOC_SOF_COFFEELAKE if SND_SOC_SOF_COFFEELAKE_SUPPORT - select SND_SOC_SOF_ICELAKE if SND_SOC_SOF_ICELAKE_SUPPORT - select SND_SOC_SOF_COMETLAKE if SND_SOC_SOF_COMETLAKE_SUPPORT - select SND_SOC_SOF_TIGERLAKE if SND_SOC_SOF_TIGERLAKE_SUPPORT - select SND_SOC_SOF_ELKHARTLAKE if SND_SOC_SOF_ELKHARTLAKE_SUPPORT - select SND_SOC_SOF_JASPERLAKE if SND_SOC_SOF_JASPERLAKE_SUPPORT - select SND_SOC_SOF_ALDERLAKE if SND_SOC_SOF_ALDERLAKE_SUPPORT - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - config SND_SOC_SOF_INTEL_HIFI_EP_IPC tristate help @@ -96,187 +79,141 @@ config SND_SOC_SOF_BROADWELL endif ## SND_SOC_SOF_ACPI -if SND_SOC_SOF_INTEL_PCI +if SND_SOC_SOF_PCI -config SND_SOC_SOF_MERRIFIELD_SUPPORT - bool "SOF support for Tangier/Merrifield" +config SND_SOC_SOF_MERRIFIELD + tristate "SOF support for Tangier/Merrifield" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_ATOM_HIFI_EP help This adds support for Sound Open Firmware for Intel(R) platforms using the Tangier/Merrifield processors. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_MERRIFIELD +config SND_SOC_SOF_INTEL_APL tristate - select SND_SOC_SOF_INTEL_ATOM_HIFI_EP - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. + select SND_SOC_SOF_HDA_COMMON -config SND_SOC_SOF_APOLLOLAKE_SUPPORT - bool "SOF support for Apollolake" +config SND_SOC_SOF_APOLLOLAKE + tristate "SOF support for Apollolake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_APL help This adds support for Sound Open Firmware for Intel(R) platforms using the Apollolake processors. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_APOLLOLAKE - tristate - select SND_SOC_SOF_HDA_COMMON - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - -config SND_SOC_SOF_GEMINILAKE_SUPPORT - bool "SOF support for GeminiLake" +config SND_SOC_SOF_GEMINILAKE + tristate "SOF support for GeminiLake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_APL help This adds support for Sound Open Firmware for Intel(R) platforms using the Geminilake processors. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_GEMINILAKE +config SND_SOC_SOF_INTEL_CNL tristate select SND_SOC_SOF_HDA_COMMON - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. + select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE -config SND_SOC_SOF_CANNONLAKE_SUPPORT - bool "SOF support for Cannonlake" +config SND_SOC_SOF_CANNONLAKE + tristate "SOF support for Cannonlake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_CNL help This adds support for Sound Open Firmware for Intel(R) platforms using the Cannonlake processors. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_CANNONLAKE - tristate - select SND_SOC_SOF_HDA_COMMON - select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - -config SND_SOC_SOF_COFFEELAKE_SUPPORT - bool "SOF support for CoffeeLake" +config SND_SOC_SOF_COFFEELAKE + tristate "SOF support for CoffeeLake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_CNL help This adds support for Sound Open Firmware for Intel(R) platforms using the Coffeelake processors. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_COFFEELAKE - tristate - select SND_SOC_SOF_HDA_COMMON - select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - -config SND_SOC_SOF_ICELAKE_SUPPORT - bool "SOF support for Icelake" +config SND_SOC_SOF_COMETLAKE + tristate "SOF support for CometLake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_CNL help This adds support for Sound Open Firmware for Intel(R) platforms - using the Icelake processors. - Say Y if you have such a device. + using the Cometlake processors. If unsure select "N". -config SND_SOC_SOF_ICELAKE +config SND_SOC_SOF_INTEL_ICL tristate select SND_SOC_SOF_HDA_COMMON select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. -config SND_SOC_SOF_COMETLAKE - tristate - select SND_SOC_SOF_HDA_COMMON - select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - -config SND_SOC_SOF_COMETLAKE_SUPPORT - bool - -config SND_SOC_SOF_COMETLAKE_LP_SUPPORT - bool "SOF support for CometLake" - select SND_SOC_SOF_COMETLAKE_SUPPORT +config SND_SOC_SOF_ICELAKE + tristate "SOF support for Icelake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_ICL help This adds support for Sound Open Firmware for Intel(R) platforms - using the Cometlake processors. + using the Icelake processors. + Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_TIGERLAKE_SUPPORT - bool "SOF support for Tigerlake" +config SND_SOC_SOF_JASPERLAKE + tristate "SOF support for JasperLake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_ICL help This adds support for Sound Open Firmware for Intel(R) platforms - using the Tigerlake processors. + using the JasperLake processors. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_TIGERLAKE +config SND_SOC_SOF_INTEL_TGL tristate select SND_SOC_SOF_HDA_COMMON select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. -config SND_SOC_SOF_ELKHARTLAKE_SUPPORT - bool "SOF support for ElkhartLake" +config SND_SOC_SOF_TIGERLAKE + tristate "SOF support for Tigerlake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_TGL help This adds support for Sound Open Firmware for Intel(R) platforms - using the ElkhartLake processors. + using the Tigerlake processors. Say Y if you have such a device. If unsure select "N". config SND_SOC_SOF_ELKHARTLAKE - tristate - select SND_SOC_SOF_HDA_COMMON - select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - -config SND_SOC_SOF_JASPERLAKE_SUPPORT - bool "SOF support for JasperLake" + tristate "SOF support for ElkhartLake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_TGL help This adds support for Sound Open Firmware for Intel(R) platforms - using the JasperLake processors. + using the ElkhartLake processors. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_JASPERLAKE - tristate - select SND_SOC_SOF_HDA_COMMON - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. - -config SND_SOC_SOF_ALDERLAKE_SUPPORT - bool "SOF support for Alderlake" +config SND_SOC_SOF_ALDERLAKE + tristate "SOF support for Alderlake" + default SND_SOC_SOF_PCI + select SND_SOC_SOF_INTEL_TGL help This adds support for Sound Open Firmware for Intel(R) platforms using the Alderlake processors. Say Y if you have such a device. If unsure select "N". -config SND_SOC_SOF_ALDERLAKE - tristate - select SND_SOC_SOF_HDA_COMMON - select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level - config SND_SOC_SOF_HDA_COMMON tristate - select SND_INTEL_DSP_CONFIG select SND_SOC_SOF_INTEL_COMMON + select SND_SOC_SOF_PCI_DEV + select SND_INTEL_DSP_CONFIG select SND_SOC_SOF_HDA_LINK_BASELINE help This option is not user-selectable but automagically handled by diff --git a/sound/soc/sof/intel/Makefile b/sound/soc/sof/intel/Makefile index f6640fa73636..f3d6f7070fb3 100644 --- a/sound/soc/sof/intel/Makefile +++ b/sound/soc/sof/intel/Makefile @@ -18,3 +18,15 @@ obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-acpi-intel-bdw.o obj-$(CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC) += snd-sof-intel-ipc.o obj-$(CONFIG_SND_SOC_SOF_HDA_COMMON) += snd-sof-intel-hda-common.o obj-$(CONFIG_SND_SOC_SOF_HDA) += snd-sof-intel-hda.o + +snd-sof-pci-intel-tng-objs := pci-tng.o +snd-sof-pci-intel-apl-objs := pci-apl.o +snd-sof-pci-intel-cnl-objs := pci-cnl.o +snd-sof-pci-intel-icl-objs := pci-icl.o +snd-sof-pci-intel-tgl-objs := pci-tgl.o + +obj-$(CONFIG_SND_SOC_SOF_MERRIFIELD) += snd-sof-pci-intel-tng.o +obj-$(CONFIG_SND_SOC_SOF_INTEL_APL) += snd-sof-pci-intel-apl.o +obj-$(CONFIG_SND_SOC_SOF_INTEL_CNL) += snd-sof-pci-intel-cnl.o +obj-$(CONFIG_SND_SOC_SOF_INTEL_ICL) += snd-sof-pci-intel-icl.o +obj-$(CONFIG_SND_SOC_SOF_INTEL_TGL) += snd-sof-pci-intel-tgl.o diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c new file mode 100644 index 000000000000..e83ddbaafa29 --- /dev/null +++ b/sound/soc/sof/intel/pci-apl.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +// +// This file is provided under a dual BSD/GPLv2 license. When using or +// redistributing this file, you may do so under either license. +// +// Copyright(c) 2018-2021 Intel Corporation. All rights reserved. +// +// Author: Liam Girdwood +// + +#include +#include +#include +#include +#include +#include "../ops.h" +#include "../sof-pci-dev.h" + +/* platform specific devices */ +#include "hda.h" + +static const struct sof_dev_desc bxt_desc = { + .machines = snd_soc_acpi_intel_bxt_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &apl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-apl.ri", + .nocodec_tplg_filename = "sof-apl-nocodec.tplg", + .ops = &sof_apl_ops, +}; + +static const struct sof_dev_desc glk_desc = { + .machines = snd_soc_acpi_intel_glk_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &apl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-glk.ri", + .nocodec_tplg_filename = "sof-glk-nocodec.tplg", + .ops = &sof_apl_ops, +}; + +/* PCI IDs */ +static const struct pci_device_id sof_pci_ids[] = { + { PCI_DEVICE(0x8086, 0x5a98), /* BXT-P (ApolloLake) */ + .driver_data = (unsigned long)&bxt_desc}, + { PCI_DEVICE(0x8086, 0x1a98),/* BXT-T */ + .driver_data = (unsigned long)&bxt_desc}, + { PCI_DEVICE(0x8086, 0x3198), /* GeminiLake */ + .driver_data = (unsigned long)&glk_desc}, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, sof_pci_ids); + +/* pci_driver definition */ +static struct pci_driver snd_sof_pci_intel_apl_driver = { + .name = "sof-audio-pci-intel-apl", + .id_table = sof_pci_ids, + .probe = sof_pci_probe, + .remove = sof_pci_remove, + .shutdown = sof_pci_shutdown, + .driver = { + .pm = &sof_pci_pm, + }, +}; +module_pci_driver(snd_sof_pci_intel_apl_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON); +MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV); diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c new file mode 100644 index 000000000000..f974d3a77217 --- /dev/null +++ b/sound/soc/sof/intel/pci-cnl.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +// +// This file is provided under a dual BSD/GPLv2 license. When using or +// redistributing this file, you may do so under either license. +// +// Copyright(c) 2018 Intel Corporation. All rights reserved. +// +// Author: Liam Girdwood +// + +#include +#include +#include +#include +#include +#include "../ops.h" +#include "../sof-pci-dev.h" + +/* platform specific devices */ +#include "hda.h" + +static const struct sof_dev_desc cnl_desc = { + .machines = snd_soc_acpi_intel_cnl_machines, + .alt_machines = snd_soc_acpi_intel_cnl_sdw_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &cnl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-cnl.ri", + .nocodec_tplg_filename = "sof-cnl-nocodec.tplg", + .ops = &sof_cnl_ops, +}; + +static const struct sof_dev_desc cfl_desc = { + .machines = snd_soc_acpi_intel_cfl_machines, + .alt_machines = snd_soc_acpi_intel_cfl_sdw_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &cnl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-cfl.ri", + .nocodec_tplg_filename = "sof-cnl-nocodec.tplg", + .ops = &sof_cnl_ops, +}; + +static const struct sof_dev_desc cml_desc = { + .machines = snd_soc_acpi_intel_cml_machines, + .alt_machines = snd_soc_acpi_intel_cml_sdw_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &cnl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-cml.ri", + .nocodec_tplg_filename = "sof-cnl-nocodec.tplg", + .ops = &sof_cnl_ops, +}; + +/* PCI IDs */ +static const struct pci_device_id sof_pci_ids[] = { + { PCI_DEVICE(0x8086, 0x9dc8), /* CNL-LP */ + .driver_data = (unsigned long)&cnl_desc}, + { PCI_DEVICE(0x8086, 0xa348), /* CNL-H */ + .driver_data = (unsigned long)&cfl_desc}, + { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */ + .driver_data = (unsigned long)&cml_desc}, + { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */ + .driver_data = (unsigned long)&cml_desc}, + { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */ + .driver_data = (unsigned long)&cml_desc}, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, sof_pci_ids); + +/* pci_driver definition */ +static struct pci_driver snd_sof_pci_intel_cnl_driver = { + .name = "sof-audio-pci-intel-cnl", + .id_table = sof_pci_ids, + .probe = sof_pci_probe, + .remove = sof_pci_remove, + .shutdown = sof_pci_shutdown, + .driver = { + .pm = &sof_pci_pm, + }, +}; +module_pci_driver(snd_sof_pci_intel_cnl_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON); +MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV); diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c new file mode 100644 index 000000000000..d5d7cefa6ef9 --- /dev/null +++ b/sound/soc/sof/intel/pci-icl.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +// +// This file is provided under a dual BSD/GPLv2 license. When using or +// redistributing this file, you may do so under either license. +// +// Copyright(c) 2018-2021 Intel Corporation. All rights reserved. +// +// Author: Liam Girdwood +// + +#include +#include +#include +#include +#include +#include "../ops.h" +#include "../sof-pci-dev.h" + +/* platform specific devices */ +#include "hda.h" + +static const struct sof_dev_desc icl_desc = { + .machines = snd_soc_acpi_intel_icl_machines, + .alt_machines = snd_soc_acpi_intel_icl_sdw_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &icl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-icl.ri", + .nocodec_tplg_filename = "sof-icl-nocodec.tplg", + .ops = &sof_icl_ops, +}; + +static const struct sof_dev_desc jsl_desc = { + .machines = snd_soc_acpi_intel_jsl_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &jsl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-jsl.ri", + .nocodec_tplg_filename = "sof-jsl-nocodec.tplg", + .ops = &sof_cnl_ops, +}; + +/* PCI IDs */ +static const struct pci_device_id sof_pci_ids[] = { + { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */ + .driver_data = (unsigned long)&icl_desc}, + { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */ + .driver_data = (unsigned long)&icl_desc}, + { PCI_DEVICE(0x8086, 0x38c8), /* ICL-N */ + .driver_data = (unsigned long)&jsl_desc}, + { PCI_DEVICE(0x8086, 0x4dc8), /* JSL-N */ + .driver_data = (unsigned long)&jsl_desc}, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, sof_pci_ids); + +/* pci_driver definition */ +static struct pci_driver snd_sof_pci_intel_icl_driver = { + .name = "sof-audio-pci-intel-icl", + .id_table = sof_pci_ids, + .probe = sof_pci_probe, + .remove = sof_pci_remove, + .shutdown = sof_pci_shutdown, + .driver = { + .pm = &sof_pci_pm, + }, +}; +module_pci_driver(snd_sof_pci_intel_icl_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON); +MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV); diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c new file mode 100644 index 000000000000..d35c25a450aa --- /dev/null +++ b/sound/soc/sof/intel/pci-tgl.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +// +// This file is provided under a dual BSD/GPLv2 license. When using or +// redistributing this file, you may do so under either license. +// +// Copyright(c) 2018-2021 Intel Corporation. All rights reserved. +// +// Author: Liam Girdwood +// + +#include +#include +#include +#include +#include +#include "../ops.h" +#include "../sof-pci-dev.h" + +/* platform specific devices */ +#include "hda.h" + +static const struct sof_dev_desc tgl_desc = { + .machines = snd_soc_acpi_intel_tgl_machines, + .alt_machines = snd_soc_acpi_intel_tgl_sdw_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &tgl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-tgl.ri", + .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", + .ops = &sof_tgl_ops, +}; + +static const struct sof_dev_desc tglh_desc = { + .machines = snd_soc_acpi_intel_tgl_machines, + .alt_machines = snd_soc_acpi_intel_tgl_sdw_machines, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &tglh_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-tgl-h.ri", + .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", + .ops = &sof_tgl_ops, +}; + +static const struct sof_dev_desc ehl_desc = { + .machines = snd_soc_acpi_intel_ehl_machines, + .use_acpi_target_states = true, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &ehl_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-ehl.ri", + .nocodec_tplg_filename = "sof-ehl-nocodec.tplg", + .ops = &sof_cnl_ops, +}; + +static const struct sof_dev_desc adls_desc = { + .machines = snd_soc_acpi_intel_adl_machines, + .alt_machines = snd_soc_acpi_intel_adl_sdw_machines, + .resindex_lpe_base = 0, + .resindex_pcicfg_base = -1, + .resindex_imr_base = -1, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &adls_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-adl-s.ri", + .nocodec_tplg_filename = "sof-adl-nocodec.tplg", + .ops = &sof_tgl_ops, +}; + +/* PCI IDs */ +static const struct pci_device_id sof_pci_ids[] = { + { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */ + .driver_data = (unsigned long)&tgl_desc}, + { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */ + .driver_data = (unsigned long)&tglh_desc}, + { PCI_DEVICE(0x8086, 0x4b55), /* EHL */ + .driver_data = (unsigned long)&ehl_desc}, + { PCI_DEVICE(0x8086, 0x4b58), /* EHL */ + .driver_data = (unsigned long)&ehl_desc}, + { PCI_DEVICE(0x8086, 0x7ad0), /* ADL-S */ + .driver_data = (unsigned long)&adls_desc}, + { PCI_DEVICE(0x8086, 0x51c8), /* ADL-P */ + .driver_data = (unsigned long)&tgl_desc}, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, sof_pci_ids); + +/* pci_driver definition */ +static struct pci_driver snd_sof_pci_intel_tgl_driver = { + .name = "sof-audio-pci-intel-tgl", + .id_table = sof_pci_ids, + .probe = sof_pci_probe, + .remove = sof_pci_remove, + .shutdown = sof_pci_shutdown, + .driver = { + .pm = &sof_pci_pm, + }, +}; +module_pci_driver(snd_sof_pci_intel_tgl_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON); +MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV); + diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c new file mode 100644 index 000000000000..94b9704c0117 --- /dev/null +++ b/sound/soc/sof/intel/pci-tng.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +// +// This file is provided under a dual BSD/GPLv2 license. When using or +// redistributing this file, you may do so under either license. +// +// Copyright(c) 2018-2021 Intel Corporation. All rights reserved. +// +// Author: Liam Girdwood +// + +#include +#include +#include +#include +#include +#include "../ops.h" +#include "../sof-pci-dev.h" + +/* platform specific devices */ +#include "shim.h" + +static struct snd_soc_acpi_mach sof_tng_machines[] = { + { + .id = "INT343A", + .drv_name = "edison", + .sof_fw_filename = "sof-byt.ri", + .sof_tplg_filename = "sof-byt.tplg", + }, + {} +}; + +static const struct sof_dev_desc tng_desc = { + .machines = sof_tng_machines, + .resindex_lpe_base = 3, /* IRAM, but subtract IRAM offset */ + .resindex_pcicfg_base = -1, + .resindex_imr_base = 0, + .irqindex_host_ipc = -1, + .resindex_dma_base = -1, + .chip_info = &tng_chip_info, + .default_fw_path = "intel/sof", + .default_tplg_path = "intel/sof-tplg", + .default_fw_filename = "sof-byt.ri", + .nocodec_tplg_filename = "sof-byt.tplg", + .ops = &sof_tng_ops, +}; + +/* PCI IDs */ +static const struct pci_device_id sof_pci_ids[] = { + { PCI_DEVICE(0x8086, 0x119a), + .driver_data = (unsigned long)&tng_desc}, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, sof_pci_ids); + +/* pci_driver definition */ +static struct pci_driver snd_sof_pci_intel_tng_driver = { + .name = "sof-audio-pci-intel-tng", + .id_table = sof_pci_ids, + .probe = sof_pci_probe, + .remove = sof_pci_remove, + .shutdown = sof_pci_shutdown, + .driver = { + .pm = &sof_pci_pm, + }, +}; +module_pci_driver(snd_sof_pci_intel_tng_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(SND_SOC_SOF_MERRIFIELD); +MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV); diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index fd1f0d8c2853..cfcbd9754c03 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -18,10 +18,7 @@ #include #include #include "ops.h" - -/* platform specific devices */ -#include "intel/shim.h" -#include "intel/hda.h" +#include "sof-pci-dev.h" static char *fw_path; module_param(fw_path, charp, 0444); @@ -81,243 +78,14 @@ static const struct dmi_system_id community_key_platforms[] = { {}, }; -#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE) -static const struct sof_dev_desc bxt_desc = { - .machines = snd_soc_acpi_intel_bxt_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &apl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-apl.ri", - .nocodec_tplg_filename = "sof-apl-nocodec.tplg", - .ops = &sof_apl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE) -static const struct sof_dev_desc glk_desc = { - .machines = snd_soc_acpi_intel_glk_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &apl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-glk.ri", - .nocodec_tplg_filename = "sof-glk-nocodec.tplg", - .ops = &sof_apl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_MERRIFIELD) -static struct snd_soc_acpi_mach sof_tng_machines[] = { - { - .id = "INT343A", - .drv_name = "edison", - .sof_fw_filename = "sof-byt.ri", - .sof_tplg_filename = "sof-byt.tplg", - }, - {} -}; - -static const struct sof_dev_desc tng_desc = { - .machines = sof_tng_machines, - .resindex_lpe_base = 3, /* IRAM, but subtract IRAM offset */ - .resindex_pcicfg_base = -1, - .resindex_imr_base = 0, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &tng_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-byt.ri", - .nocodec_tplg_filename = "sof-byt.tplg", - .ops = &sof_tng_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_CANNONLAKE) -static const struct sof_dev_desc cnl_desc = { - .machines = snd_soc_acpi_intel_cnl_machines, - .alt_machines = snd_soc_acpi_intel_cnl_sdw_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &cnl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-cnl.ri", - .nocodec_tplg_filename = "sof-cnl-nocodec.tplg", - .ops = &sof_cnl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COFFEELAKE) -static const struct sof_dev_desc cfl_desc = { - .machines = snd_soc_acpi_intel_cfl_machines, - .alt_machines = snd_soc_acpi_intel_cfl_sdw_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &cnl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-cfl.ri", - .nocodec_tplg_filename = "sof-cnl-nocodec.tplg", - .ops = &sof_cnl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE) -static const struct sof_dev_desc cml_desc = { - .machines = snd_soc_acpi_intel_cml_machines, - .alt_machines = snd_soc_acpi_intel_cml_sdw_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &cnl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-cml.ri", - .nocodec_tplg_filename = "sof-cnl-nocodec.tplg", - .ops = &sof_cnl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE) -static const struct sof_dev_desc icl_desc = { - .machines = snd_soc_acpi_intel_icl_machines, - .alt_machines = snd_soc_acpi_intel_icl_sdw_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &icl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-icl.ri", - .nocodec_tplg_filename = "sof-icl-nocodec.tplg", - .ops = &sof_icl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) || IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE) -static const struct sof_dev_desc tgl_desc = { - .machines = snd_soc_acpi_intel_tgl_machines, - .alt_machines = snd_soc_acpi_intel_tgl_sdw_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &tgl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-tgl.ri", - .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", - .ops = &sof_tgl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) -static const struct sof_dev_desc tglh_desc = { - .machines = snd_soc_acpi_intel_tgl_machines, - .alt_machines = snd_soc_acpi_intel_tgl_sdw_machines, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &tglh_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-tgl-h.ri", - .nocodec_tplg_filename = "sof-tgl-nocodec.tplg", - .ops = &sof_tgl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE) -static const struct sof_dev_desc ehl_desc = { - .machines = snd_soc_acpi_intel_ehl_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &ehl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-ehl.ri", - .nocodec_tplg_filename = "sof-ehl-nocodec.tplg", - .ops = &sof_cnl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE) -static const struct sof_dev_desc jsl_desc = { - .machines = snd_soc_acpi_intel_jsl_machines, - .use_acpi_target_states = true, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &jsl_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-jsl.ri", - .nocodec_tplg_filename = "sof-jsl-nocodec.tplg", - .ops = &sof_cnl_ops, -}; -#endif - -#if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE) -static const struct sof_dev_desc adls_desc = { - .machines = snd_soc_acpi_intel_adl_machines, - .alt_machines = snd_soc_acpi_intel_adl_sdw_machines, - .resindex_lpe_base = 0, - .resindex_pcicfg_base = -1, - .resindex_imr_base = -1, - .irqindex_host_ipc = -1, - .resindex_dma_base = -1, - .chip_info = &adls_chip_info, - .default_fw_path = "intel/sof", - .default_tplg_path = "intel/sof-tplg", - .default_fw_filename = "sof-adl-s.ri", - .nocodec_tplg_filename = "sof-adl-nocodec.tplg", - .ops = &sof_tgl_ops, -}; -#endif - -static const struct dev_pm_ops sof_pci_pm = { +const struct dev_pm_ops sof_pci_pm = { .prepare = snd_sof_prepare, .complete = snd_sof_complete, SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume) SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume, snd_sof_runtime_idle) }; +EXPORT_SYMBOL_NS(sof_pci_pm, SND_SOC_SOF_PCI_DEV); static void sof_pci_probe_complete(struct device *dev) { @@ -343,8 +111,7 @@ static void sof_pci_probe_complete(struct device *dev) pm_runtime_put_noidle(dev); } -static int sof_pci_probe(struct pci_dev *pci, - const struct pci_device_id *pci_id) +int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) { struct device *dev = &pci->dev; const struct sof_dev_desc *desc = @@ -447,8 +214,9 @@ release_regions: return ret; } +EXPORT_SYMBOL_NS(sof_pci_probe, SND_SOC_SOF_PCI_DEV); -static void sof_pci_remove(struct pci_dev *pci) +void sof_pci_remove(struct pci_dev *pci) { /* call sof helper for DSP hardware remove */ snd_sof_device_remove(&pci->dev); @@ -461,94 +229,12 @@ static void sof_pci_remove(struct pci_dev *pci) /* release pci regions and disable device */ pci_release_regions(pci); } +EXPORT_SYMBOL_NS(sof_pci_remove, SND_SOC_SOF_PCI_DEV); -static void sof_pci_shutdown(struct pci_dev *pci) +void sof_pci_shutdown(struct pci_dev *pci) { snd_sof_device_shutdown(&pci->dev); } - -/* PCI IDs */ -static const struct pci_device_id sof_pci_ids[] = { -#if IS_ENABLED(CONFIG_SND_SOC_SOF_MERRIFIELD) - { PCI_DEVICE(0x8086, 0x119a), - .driver_data = (unsigned long)&tng_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE) - /* BXT-P & Apollolake */ - { PCI_DEVICE(0x8086, 0x5a98), - .driver_data = (unsigned long)&bxt_desc}, - { PCI_DEVICE(0x8086, 0x1a98), - .driver_data = (unsigned long)&bxt_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE) - { PCI_DEVICE(0x8086, 0x3198), - .driver_data = (unsigned long)&glk_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_CANNONLAKE) - { PCI_DEVICE(0x8086, 0x9dc8), - .driver_data = (unsigned long)&cnl_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COFFEELAKE) - { PCI_DEVICE(0x8086, 0xa348), - .driver_data = (unsigned long)&cfl_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE) - { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */ - .driver_data = (unsigned long)&icl_desc}, - { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */ - .driver_data = (unsigned long)&icl_desc}, - -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE) - { PCI_DEVICE(0x8086, 0x38c8), - .driver_data = (unsigned long)&jsl_desc}, - { PCI_DEVICE(0x8086, 0x4dc8), - .driver_data = (unsigned long)&jsl_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE) - { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */ - .driver_data = (unsigned long)&cml_desc}, - { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */ - .driver_data = (unsigned long)&cml_desc}, - { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */ - .driver_data = (unsigned long)&cml_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) - { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */ - .driver_data = (unsigned long)&tgl_desc}, - { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */ - .driver_data = (unsigned long)&tglh_desc}, - -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE) - { PCI_DEVICE(0x8086, 0x4b55), - .driver_data = (unsigned long)&ehl_desc}, - { PCI_DEVICE(0x8086, 0x4b58), - .driver_data = (unsigned long)&ehl_desc}, -#endif -#if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE) - { PCI_DEVICE(0x8086, 0x7ad0), - .driver_data = (unsigned long)&adls_desc}, - { PCI_DEVICE(0x8086, 0x51c8), - .driver_data = (unsigned long)&tgl_desc}, -#endif - { 0, } -}; -MODULE_DEVICE_TABLE(pci, sof_pci_ids); - -/* pci_driver definition */ -static struct pci_driver snd_sof_pci_driver = { - .name = "sof-audio-pci", - .id_table = sof_pci_ids, - .probe = sof_pci_probe, - .remove = sof_pci_remove, - .shutdown = sof_pci_shutdown, - .driver = { - .pm = &sof_pci_pm, - }, -}; -module_pci_driver(snd_sof_pci_driver); +EXPORT_SYMBOL_NS(sof_pci_shutdown, SND_SOC_SOF_PCI_DEV); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_IMPORT_NS(SND_SOC_SOF_MERRIFIELD); -MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON); diff --git a/sound/soc/sof/sof-pci-dev.h b/sound/soc/sof/sof-pci-dev.h new file mode 100644 index 000000000000..81155a59e63a --- /dev/null +++ b/sound/soc/sof/sof-pci-dev.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ +/* + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * Copyright(c) 2021 Intel Corporation. All rights reserved. + */ + +#ifndef __SOUND_SOC_SOF_PCI_H +#define __SOUND_SOC_SOF_PCI_H + +extern const struct dev_pm_ops sof_pci_pm; +int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id); +void sof_pci_remove(struct pci_dev *pci); +void sof_pci_shutdown(struct pci_dev *pci); + +#endif -- cgit v1.2.3-59-g8ed1b From 194fe0fc3422d695a277cf9ccb39fa35c9c7d00a Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:31:22 -0600 Subject: ASoC: SOF: pci: move DSP_CONFIG use to platform-specific drivers There is no reason why we should call the intel_dspcfg helpers from common code, this should be moved in Intel-specific code and only called from platforms where a conflict may occur with the HDaudio or SST/Skylake driver. Suggested-by: Arnd Bergmann Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Bard Liao Acked-by: Mark Brown Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20210302003125.1178419-5-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/soc/sof/intel/hda.c | 17 +++++++++++++++++ sound/soc/sof/intel/hda.h | 3 +++ sound/soc/sof/intel/pci-apl.c | 2 +- sound/soc/sof/intel/pci-cnl.c | 2 +- sound/soc/sof/intel/pci-icl.c | 2 +- sound/soc/sof/intel/pci-tgl.c | 2 +- sound/soc/sof/sof-pci-dev.c | 8 -------- 7 files changed, 24 insertions(+), 12 deletions(-) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 0dc3a8c0f5e3..995a8c427177 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -22,10 +22,12 @@ #include #include #include +#include #include #include #include #include "../sof-audio.h" +#include "../sof-pci-dev.h" #include "../ops.h" #include "hda.h" @@ -1258,7 +1260,22 @@ void hda_machine_select(struct snd_sof_dev *sdev) dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n"); } +int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) +{ + int ret; + + ret = snd_intel_dsp_driver_probe(pci); + if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { + dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n"); + return -ENODEV; + } + + return sof_pci_probe(pci, pci_id); +} +EXPORT_SYMBOL_NS(hda_pci_intel_probe, SND_SOC_SOF_INTEL_HDA_COMMON); + MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV); MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC); MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915); MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA); diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index d1c38c37bc9d..7c7579daee7f 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -764,4 +764,7 @@ void hda_machine_select(struct snd_sof_dev *sdev); void hda_set_mach_params(const struct snd_soc_acpi_mach *mach, struct device *dev); +/* PCI driver selection and probe */ +int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id); + #endif diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c index e83ddbaafa29..f89e746c2570 100644 --- a/sound/soc/sof/intel/pci-apl.c +++ b/sound/soc/sof/intel/pci-apl.c @@ -67,7 +67,7 @@ MODULE_DEVICE_TABLE(pci, sof_pci_ids); static struct pci_driver snd_sof_pci_intel_apl_driver = { .name = "sof-audio-pci-intel-apl", .id_table = sof_pci_ids, - .probe = sof_pci_probe, + .probe = hda_pci_intel_probe, .remove = sof_pci_remove, .shutdown = sof_pci_shutdown, .driver = { diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c index f974d3a77217..f23257adf2ab 100644 --- a/sound/soc/sof/intel/pci-cnl.c +++ b/sound/soc/sof/intel/pci-cnl.c @@ -90,7 +90,7 @@ MODULE_DEVICE_TABLE(pci, sof_pci_ids); static struct pci_driver snd_sof_pci_intel_cnl_driver = { .name = "sof-audio-pci-intel-cnl", .id_table = sof_pci_ids, - .probe = sof_pci_probe, + .probe = hda_pci_intel_probe, .remove = sof_pci_remove, .shutdown = sof_pci_shutdown, .driver = { diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c index d5d7cefa6ef9..2f60c28ae81f 100644 --- a/sound/soc/sof/intel/pci-icl.c +++ b/sound/soc/sof/intel/pci-icl.c @@ -70,7 +70,7 @@ MODULE_DEVICE_TABLE(pci, sof_pci_ids); static struct pci_driver snd_sof_pci_intel_icl_driver = { .name = "sof-audio-pci-intel-icl", .id_table = sof_pci_ids, - .probe = sof_pci_probe, + .probe = hda_pci_intel_probe, .remove = sof_pci_remove, .shutdown = sof_pci_shutdown, .driver = { diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index d35c25a450aa..485607471181 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -106,7 +106,7 @@ MODULE_DEVICE_TABLE(pci, sof_pci_ids); static struct pci_driver snd_sof_pci_intel_tgl_driver = { .name = "sof-audio-pci-intel-tgl", .id_table = sof_pci_ids, - .probe = sof_pci_probe, + .probe = hda_pci_intel_probe, .remove = sof_pci_remove, .shutdown = sof_pci_shutdown, .driver = { diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index cfcbd9754c03..b842a414e1df 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -120,13 +119,6 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) const struct snd_sof_dsp_ops *ops; int ret; - if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) { - ret = snd_intel_dsp_driver_probe(pci); - if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { - dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n"); - return -ENODEV; - } - } dev_dbg(&pci->dev, "PCI DSP detected"); /* get ops for platform */ -- cgit v1.2.3-59-g8ed1b From cf5807f5f814fcb14fd6c78878e2441918796af9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:31:23 -0600 Subject: ASoC: SOF: Intel: SoundWire: simplify Kconfig The Kconfig file is way too convoluted. Track platforms where SoundWire is supported, and add simpler conditions to make sure there is no module/built-in issue. The use of 'depends on' is less intuitive if a required 'depend' is missing, but that's a small price to pay for clarity and simplicity. Suggested-by: Arnd Bergmann Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Bard Liao Acked-by: Mark Brown Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20210302003125.1178419-6-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/soc/sof/intel/Kconfig | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 7dcb61ba5763..21e24a3c64fb 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -276,29 +276,21 @@ config SND_SOC_SOF_HDA This option is not user-selectable but automagically handled by 'select' statements at a higher level. -config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK - bool "SOF support for SoundWire" - depends on ACPI - help - This adds support for SoundWire with Sound Open Firmware - for Intel(R) platforms. - Say Y if you want to enable SoundWire links with SOF. - If unsure select "N". - config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE tristate - select SND_SOC_SOF_INTEL_SOUNDWIRE if SND_SOC_SOF_INTEL_SOUNDWIRE_LINK - help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. config SND_SOC_SOF_INTEL_SOUNDWIRE - tristate - select SOUNDWIRE + tristate "SOF support for SoundWire" + default SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE + depends on SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE + depends on ACPI && SOUNDWIRE + depends on !(SOUNDWIRE=m && SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=y) select SOUNDWIRE_INTEL help - This option is not user-selectable but automagically handled by - 'select' statements at a higher level. + This adds support for SoundWire with Sound Open Firmware + for Intel(R) platforms. + Say Y if you want to enable SoundWire links with SOF. + If unsure select "N". endif ## SND_SOC_SOF_INTEL_PCI -- cgit v1.2.3-59-g8ed1b From 08c2a4bc9f2acaefbd0158866db5cb3238a68674 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:31:24 -0600 Subject: ALSA: hda: move Intel SoundWire ACPI scan to dedicated module The ACPI scan capabilities is called from the intel-dspconfig as well as the SOF/HDaudio drivers. This creates dependencies and randconfig issues when HDaudio and SOF/SoundWire are not all configured as modules. To simplify Kconfig dependencies between HDAudio, SoundWire, SOF and intel-dspconfig, move the ACPI scan helpers to a dedicated module. This follows the same idea as NHLT helpers which are already handled as a dedicated module. The only functional change is that the kernel parameter to filter links is now handled by a different module, but that was only provided for developers needing work-arounds for early BIOS releases. Reported-by: Arnd Bergmann Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Bard Liao Acked-by: Mark Brown Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20210302003125.1178419-7-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- drivers/soundwire/intel.h | 2 - drivers/soundwire/intel_init.c | 158 -------------------------------- include/linux/soundwire/sdw_intel.h | 2 + sound/hda/Kconfig | 4 + sound/hda/Makefile | 3 + sound/hda/intel-dsp-config.c | 2 +- sound/hda/intel-sdw-acpi.c | 174 ++++++++++++++++++++++++++++++++++++ sound/soc/sof/intel/Kconfig | 1 + sound/soc/sof/intel/hda.c | 1 + 9 files changed, 186 insertions(+), 161 deletions(-) create mode 100644 sound/hda/intel-sdw-acpi.c diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index 76820d0b9deb..06bac8ba14e9 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -48,8 +48,6 @@ struct sdw_intel { #endif }; -#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1) - int intel_master_startup(struct platform_device *pdev); int intel_master_process_wakeen_event(struct platform_device *pdev); diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c index bc8520eb385e..05b726cdfebc 100644 --- a/drivers/soundwire/intel_init.c +++ b/drivers/soundwire/intel_init.c @@ -18,42 +18,12 @@ #include "cadence_master.h" #include "intel.h" -#define SDW_LINK_TYPE 4 /* from Intel ACPI documentation */ -#define SDW_MAX_LINKS 4 #define SDW_SHIM_LCAP 0x0 #define SDW_SHIM_BASE 0x2C000 #define SDW_ALH_BASE 0x2C800 #define SDW_LINK_BASE 0x30000 #define SDW_LINK_SIZE 0x10000 -static int ctrl_link_mask; -module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444); -MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)"); - -static bool is_link_enabled(struct fwnode_handle *fw_node, int i) -{ - struct fwnode_handle *link; - char name[32]; - u32 quirk_mask = 0; - - /* Find master handle */ - snprintf(name, sizeof(name), - "mipi-sdw-link-%d-subproperties", i); - - link = fwnode_get_named_child_node(fw_node, name); - if (!link) - return false; - - fwnode_property_read_u32(link, - "intel-quirk-mask", - &quirk_mask); - - if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) - return false; - - return true; -} - static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx) { struct sdw_intel_link_res *link = ctx->links; @@ -81,74 +51,6 @@ static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx) return 0; } -static int -sdw_intel_scan_controller(struct sdw_intel_acpi_info *info) -{ - struct acpi_device *adev; - int ret, i; - u8 count; - - if (acpi_bus_get_device(info->handle, &adev)) - return -EINVAL; - - /* Found controller, find links supported */ - count = 0; - ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev), - "mipi-sdw-master-count", &count, 1); - - /* - * In theory we could check the number of links supported in - * hardware, but in that step we cannot assume SoundWire IP is - * powered. - * - * In addition, if the BIOS doesn't even provide this - * 'master-count' property then all the inits based on link - * masks will fail as well. - * - * We will check the hardware capabilities in the startup() step - */ - - if (ret) { - dev_err(&adev->dev, - "Failed to read mipi-sdw-master-count: %d\n", ret); - return -EINVAL; - } - - /* Check count is within bounds */ - if (count > SDW_MAX_LINKS) { - dev_err(&adev->dev, "Link count %d exceeds max %d\n", - count, SDW_MAX_LINKS); - return -EINVAL; - } - - if (!count) { - dev_warn(&adev->dev, "No SoundWire links detected\n"); - return -EINVAL; - } - dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count); - - info->count = count; - info->link_mask = 0; - - for (i = 0; i < count; i++) { - if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) { - dev_dbg(&adev->dev, - "Link %d masked, will not be enabled\n", i); - continue; - } - - if (!is_link_enabled(acpi_fwnode_handle(adev), i)) { - dev_dbg(&adev->dev, - "Link %d not selected in firmware\n", i); - continue; - } - - info->link_mask |= BIT(i); - } - - return 0; -} - #define HDA_DSP_REG_ADSPIC2 (0x10) #define HDA_DSP_REG_ADSPIS2 (0x14) #define HDA_DSP_REG_ADSPIC2_SNDW BIT(5) @@ -357,66 +259,6 @@ sdw_intel_startup_controller(struct sdw_intel_ctx *ctx) return 0; } -static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, - void *cdata, void **return_value) -{ - struct sdw_intel_acpi_info *info = cdata; - struct acpi_device *adev; - acpi_status status; - u64 adr; - - status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr); - if (ACPI_FAILURE(status)) - return AE_OK; /* keep going */ - - if (acpi_bus_get_device(handle, &adev)) { - pr_err("%s: Couldn't find ACPI handle\n", __func__); - return AE_NOT_FOUND; - } - - info->handle = handle; - - /* - * On some Intel platforms, multiple children of the HDAS - * device can be found, but only one of them is the SoundWire - * controller. The SNDW device is always exposed with - * Name(_ADR, 0x40000000), with bits 31..28 representing the - * SoundWire link so filter accordingly - */ - if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE) - return AE_OK; /* keep going */ - - /* device found, stop namespace walk */ - return AE_CTRL_TERMINATE; -} - -/** - * sdw_intel_acpi_scan() - SoundWire Intel init routine - * @parent_handle: ACPI parent handle - * @info: description of what firmware/DSDT tables expose - * - * This scans the namespace and queries firmware to figure out which - * links to enable. A follow-up use of sdw_intel_probe() and - * sdw_intel_startup() is required for creation of devices and bus - * startup - */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, - struct sdw_intel_acpi_info *info) -{ - acpi_status status; - - info->handle = NULL; - status = acpi_walk_namespace(ACPI_TYPE_DEVICE, - parent_handle, 1, - sdw_intel_acpi_cb, - NULL, info, NULL); - if (ACPI_FAILURE(status) || info->handle == NULL) - return -ENODEV; - - return sdw_intel_scan_controller(info); -} -EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SOUNDWIRE_INTEL_INIT); - /** * sdw_intel_probe() - SoundWire Intel probe routine * @res: resource data diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 120ffddc03d2..3a5446ac014a 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -187,4 +187,6 @@ void sdw_intel_enable_irq(void __iomem *mmio_base, bool enable); irqreturn_t sdw_intel_thread(int irq, void *dev_id); +#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1) + #endif diff --git a/sound/hda/Kconfig b/sound/hda/Kconfig index 9ed5cfa3c18c..57595f1552c9 100644 --- a/sound/hda/Kconfig +++ b/sound/hda/Kconfig @@ -44,9 +44,13 @@ config SND_INTEL_NHLT config SND_INTEL_DSP_CONFIG tristate select SND_INTEL_NHLT if ACPI + select SND_INTEL_SOUNDWIRE_ACPI if ACPI # this config should be selected only for Intel DSP platforms. # A fallback is provided so that the code compiles in all cases. +config SND_INTEL_SOUNDWIRE_ACPI + tristate + config SND_INTEL_BYT_PREFER_SOF bool "Prefer SOF driver over SST on BY/CHT platforms" depends on SND_SST_ATOM_HIFI2_PLATFORM_ACPI && SND_SOC_SOF_BAYTRAIL diff --git a/sound/hda/Makefile b/sound/hda/Makefile index 601e617918b8..78f487a635f8 100644 --- a/sound/hda/Makefile +++ b/sound/hda/Makefile @@ -17,3 +17,6 @@ obj-$(CONFIG_SND_HDA_EXT_CORE) += ext/ snd-intel-dspcfg-objs := intel-dsp-config.o snd-intel-dspcfg-$(CONFIG_SND_INTEL_NHLT) += intel-nhlt.o obj-$(CONFIG_SND_INTEL_DSP_CONFIG) += snd-intel-dspcfg.o + +snd-intel-sdw-acpi-objs := intel-sdw-acpi.o +obj-$(CONFIG_SND_INTEL_SOUNDWIRE_ACPI) += snd-intel-sdw-acpi.o diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index d1eb9d34993a..ab5ff7867eb9 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -557,4 +557,4 @@ EXPORT_SYMBOL_GPL(snd_intel_acpi_dsp_driver_probe); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Intel DSP config driver"); -MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT); +MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI); diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c new file mode 100644 index 000000000000..6359936a1503 --- /dev/null +++ b/sound/hda/intel-sdw-acpi.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +// Copyright(c) 2015-2021 Intel Corporation. + +/* + * SDW Intel ACPI scan helpers + */ + +#include +#include +#include +#include +#include + +#define SDW_LINK_TYPE 4 /* from Intel ACPI documentation */ +#define SDW_MAX_LINKS 4 + +static int ctrl_link_mask; +module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444); +MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)"); + +static bool is_link_enabled(struct fwnode_handle *fw_node, int i) +{ + struct fwnode_handle *link; + char name[32]; + u32 quirk_mask = 0; + + /* Find master handle */ + snprintf(name, sizeof(name), + "mipi-sdw-link-%d-subproperties", i); + + link = fwnode_get_named_child_node(fw_node, name); + if (!link) + return false; + + fwnode_property_read_u32(link, + "intel-quirk-mask", + &quirk_mask); + + if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE) + return false; + + return true; +} + +static int +sdw_intel_scan_controller(struct sdw_intel_acpi_info *info) +{ + struct acpi_device *adev; + int ret, i; + u8 count; + + if (acpi_bus_get_device(info->handle, &adev)) + return -EINVAL; + + /* Found controller, find links supported */ + count = 0; + ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev), + "mipi-sdw-master-count", &count, 1); + + /* + * In theory we could check the number of links supported in + * hardware, but in that step we cannot assume SoundWire IP is + * powered. + * + * In addition, if the BIOS doesn't even provide this + * 'master-count' property then all the inits based on link + * masks will fail as well. + * + * We will check the hardware capabilities in the startup() step + */ + + if (ret) { + dev_err(&adev->dev, + "Failed to read mipi-sdw-master-count: %d\n", ret); + return -EINVAL; + } + + /* Check count is within bounds */ + if (count > SDW_MAX_LINKS) { + dev_err(&adev->dev, "Link count %d exceeds max %d\n", + count, SDW_MAX_LINKS); + return -EINVAL; + } + + if (!count) { + dev_warn(&adev->dev, "No SoundWire links detected\n"); + return -EINVAL; + } + dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count); + + info->count = count; + info->link_mask = 0; + + for (i = 0; i < count; i++) { + if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) { + dev_dbg(&adev->dev, + "Link %d masked, will not be enabled\n", i); + continue; + } + + if (!is_link_enabled(acpi_fwnode_handle(adev), i)) { + dev_dbg(&adev->dev, + "Link %d not selected in firmware\n", i); + continue; + } + + info->link_mask |= BIT(i); + } + + return 0; +} + +static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, + void *cdata, void **return_value) +{ + struct sdw_intel_acpi_info *info = cdata; + struct acpi_device *adev; + acpi_status status; + u64 adr; + + status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr); + if (ACPI_FAILURE(status)) + return AE_OK; /* keep going */ + + if (acpi_bus_get_device(handle, &adev)) { + pr_err("%s: Couldn't find ACPI handle\n", __func__); + return AE_NOT_FOUND; + } + + info->handle = handle; + + /* + * On some Intel platforms, multiple children of the HDAS + * device can be found, but only one of them is the SoundWire + * controller. The SNDW device is always exposed with + * Name(_ADR, 0x40000000), with bits 31..28 representing the + * SoundWire link so filter accordingly + */ + if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE) + return AE_OK; /* keep going */ + + /* device found, stop namespace walk */ + return AE_CTRL_TERMINATE; +} + +/** + * sdw_intel_acpi_scan() - SoundWire Intel init routine + * @parent_handle: ACPI parent handle + * @info: description of what firmware/DSDT tables expose + * + * This scans the namespace and queries firmware to figure out which + * links to enable. A follow-up use of sdw_intel_probe() and + * sdw_intel_startup() is required for creation of devices and bus + * startup + */ +int sdw_intel_acpi_scan(acpi_handle *parent_handle, + struct sdw_intel_acpi_info *info) +{ + acpi_status status; + + info->handle = NULL; + status = acpi_walk_namespace(ACPI_TYPE_DEVICE, + parent_handle, 1, + sdw_intel_acpi_cb, + NULL, info, NULL); + if (ACPI_FAILURE(status) || info->handle == NULL) + return -ENODEV; + + return sdw_intel_scan_controller(info); +} +EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SND_INTEL_SOUNDWIRE_ACPI); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Intel Soundwire ACPI helpers"); diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 21e24a3c64fb..da1c396f529d 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -286,6 +286,7 @@ config SND_SOC_SOF_INTEL_SOUNDWIRE depends on ACPI && SOUNDWIRE depends on !(SOUNDWIRE=m && SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=y) select SOUNDWIRE_INTEL + select SND_INTEL_SOUNDWIRE_ACPI help This adds support for SoundWire with Sound Open Firmware for Intel(R) platforms. diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 995a8c427177..1d29b1fd6a94 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1279,4 +1279,5 @@ MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV); MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC); MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915); MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA); +MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI); MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT); -- cgit v1.2.3-59-g8ed1b From ffd7e705fad695fc0abd5809ef8dc72cda7e49a6 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:31:25 -0600 Subject: ALSA: hda: intel-sdw-acpi: add missing include files We rely on implicit includes, list out explicitly what this code relies on. Suggested-by: Guennadi Liakhovetski Signed-off-by: Pierre-Louis Bossart Reviewed-by: Kai Vehmanen Reviewed-by: Guennadi Liakhovetski Reviewed-by: Bard Liao Acked-by: Mark Brown Acked-by: Vinod Koul Link: https://lore.kernel.org/r/20210302003125.1178419-8-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-sdw-acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index 6359936a1503..c0123bc31c0d 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -6,7 +6,12 @@ */ #include +#include +#include +#include +#include #include +#include #include #include #include -- cgit v1.2.3-59-g8ed1b From aedb9d9089ceb1c86be495bcc70e6021c01f92ff Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 18 Feb 2021 22:54:17 -0800 Subject: btrfs: ref-verify: use 'inline void' keyword ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build warnings of function signature when CONFIG_STACKTRACE is not enabled by reordering the 'inline' and 'void' keywords. ../fs/btrfs/ref-verify.c:221:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] static void inline __save_stack_trace(struct ref_action *ra) ../fs/btrfs/ref-verify.c:225:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] static void inline __print_stack_trace(struct btrfs_fs_info *fs_info, Signed-off-by: Randy Dunlap Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ref-verify.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 2b490becbe67..8e026de74c44 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -218,11 +218,11 @@ static void __print_stack_trace(struct btrfs_fs_info *fs_info, stack_trace_print(ra->trace, ra->trace_len, 2); } #else -static void inline __save_stack_trace(struct ref_action *ra) +static inline void __save_stack_trace(struct ref_action *ra) { } -static void inline __print_stack_trace(struct btrfs_fs_info *fs_info, +static inline void __print_stack_trace(struct btrfs_fs_info *fs_info, struct ref_action *ra) { btrfs_err(fs_info, " ref-verify: no stacktrace support"); -- cgit v1.2.3-59-g8ed1b From 4f6a49de64fd1b1dba5229c02047376da7cf24fd Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 23 Feb 2021 15:20:42 +0200 Subject: btrfs: unlock extents in btrfs_zero_range in case of quota reservation errors If btrfs_qgroup_reserve_data returns an error (i.e quota limit reached) the handling logic directly goes to the 'out' label without first unlocking the extent range between lockstart, lockend. This results in deadlocks as other processes try to lock the same extent. Fixes: a7f8b1c2ac21 ("btrfs: file: reserve qgroup space after the hole punch range is locked") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 01a72f53fb5d..2c282664c4b8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3260,8 +3260,11 @@ reserve_space: goto out; ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, alloc_start, bytes_to_reserve); - if (ret) + if (ret) { + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, + lockend, &cached_state); goto out; + } ret = btrfs_prealloc_file_range(inode, mode, alloc_start, alloc_end - alloc_start, i_blocksize(inode), -- cgit v1.2.3-59-g8ed1b From 5011c5a663b9c6d6aff3d394f11049b371199627 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Feb 2021 09:04:34 +0300 Subject: btrfs: validate qgroup inherit for SNAP_CREATE_V2 ioctl The problem is we're copying "inherit" from user space but we don't necessarily know that we're copying enough data for a 64 byte struct. Then the next problem is that 'inherit' has a variable size array at the end, and we have to verify that array is the size we expected. Fixes: 6f72c7e20dba ("Btrfs: add qgroup inheritance") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Dan Carpenter Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a8c60d46d19c..1b837c08ca90 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1935,7 +1935,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { - if (vol_args->size > PAGE_SIZE) { + u64 nums; + + if (vol_args->size < sizeof(*inherit) || + vol_args->size > PAGE_SIZE) { ret = -EINVAL; goto free_args; } @@ -1944,6 +1947,20 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = PTR_ERR(inherit); goto free_args; } + + if (inherit->num_qgroups > PAGE_SIZE || + inherit->num_ref_copies > PAGE_SIZE || + inherit->num_excl_copies > PAGE_SIZE) { + ret = -EINVAL; + goto free_inherit; + } + + nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + + 2 * inherit->num_excl_copies; + if (vol_args->size != struct_size(inherit, qgroups, nums)) { + ret = -EINVAL; + goto free_inherit; + } } ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd, -- cgit v1.2.3-59-g8ed1b From c55a4319c4f2c3ba0a385b1ebc454fa283cfe920 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Tue, 23 Feb 2021 10:22:32 -0800 Subject: btrfs: fix spurious free_space_tree remount warning The intended logic of the check is to catch cases where the desired free_space_tree setting doesn't match the mounted setting, and the remount is anything but ro->rw. However, it makes the mistake of checking equality on a masked integer (btrfs_test_opt) against a boolean (btrfs_fs_compat_ro). If you run the reproducer: $ mount -o space_cache=v2 dev mnt $ mount -o remount,ro mnt you would expect no warning, because the remount is not attempting to change the free space tree setting, but we do see the warning. To fix this, add explicit bool type casts to the condition. I tested a variety of transitions: sudo mount -o space_cache=v2 /dev/vg0/lv0 mnt/lol (fst enabled) mount -o remount,ro mnt/lol (no warning, no fst change) sudo mount -o remount,rw,space_cache=v1,clear_cache (no warning, ro->rw) sudo mount -o remount,rw,space_cache=v2 mnt (warning, rw->rw with change) sudo mount -o remount,ro mnt (no warning, no fst change) sudo mount -o remount,rw,space_cache=v2 mnt (no warning, no fst change) Reported-by: Chris Murphy CC: stable@vger.kernel.org # 5.11 Signed-off-by: Boris Burkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f8435641b912..f7a4ad86adee 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1918,8 +1918,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); - if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) != - btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) != + (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && (!sb_rdonly(sb) || (*flags & SB_RDONLY))) { btrfs_warn(fs_info, "remount supports changing free space tree only from ro to rw"); -- cgit v1.2.3-59-g8ed1b From 0f9c03d824f6f522d3bc43629635c9765546ebc5 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 22 Feb 2021 18:40:42 +0200 Subject: btrfs: free correct amount of space in btrfs_delayed_inode_reserve_metadata Following commit f218ea6c4792 ("btrfs: delayed-inode: Remove wrong qgroup meta reservation calls") this function now reserves num_bytes, rather than the fixed amount of nodesize. As such this requires the same amount to be freed in case of failure. Fix this by adjusting the amount we are freeing. Fixes: f218ea6c4792 ("btrfs: delayed-inode: Remove wrong qgroup meta reservation calls") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index ec0b50b8c5d6..ac9966e76a2f 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -649,7 +649,7 @@ static int btrfs_delayed_inode_reserve_metadata( btrfs_ino(inode), num_bytes, 1); } else { - btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); + btrfs_qgroup_free_meta_prealloc(root, num_bytes); } return ret; } -- cgit v1.2.3-59-g8ed1b From 80e9baed722c853056e0c5374f51524593cb1031 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 22 Feb 2021 18:40:43 +0200 Subject: btrfs: export and rename qgroup_reserve_meta Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 8 ++++---- fs/btrfs/qgroup.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 808370ada888..14ff388fd3bd 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3841,8 +3841,8 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes, return num_bytes; } -static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, - enum btrfs_qgroup_rsv_type type, bool enforce) +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type, bool enforce) { struct btrfs_fs_info *fs_info = root->fs_info; int ret; @@ -3873,14 +3873,14 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, { int ret; - ret = qgroup_reserve_meta(root, num_bytes, type, enforce); + ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); if (ret <= 0 && ret != -EDQUOT) return ret; ret = try_flush_qgroup(root); if (ret < 0) return ret; - return qgroup_reserve_meta(root, num_bytes, type, enforce); + return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); } void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 50dea9a2d8fb..7283e4f549af 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -361,6 +361,8 @@ int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len); int btrfs_qgroup_free_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len); +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type, bool enforce); int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce); /* Reserve metadata space for pertrans and prealloc type */ -- cgit v1.2.3-59-g8ed1b From 4d14c5cde5c268a2bc26addecf09489cb953ef64 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 22 Feb 2021 18:40:44 +0200 Subject: btrfs: don't flush from btrfs_delayed_inode_reserve_metadata Calling btrfs_qgroup_reserve_meta_prealloc from btrfs_delayed_inode_reserve_metadata can result in flushing delalloc while holding a transaction and delayed node locks. This is deadlock prone. In the past multiple commits: * ae5e070eaca9 ("btrfs: qgroup: don't try to wait flushing if we're already holding a transaction") * 6f23277a49e6 ("btrfs: qgroup: don't commit transaction when we already hold the handle") Tried to solve various aspects of this but this was always a whack-a-mole game. Unfortunately those 2 fixes don't solve a deadlock scenario involving btrfs_delayed_node::mutex. Namely, one thread can call btrfs_dirty_inode as a result of reading a file and modifying its atime: PID: 6963 TASK: ffff8c7f3f94c000 CPU: 2 COMMAND: "test" #0 __schedule at ffffffffa529e07d #1 schedule at ffffffffa529e4ff #2 schedule_timeout at ffffffffa52a1bdd #3 wait_for_completion at ffffffffa529eeea <-- sleeps with delayed node mutex held #4 start_delalloc_inodes at ffffffffc0380db5 #5 btrfs_start_delalloc_snapshot at ffffffffc0393836 #6 try_flush_qgroup at ffffffffc03f04b2 #7 __btrfs_qgroup_reserve_meta at ffffffffc03f5bb6 <-- tries to reserve space and starts delalloc inodes. #8 btrfs_delayed_update_inode at ffffffffc03e31aa <-- acquires delayed node mutex #9 btrfs_update_inode at ffffffffc0385ba8 #10 btrfs_dirty_inode at ffffffffc038627b <-- TRANSACTIION OPENED #11 touch_atime at ffffffffa4cf0000 #12 generic_file_read_iter at ffffffffa4c1f123 #13 new_sync_read at ffffffffa4ccdc8a #14 vfs_read at ffffffffa4cd0849 #15 ksys_read at ffffffffa4cd0bd1 #16 do_syscall_64 at ffffffffa4a052eb #17 entry_SYSCALL_64_after_hwframe at ffffffffa540008c This will cause an asynchronous work to flush the delalloc inodes to happen which can try to acquire the same delayed_node mutex: PID: 455 TASK: ffff8c8085fa4000 CPU: 5 COMMAND: "kworker/u16:30" #0 __schedule at ffffffffa529e07d #1 schedule at ffffffffa529e4ff #2 schedule_preempt_disabled at ffffffffa529e80a #3 __mutex_lock at ffffffffa529fdcb <-- goes to sleep, never wakes up. #4 btrfs_delayed_update_inode at ffffffffc03e3143 <-- tries to acquire the mutex #5 btrfs_update_inode at ffffffffc0385ba8 <-- this is the same inode that pid 6963 is holding #6 cow_file_range_inline.constprop.78 at ffffffffc0386be7 #7 cow_file_range at ffffffffc03879c1 #8 btrfs_run_delalloc_range at ffffffffc038894c #9 writepage_delalloc at ffffffffc03a3c8f #10 __extent_writepage at ffffffffc03a4c01 #11 extent_write_cache_pages at ffffffffc03a500b #12 extent_writepages at ffffffffc03a6de2 #13 do_writepages at ffffffffa4c277eb #14 __filemap_fdatawrite_range at ffffffffa4c1e5bb #15 btrfs_run_delalloc_work at ffffffffc0380987 <-- starts running delayed nodes #16 normal_work_helper at ffffffffc03b706c #17 process_one_work at ffffffffa4aba4e4 #18 worker_thread at ffffffffa4aba6fd #19 kthread at ffffffffa4ac0a3d #20 ret_from_fork at ffffffffa54001ff To fully address those cases the complete fix is to never issue any flushing while holding the transaction or the delayed node lock. This patch achieves it by calling qgroup_reserve_meta directly which will either succeed without flushing or will fail and return -EDQUOT. In the latter case that return value is going to be propagated to btrfs_dirty_inode which will fallback to start a new transaction. That's fine as the majority of time we expect the inode will have BTRFS_DELAYED_NODE_INODE_DIRTY flag set which will result in directly copying the in-memory state. Fixes: c53e9653605d ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 3 ++- fs/btrfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index ac9966e76a2f..bf25401c9768 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -627,7 +627,8 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + ret = btrfs_qgroup_reserve_meta(root, num_bytes, + BTRFS_QGROUP_RSV_META_PREALLOC, true); if (ret < 0) return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4f2f1e932751..c35b724a5611 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6081,7 +6081,7 @@ static int btrfs_dirty_inode(struct inode *inode) return PTR_ERR(trans); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); - if (ret && ret == -ENOSPC) { + if (ret && (ret == -ENOSPC || ret == -EDQUOT)) { /* whoops, lets try again with the full transaction */ btrfs_end_transaction(trans); trans = btrfs_start_transaction(root, 1); -- cgit v1.2.3-59-g8ed1b From fd57a98d6f0c98fa295813087f13afb26c224e73 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 26 Feb 2021 17:51:44 +0000 Subject: btrfs: fix warning when creating a directory with smack enabled When we have smack enabled, during the creation of a directory smack may attempt to add a "smack transmute" xattr on the inode, which results in the following warning and trace: WARNING: CPU: 3 PID: 2548 at fs/btrfs/transaction.c:537 start_transaction+0x489/0x4f0 Modules linked in: nft_objref nf_conntrack_netbios_ns (...) CPU: 3 PID: 2548 Comm: mkdir Not tainted 5.9.0-rc2smack+ #81 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 RIP: 0010:start_transaction+0x489/0x4f0 Code: e9 be fc ff ff (...) RSP: 0018:ffffc90001887d10 EFLAGS: 00010202 RAX: ffff88816f1e0000 RBX: 0000000000000201 RCX: 0000000000000003 RDX: 0000000000000201 RSI: 0000000000000002 RDI: ffff888177849000 RBP: ffff888177849000 R08: 0000000000000001 R09: 0000000000000004 R10: ffffffff825e8f7a R11: 0000000000000003 R12: ffffffffffffffe2 R13: 0000000000000000 R14: ffff88803d884270 R15: ffff8881680d8000 FS: 00007f67317b8440(0000) GS:ffff88817bcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f67247a22a8 CR3: 000000004bfbc002 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? slab_free_freelist_hook+0xea/0x1b0 ? trace_hardirqs_on+0x1c/0xe0 btrfs_setxattr_trans+0x3c/0xf0 __vfs_setxattr+0x63/0x80 smack_d_instantiate+0x2d3/0x360 security_d_instantiate+0x29/0x40 d_instantiate_new+0x38/0x90 btrfs_mkdir+0x1cf/0x1e0 vfs_mkdir+0x14f/0x200 do_mkdirat+0x6d/0x110 do_syscall_64+0x2d/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f673196ae6b Code: 8b 05 11 (...) RSP: 002b:00007ffc3c679b18 EFLAGS: 00000246 ORIG_RAX: 0000000000000053 RAX: ffffffffffffffda RBX: 00000000000001ff RCX: 00007f673196ae6b RDX: 0000000000000000 RSI: 00000000000001ff RDI: 00007ffc3c67a30d RBP: 00007ffc3c67a30d R08: 00000000000001ff R09: 0000000000000000 R10: 000055d3e39fe930 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc3c679cd8 R14: 00007ffc3c67a30d R15: 00007ffc3c679ce0 irq event stamp: 11029 hardirqs last enabled at (11037): [] console_unlock+0x486/0x670 hardirqs last disabled at (11044): [] console_unlock+0xa1/0x670 softirqs last enabled at (8864): [] asm_call_on_stack+0xf/0x20 softirqs last disabled at (8851): [] asm_call_on_stack+0xf/0x20 This happens because at btrfs_mkdir() we call d_instantiate_new() while holding a transaction handle, which results in the following call chain: btrfs_mkdir() trans = btrfs_start_transaction(root, 5); d_instantiate_new() smack_d_instantiate() __vfs_setxattr() btrfs_setxattr_trans() btrfs_start_transaction() start_transaction() WARN_ON() --> a tansaction start has TRANS_EXTWRITERS set in its type h->orig_rsv = h->block_rsv h->block_rsv = NULL btrfs_end_transaction(trans) Besides the warning triggered at start_transaction, we set the handle's block_rsv to NULL which may cause some surprises later on. So fix this by making btrfs_setxattr_trans() not start a transaction when we already have a handle on one, stored in current->journal_info, and use that handle. We are good to use the handle because at btrfs_mkdir() we did reserve space for the xattr and the inode item. Reported-by: Casey Schaufler CC: stable@vger.kernel.org # 5.4+ Acked-by: Casey Schaufler Tested-by: Casey Schaufler Link: https://lore.kernel.org/linux-btrfs/434d856f-bd7b-4889-a6ec-e81aaebfa735@schaufler-ca.com/ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/xattr.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index af6246f36a9e..03135dbb318a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; + const bool start_trans = (current->journal_info == NULL); int ret; - trans = btrfs_start_transaction(root, 2); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (start_trans) { + /* + * 1 unit for inserting/updating/deleting the xattr + * 1 unit for the inode item update + */ + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } else { + /* + * This can happen when smack is enabled and a directory is being + * created. It happens through d_instantiate_new(), which calls + * smack_d_instantiate(), which in turn calls __vfs_setxattr() to + * set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the + * inode. We have already reserved space for the xattr and inode + * update at btrfs_mkdir(), so just use the transaction handle. + * We don't join or start a transaction, as that will reset the + * block_rsv of the handle and trigger a warning for the start + * case. + */ + ASSERT(strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN) == 0); + trans = current->journal_info; + } ret = btrfs_setxattr(trans, inode, name, value, size, flags); if (ret) @@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); BUG_ON(ret); out: - btrfs_end_transaction(trans); + if (start_trans) + btrfs_end_transaction(trans); return ret; } -- cgit v1.2.3-59-g8ed1b From c28ea613fafad910d08f67efe76ae552b1434e44 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 1 Mar 2021 16:44:22 +0800 Subject: btrfs: subpage: fix the false data csum mismatch error [BUG] When running fstresss, we can hit strange data csum mismatch where the on-disk data is in fact correct (passes scrub). With some extra debug info added, we have the following traces: 0482us: btrfs_do_readpage: root=5 ino=284 offset=393216, submit force=0 pgoff=0 iosize=8192 0494us: btrfs_do_readpage: root=5 ino=284 offset=401408, submit force=0 pgoff=8192 iosize=4096 0498us: btrfs_submit_data_bio: root=5 ino=284 bio first bvec=393216 len=8192 0591us: btrfs_do_readpage: root=5 ino=284 offset=405504, submit force=0 pgoff=12288 iosize=36864 0594us: btrfs_submit_data_bio: root=5 ino=284 bio first bvec=401408 len=4096 0863us: btrfs_submit_data_bio: root=5 ino=284 bio first bvec=405504 len=36864 0933us: btrfs_verify_data_csum: root=5 ino=284 offset=393216 len=8192 0967us: btrfs_do_readpage: root=5 ino=284 offset=442368, skip beyond isize pgoff=49152 iosize=16384 1047us: btrfs_verify_data_csum: root=5 ino=284 offset=401408 len=4096 1163us: btrfs_verify_data_csum: root=5 ino=284 offset=405504 len=36864 1290us: check_data_csum: !!! root=5 ino=284 offset=438272 pg_off=45056 !!! 7387us: end_bio_extent_readpage: root=5 ino=284 before pending_read_bios=0 [CAUSE] Normally we expect all submitted bio reads to only touch the range we specified, and under subpage context, it means we should only touch the range specified in each bvec. But in data read path, inside end_bio_extent_readpage(), we have page zeroing which only takes regular page size into consideration. This means for subpage if we have an inode whose content looks like below: 0 16K 32K 48K 64K |///////| |///////| | |//| = data needs to be read from disk | | = hole And i_size is 64K initially. Then the following race can happen: T1 | T2 --------------------------------+-------------------------------- btrfs_do_readpage() | |- isize = 64K; | | At this time, the isize is | | 64K | | | |- submit_extent_page() | | submit previous assembled bio| | assemble bio for [0, 16K) | | | |- submit_extent_page() | submit read bio for [0, 16K) | assemble read bio for | [32K, 48K) | | | btrfs_setsize() | |- i_size_write(, 16K); | Now i_size is only 16K end_io() for [0K, 16K) | |- end_bio_extent_readpage() | |- btrfs_verify_data_csum() | | No csum error | |- i_size = 16K; | |- zero_user_segment(16K, | PAGE_SIZE); | !!! We zeroed range | !!! [32K, 48K) | | end_io for [32K, 48K) | |- end_bio_extent_readpage() | |- btrfs_verify_data_csum() | ! CSUM MISMATCH ! | ! As the range is zeroed now ! [FIX] To fix the problem, make end_bio_extent_readpage() to only zero the range of bvec. The bug only affects subpage read-write support, as for full read-only mount we can't change i_size thus won't hit the race condition. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4dfb3ead1175..4671c99d468d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3008,12 +3008,23 @@ readpage_ok: if (likely(uptodate)) { loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_SHIFT; - unsigned off; - /* Zero out the end if this page straddles i_size */ - off = offset_in_page(i_size); - if (page->index == end_index && off) - zero_user_segment(page, off, PAGE_SIZE); + /* + * Zero out the remaining part if this range straddles + * i_size. + * + * Here we should only zero the range inside the bvec, + * not touch anything else. + * + * NOTE: i_size is exclusive while end is inclusive. + */ + if (page->index == end_index && i_size <= end) { + u32 zero_start = max(offset_in_page(i_size), + offset_in_page(end)); + + zero_user_segment(page, zero_start, + offset_in_page(end) + 1); + } } ASSERT(bio_offset + len > bio_offset); bio_offset += len; -- cgit v1.2.3-59-g8ed1b From 48698c973e6b4dde94d87cd1ded56d9436e9c97d Mon Sep 17 00:00:00 2001 From: Eckhart Mohr Date: Tue, 2 Mar 2021 17:25:22 +0100 Subject: ALSA: hda/realtek: Add quirk for Clevo NH55RZQ This applies a SND_PCI_QUIRK(...) to the Clevo NH55RZQ barebone. This fixes the issue of the device not recognizing a pluged in microphone. The device has both, a microphone only jack, and a speaker + microphone combo jack. The combo jack already works. The microphone-only jack does not recognize when a device is pluged in without this patch. Signed-off-by: Eckhart Mohr Co-developed-by: Werner Sembach Signed-off-by: Werner Sembach Cc: Link: https://lore.kernel.org/r/0eee6545-5169-ef08-6cfa-5def8cd48c86@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4871507cd4bf..2b7ddb3baf93 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8141,6 +8141,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[5|7][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), -- cgit v1.2.3-59-g8ed1b From 13046370c4d143b629adc1a51659a8a6497fbbe6 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 1 Mar 2021 19:12:02 +0800 Subject: ALSA: hda/hdmi: let new platforms assign the pcm slot dynamically If the platform set the dyn_pcm_assign to true, it will call hdmi_find_pcm_slot() to find a pcm slot when hdmi/dp monitor is connected and need to create a pcm. So far only intel_hsw_common_init() and patch_nvhdmi() set the dyn_pcm_assign to true, here we let tgl platforms assign the pcm slot dynamically first, if the driver runs for a period of time and there is no regression reported, we could set no_fixed_assgin to true in the intel_hsw_common_init(), and then set it to true in the patch_nvhdmi(). This change comes from the discussion between Takashi and Kai Vehmanen. Please refer to: https://github.com/alsa-project/alsa-lib/pull/118 Suggested-and-reviewed-by: Takashi Iwai Suggested-and-reviewed-by: Kai Vehmanen Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20210301111202.2684-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index e405be7929e3..e6d0843ee9df 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -157,6 +157,7 @@ struct hdmi_spec { bool dyn_pin_out; bool dyn_pcm_assign; + bool dyn_pcm_no_legacy; bool intel_hsw_fixup; /* apply Intel platform-specific fixups */ /* * Non-generic VIA/NVIDIA specific @@ -1345,6 +1346,12 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec, { int i; + /* on the new machines, try to assign the pcm slot dynamically, + * not use the preferred fixed map (legacy way) anymore. + */ + if (spec->dyn_pcm_no_legacy) + goto last_try; + /* * generic_hdmi_build_pcms() may allocate extra PCMs on some * platforms (with maximum of 'num_nids + dev_num - 1') @@ -1374,6 +1381,7 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec, return i; } + last_try: /* the last try; check the empty slots in pins */ for (i = 0; i < spec->num_nids; i++) { if (!test_bit(i, &spec->pcm_bitmap)) @@ -2987,8 +2995,16 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec) * the index indicate the port number. */ static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}; + int ret; - return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map)); + ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map)); + if (!ret) { + struct hdmi_spec *spec = codec->spec; + + spec->dyn_pcm_no_legacy = true; + } + + return ret; } /* Intel Baytrail and Braswell; with eld notifier */ -- cgit v1.2.3-59-g8ed1b From e4ef09e512940846fad77b1934065c166870b85a Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Tue, 2 Mar 2021 09:53:39 +0800 Subject: rsxx: remove unused including Remove including that don't need it. Signed-off-by: Tian Tao Signed-off-by: Jens Axboe --- drivers/block/rsxx/rsxx_priv.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h index 4861669e5786..6147977994ff 100644 --- a/drivers/block/rsxx/rsxx_priv.h +++ b/drivers/block/rsxx/rsxx_priv.h @@ -11,7 +11,6 @@ #ifndef __RSXX_PRIV_H__ #define __RSXX_PRIV_H__ -#include #include #include -- cgit v1.2.3-59-g8ed1b From 4168a8d27ed3a00f160e7f885c956f060d2a0741 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 23 Feb 2021 09:55:28 +0800 Subject: block/bfq: update comments and default value in docs for fifo_expire Correct the comments since bfq_fifo_expire[0] is for async request, while bfq_fifo_expire[1] is for sync request. Also update docs, according the source code, the default fifo_expire_async is 250ms, and fifo_expire_sync is 125ms. Signed-off-by: Joseph Qi Acked-by: Paolo Valente Signed-off-by: Jens Axboe --- Documentation/block/bfq-iosched.rst | 4 ++-- block/bfq-iosched.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst index 19d4d1570cee..66c5a4e54130 100644 --- a/Documentation/block/bfq-iosched.rst +++ b/Documentation/block/bfq-iosched.rst @@ -430,13 +430,13 @@ fifo_expire_async ----------------- This parameter is used to set the timeout of asynchronous requests. Default -value of this is 248ms. +value of this is 250ms. fifo_expire_sync ---------------- This parameter is used to set the timeout of synchronous requests. Default -value of this is 124ms. In case to favor synchronous requests over asynchronous +value of this is 125ms. In case to favor synchronous requests over asynchronous one, this value should be decreased relative to fifo_expire_async. low_latency diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index ec482e6641ff..95586137194e 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -162,7 +162,7 @@ BFQ_BFQQ_FNS(split_coop); BFQ_BFQQ_FNS(softrt_update); #undef BFQ_BFQQ_FNS \ -/* Expiration time of sync (0) and async (1) requests, in ns. */ +/* Expiration time of async (0) and sync (1) requests, in ns. */ static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; /* Maximum backwards seek (magic number lifted from CFQ), in KiB. */ -- cgit v1.2.3-59-g8ed1b From b59b153d1026b73deb032d01bb9319ebba896006 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 26 Feb 2021 04:54:45 -0500 Subject: KVM: x86: allow compiling out the Xen hypercall interface The Xen hypercall interface adds to the attack surface of the hypervisor and will be used quite rarely. Allow compiling it out. Suggested-by: Christoph Hellwig Reviewed-by: David Woodhouse Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 9 +++++++++ arch/x86/kvm/Makefile | 3 ++- arch/x86/kvm/x86.c | 8 ++++++++ arch/x86/kvm/xen.h | 24 +++++++++++++++++++++++- 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 7ac592664c52..a788d5120d4d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -103,6 +103,15 @@ config KVM_AMD_SEV Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. +config KVM_XEN + bool "Support for Xen hypercall interface" + depends on KVM + help + Provides KVM support for the hosting Xen HVM guests and + passing Xen hypercalls to userspace. + + If in doubt, say "N". + config KVM_MMU_AUDIT bool "Audit KVM MMU" depends on KVM && TRACEPOINTS diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index aeab168c5711..1b4766fe1de2 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -14,11 +14,12 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/dirty_ring.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o -kvm-y += x86.o emulate.o i8259.o irq.o lapic.o xen.o \ +kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \ mmu/spte.o kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o +kvm-$(CONFIG_KVM_XEN) += xen.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ vmx/evmcs.o vmx/nested.o vmx/posted_intr.o diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bfc928495bd4..4a5ce57b0bb2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3755,11 +3755,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: r = 1; break; +#ifdef CONFIG_KVM_XEN case KVM_CAP_XEN_HVM: r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR | KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO; break; +#endif case KVM_CAP_SYNC_REGS: r = KVM_SYNC_X86_VALID_FIELDS; break; @@ -5012,6 +5014,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_GET_SUPPORTED_HV_CPUID: r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp); break; +#ifdef CONFIG_KVM_XEN case KVM_XEN_VCPU_GET_ATTR: { struct kvm_xen_vcpu_attr xva; @@ -5032,6 +5035,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_xen_vcpu_set_attr(vcpu, &xva); break; } +#endif default: r = -EINVAL; } @@ -5653,6 +5657,7 @@ set_pit2_out: kvm->arch.bsp_vcpu_id = arg; mutex_unlock(&kvm->lock); break; +#ifdef CONFIG_KVM_XEN case KVM_XEN_HVM_CONFIG: { struct kvm_xen_hvm_config xhc; r = -EFAULT; @@ -5681,6 +5686,7 @@ set_pit2_out: r = kvm_xen_hvm_set_attr(kvm, &xha); break; } +#endif case KVM_SET_CLOCK: { struct kvm_clock_data user_ns; u64 now_ns; @@ -8039,8 +8045,10 @@ void kvm_arch_exit(void) kvm_mmu_module_exit(); free_percpu(user_return_msrs); kmem_cache_destroy(x86_fpu_cache); +#ifdef CONFIG_KVM_XEN static_key_deferred_flush(&kvm_xen_enabled); WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key)); +#endif } static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index b66a921776f4..87eaf2be9549 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -9,6 +9,7 @@ #ifndef __ARCH_X86_KVM_XEN_H__ #define __ARCH_X86_KVM_XEN_H__ +#ifdef CONFIG_KVM_XEN #include extern struct static_key_false_deferred kvm_xen_enabled; @@ -18,7 +19,6 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data); int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data); -int kvm_xen_hypercall(struct kvm_vcpu *vcpu); int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data); int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc); void kvm_xen_destroy_vm(struct kvm *kvm); @@ -38,6 +38,28 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu) return 0; } +#else +static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) +{ + return 1; +} + +static inline void kvm_xen_destroy_vm(struct kvm *kvm) +{ +} + +static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm) +{ + return false; +} + +static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu) +{ + return 0; +} +#endif + +int kvm_xen_hypercall(struct kvm_vcpu *vcpu); /* 32-bit compatibility definitions, also used natively in 32-bit build */ #include -- cgit v1.2.3-59-g8ed1b From 7d2cdad0da9dc0b1eb74c498c155be8c35a49ee6 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Fri, 26 Feb 2021 22:48:32 +1300 Subject: KVM: Documentation: Fix index for KVM_CAP_PPC_DAWR1 It should be 7.23 instead of 7.22, which has already been taken by KVM_CAP_X86_BUS_LOCK_EXIT. Signed-off-by: Kai Huang Message-Id: <20210226094832.380394-1-kai.huang@intel.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 80237dee7a96..359435d4e417 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6181,7 +6181,7 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons. KVM_RUN_BUS_LOCK flag is used to distinguish between them. -7.22 KVM_CAP_PPC_DAWR1 +7.23 KVM_CAP_PPC_DAWR1 ---------------------- :Architectures: ppc -- cgit v1.2.3-59-g8ed1b From 6528fc0a11de3d16339cf17639e2f69a68fcaf4d Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Wed, 10 Feb 2021 08:50:36 -0800 Subject: selftests: kvm: Mmap the entire vcpu mmap area The vcpu mmap area may consist of more than just the kvm_run struct. Allocate enough space for the entire vcpu mmap area. Without this, on x86, the PIO page, for example, will be missing. This is problematic when dealing with an unhandled exception from the guest as the exception vector will be incorrectly reported as 0x0. Message-Id: <20210210165035.3712489-1-aaronlewis@google.com> Reviewed-by: Andrew Jones Co-developed-by: Steve Rutherford Signed-off-by: Aaron Lewis Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index d787cb802b4a..e5fbf16f725b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -21,6 +21,8 @@ #define KVM_UTIL_PGS_PER_HUGEPG 512 #define KVM_UTIL_MIN_PFN 2 +static int vcpu_mmap_sz(void); + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static void *align(void *x, size_t size) { @@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu) vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->state, sizeof(*vcpu->state)); + ret = munmap(vcpu->state, vcpu_mmap_sz()); TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " "errno: %i", ret, errno); close(vcpu->fd); @@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->state)); - vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), + vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " "vcpu id: %u errno: %i", vcpuid, errno); -- cgit v1.2.3-59-g8ed1b From 7d7c5f76e54131ed05b057103b5278b6b852148b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 1 Mar 2021 12:53:08 +0000 Subject: KVM: x86/xen: Fix return code when clearing vcpu_info and vcpu_time_info When clearing the per-vCPU shared regions, set the return value to zero to indicate success. This was causing spurious errors to be returned to userspace on soft reset. Also add a paranoid BUILD_BUG_ON() for compat structure compatibility. Fixes: 0c165b3c01fe ("KVM: x86/xen: Allow reset of Xen attributes") Signed-off-by: David Woodhouse Message-Id: <20210301125309.874953-1-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index af8f6562fce4..77b20ff09078 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -187,9 +187,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) /* No compat necessary here. */ BUILD_BUG_ON(sizeof(struct vcpu_info) != sizeof(struct compat_vcpu_info)); + BUILD_BUG_ON(offsetof(struct vcpu_info, time) != + offsetof(struct compat_vcpu_info, time)); if (data->u.gpa == GPA_INVALID) { vcpu->arch.xen.vcpu_info_set = false; + r = 0; break; } @@ -206,6 +209,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: if (data->u.gpa == GPA_INVALID) { vcpu->arch.xen.vcpu_time_info_set = false; + r = 0; break; } -- cgit v1.2.3-59-g8ed1b From 30b5c851af7991ad08abe90c1e7c31615fa98a1a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 1 Mar 2021 12:53:09 +0000 Subject: KVM: x86/xen: Add support for vCPU runstate information This is how Xen guests do steal time accounting. The hypervisor records the amount of time spent in each of running/runnable/blocked/offline states. In the Xen accounting, a vCPU is still in state RUNSTATE_running while in Xen for a hypercall or I/O trap, etc. Only if Xen explicitly schedules does the state become RUNSTATE_blocked. In KVM this means that even when the vCPU exits the kvm_run loop, the state remains RUNSTATE_running. The VMM can explicitly set the vCPU to RUNSTATE_blocked by using the KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT attribute, and can also use KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST to retrospectively add a given amount of time to the blocked state and subtract it from the running state. The state_entry_time corresponds to get_kvmclock_ns() at the time the vCPU entered the current state, and the total times of all four states should always add up to state_entry_time. Co-developed-by: Joao Martins Signed-off-by: Joao Martins Signed-off-by: David Woodhouse Message-Id: <20210301125309.874953-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 41 +++ arch/x86/include/asm/kvm_host.h | 6 + arch/x86/kvm/x86.c | 13 +- arch/x86/kvm/xen.c | 286 +++++++++++++++++++++ arch/x86/kvm/xen.h | 40 ++- include/uapi/linux/kvm.h | 13 + .../testing/selftests/kvm/x86_64/xen_shinfo_test.c | 159 +++++++++++- 7 files changed, 553 insertions(+), 5 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 359435d4e417..1a2b5210cdbf 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4878,6 +4878,14 @@ see KVM_XEN_HVM_SET_ATTR above. union { __u64 gpa; __u64 pad[4]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; } u; }; @@ -4890,6 +4898,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO Sets the guest physical address of an additional pvclock structure for a given vCPU. This is typically used for guest vsyscall support. +KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR + Sets the guest physical address of the vcpu_runstate_info for a given + vCPU. This is how a Xen guest tracks CPU state such as steal time. + +KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT + Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of + the given vCPU from the .u.runstate.state member of the structure. + KVM automatically accounts running and runnable time but blocked + and offline states are only entered explicitly. + +KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA + Sets all fields of the vCPU runstate data from the .u.runstate member + of the structure, including the current runstate. The state_entry_time + must equal the sum of the other four times. + +KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST + This *adds* the contents of the .u.runstate members of the structure + to the corresponding members of the given vCPU's runstate data, thus + permitting atomic adjustments to the runstate times. The adjustment + to the state_entry_time must equal the sum of the adjustments to the + other four times. The state field must be set to -1, or to a valid + runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked + or RUNSTATE_offline) to set the current accounted state as of the + adjusted state_entry_time. + 4.130 KVM_XEN_VCPU_GET_ATTR --------------------------- @@ -4902,6 +4935,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO Allows Xen vCPU attributes to be read. For the structure and types, see KVM_XEN_VCPU_SET_ATTR above. +The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used +with the KVM_XEN_VCPU_GET_ATTR ioctl. + 5. The kvm_run structure ======================== @@ -6666,6 +6702,7 @@ PVHVM guests. Valid flags are:: #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 2) The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG ioctl is available, for the guest to set its hypercall page. @@ -6680,3 +6717,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors for event channel upcalls when the evtchn_upcall_pending field of a vcpu's vcpu_info is set. + +The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related +features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are +supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 85ccbd4b7c52..877a4025d8da 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -535,10 +535,16 @@ struct kvm_vcpu_hv { /* Xen HVM per vcpu emulation context */ struct kvm_vcpu_xen { u64 hypercall_rip; + u32 current_runstate; bool vcpu_info_set; bool vcpu_time_info_set; + bool runstate_set; struct gfn_to_hva_cache vcpu_info_cache; struct gfn_to_hva_cache vcpu_time_info_cache; + struct gfn_to_hva_cache runstate_cache; + u64 last_steal; + u64 runstate_entry_time; + u64 runstate_times[4]; }; struct kvm_vcpu_arch { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a5ce57b0bb2..868213ca4f98 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2956,6 +2956,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu) struct kvm_host_map map; struct kvm_steal_time *st; + if (kvm_xen_msr_enabled(vcpu->kvm)) { + kvm_xen_runstate_set_running(vcpu); + return; + } + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -3760,6 +3765,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR | KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO; + if (sched_info_on()) + r |= KVM_XEN_HVM_CONFIG_RUNSTATE; break; #endif case KVM_CAP_SYNC_REGS: @@ -4039,7 +4046,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) if (vcpu->preempted && !vcpu->arch.guest_state_protected) vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); - kvm_steal_time_set_preempted(vcpu); + if (kvm_xen_msr_enabled(vcpu->kvm)) + kvm_xen_runstate_set_preempted(vcpu); + else + kvm_steal_time_set_preempted(vcpu); + static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); /* diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 77b20ff09078..ae17250e1efe 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -11,9 +11,11 @@ #include "hyperv.h" #include +#include #include #include +#include #include "trace.h" @@ -61,6 +63,132 @@ out: return ret; } +static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) +{ + struct kvm_vcpu_xen *vx = &v->arch.xen; + u64 now = get_kvmclock_ns(v->kvm); + u64 delta_ns = now - vx->runstate_entry_time; + u64 run_delay = current->sched_info.run_delay; + + if (unlikely(!vx->runstate_entry_time)) + vx->current_runstate = RUNSTATE_offline; + + /* + * Time waiting for the scheduler isn't "stolen" if the + * vCPU wasn't running anyway. + */ + if (vx->current_runstate == RUNSTATE_running) { + u64 steal_ns = run_delay - vx->last_steal; + + delta_ns -= steal_ns; + + vx->runstate_times[RUNSTATE_runnable] += steal_ns; + } + vx->last_steal = run_delay; + + vx->runstate_times[vx->current_runstate] += delta_ns; + vx->current_runstate = state; + vx->runstate_entry_time = now; +} + +void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) +{ + struct kvm_vcpu_xen *vx = &v->arch.xen; + uint64_t state_entry_time; + unsigned int offset; + + kvm_xen_update_runstate(v, state); + + if (!vx->runstate_set) + return; + + BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); + + offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time); +#ifdef CONFIG_X86_64 + /* + * The only difference is alignment of uint64_t in 32-bit. + * So the first field 'state' is accessed directly using + * offsetof() (where its offset happens to be zero), while the + * remaining fields which are all uint64_t, start at 'offset' + * which we tweak here by adding 4. + */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != + offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != + offsetof(struct compat_vcpu_runstate_info, time) + 4); + + if (v->kvm->arch.xen.long_mode) + offset = offsetof(struct vcpu_runstate_info, state_entry_time); +#endif + /* + * First write the updated state_entry_time at the appropriate + * location determined by 'offset'. + */ + state_entry_time = vx->runstate_entry_time; + state_entry_time |= XEN_RUNSTATE_UPDATE; + + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) != + sizeof(state_entry_time)); + BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) != + sizeof(state_entry_time)); + + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, + &state_entry_time, offset, + sizeof(state_entry_time))) + return; + smp_wmb(); + + /* + * Next, write the new runstate. This is in the *same* place + * for 32-bit and 64-bit guests, asserted here for paranoia. + */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != + offsetof(struct compat_vcpu_runstate_info, state)); + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) != + sizeof(vx->current_runstate)); + BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) != + sizeof(vx->current_runstate)); + + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, + &vx->current_runstate, + offsetof(struct vcpu_runstate_info, state), + sizeof(vx->current_runstate))) + return; + + /* + * Write the actual runstate times immediately after the + * runstate_entry_time. + */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != + offsetof(struct vcpu_runstate_info, time) - sizeof(u64)); + BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != + offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64)); + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != + sizeof(((struct compat_vcpu_runstate_info *)0)->time)); + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != + sizeof(vx->runstate_times)); + + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, + &vx->runstate_times[0], + offset + sizeof(u64), + sizeof(vx->runstate_times))) + return; + + smp_wmb(); + + /* + * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's + * runstate_entry_time field. + */ + + state_entry_time &= ~XEN_RUNSTATE_UPDATE; + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, + &state_entry_time, offset, + sizeof(state_entry_time))) + return; +} + int __kvm_xen_has_interrupt(struct kvm_vcpu *v) { u8 rc = 0; @@ -223,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) } break; + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (data->u.gpa == GPA_INVALID) { + vcpu->arch.xen.runstate_set = false; + r = 0; + break; + } + + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.xen.runstate_cache, + data->u.gpa, + sizeof(struct vcpu_runstate_info)); + if (!r) { + vcpu->arch.xen.runstate_set = true; + } + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (data->u.runstate.state > RUNSTATE_offline) { + r = -EINVAL; + break; + } + + kvm_xen_update_runstate(vcpu, data->u.runstate.state); + r = 0; + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (data->u.runstate.state > RUNSTATE_offline) { + r = -EINVAL; + break; + } + if (data->u.runstate.state_entry_time != + (data->u.runstate.time_running + + data->u.runstate.time_runnable + + data->u.runstate.time_blocked + + data->u.runstate.time_offline)) { + r = -EINVAL; + break; + } + if (get_kvmclock_ns(vcpu->kvm) < + data->u.runstate.state_entry_time) { + r = -EINVAL; + break; + } + + vcpu->arch.xen.current_runstate = data->u.runstate.state; + vcpu->arch.xen.runstate_entry_time = + data->u.runstate.state_entry_time; + vcpu->arch.xen.runstate_times[RUNSTATE_running] = + data->u.runstate.time_running; + vcpu->arch.xen.runstate_times[RUNSTATE_runnable] = + data->u.runstate.time_runnable; + vcpu->arch.xen.runstate_times[RUNSTATE_blocked] = + data->u.runstate.time_blocked; + vcpu->arch.xen.runstate_times[RUNSTATE_offline] = + data->u.runstate.time_offline; + vcpu->arch.xen.last_steal = current->sched_info.run_delay; + r = 0; + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (data->u.runstate.state > RUNSTATE_offline && + data->u.runstate.state != (u64)-1) { + r = -EINVAL; + break; + } + /* The adjustment must add up */ + if (data->u.runstate.state_entry_time != + (data->u.runstate.time_running + + data->u.runstate.time_runnable + + data->u.runstate.time_blocked + + data->u.runstate.time_offline)) { + r = -EINVAL; + break; + } + + if (get_kvmclock_ns(vcpu->kvm) < + (vcpu->arch.xen.runstate_entry_time + + data->u.runstate.state_entry_time)) { + r = -EINVAL; + break; + } + + vcpu->arch.xen.runstate_entry_time += + data->u.runstate.state_entry_time; + vcpu->arch.xen.runstate_times[RUNSTATE_running] += + data->u.runstate.time_running; + vcpu->arch.xen.runstate_times[RUNSTATE_runnable] += + data->u.runstate.time_runnable; + vcpu->arch.xen.runstate_times[RUNSTATE_blocked] += + data->u.runstate.time_blocked; + vcpu->arch.xen.runstate_times[RUNSTATE_offline] += + data->u.runstate.time_offline; + + if (data->u.runstate.state <= RUNSTATE_offline) + kvm_xen_update_runstate(vcpu, data->u.runstate.state); + r = 0; + break; + default: break; } @@ -255,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) r = 0; break; + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (vcpu->arch.xen.runstate_set) { + data->u.gpa = vcpu->arch.xen.runstate_cache.gpa; + r = 0; + } + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + data->u.runstate.state = vcpu->arch.xen.current_runstate; + r = 0; + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + data->u.runstate.state = vcpu->arch.xen.current_runstate; + data->u.runstate.state_entry_time = + vcpu->arch.xen.runstate_entry_time; + data->u.runstate.time_running = + vcpu->arch.xen.runstate_times[RUNSTATE_running]; + data->u.runstate.time_runnable = + vcpu->arch.xen.runstate_times[RUNSTATE_runnable]; + data->u.runstate.time_blocked = + vcpu->arch.xen.runstate_times[RUNSTATE_blocked]; + data->u.runstate.time_offline = + vcpu->arch.xen.runstate_times[RUNSTATE_offline]; + r = 0; + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: + r = -EINVAL; + break; + default: break; } diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 87eaf2be9549..463a7844a8ca 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -23,6 +23,12 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data); int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc); void kvm_xen_destroy_vm(struct kvm *kvm); +static inline bool kvm_xen_msr_enabled(struct kvm *kvm) +{ + return static_branch_unlikely(&kvm_xen_enabled.key) && + kvm->arch.xen_hvm_config.msr; +} + static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm) { return static_branch_unlikely(&kvm_xen_enabled.key) && @@ -48,6 +54,11 @@ static inline void kvm_xen_destroy_vm(struct kvm *kvm) { } +static inline bool kvm_xen_msr_enabled(struct kvm *kvm) +{ + return false; +} + static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm) { return false; @@ -61,10 +72,31 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu) int kvm_xen_hypercall(struct kvm_vcpu *vcpu); -/* 32-bit compatibility definitions, also used natively in 32-bit build */ #include #include +#include +void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state); + +static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu) +{ + kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running); +} + +static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) +{ + /* + * If the vCPU wasn't preempted but took a normal exit for + * some reason (hypercalls, I/O, etc.), that is accounted as + * still RUNSTATE_running, as the VMM is still operating on + * behalf of the vCPU. Only if the VMM does actually block + * does it need to enter RUNSTATE_blocked. + */ + if (vcpu->preempted) + kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); +} + +/* 32-bit compatibility definitions, also used natively in 32-bit build */ struct compat_arch_vcpu_info { unsigned int cr2; unsigned int pad[5]; @@ -97,4 +129,10 @@ struct compat_shared_info { struct compat_arch_shared_info arch; }; +struct compat_vcpu_runstate_info { + int state; + uint64_t state_entry_time; + uint64_t time[4]; +} __attribute__((packed)); + #endif /* __ARCH_X86_KVM_XEN_H__ */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8b281f722e5b..f6afee209620 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1154,6 +1154,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) struct kvm_xen_hvm_config { __u32 flags; @@ -1621,12 +1622,24 @@ struct kvm_xen_vcpu_attr { union { __u64 gpa; __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; } u; }; /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 9246ea310587..804ff5ff022d 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -13,19 +13,27 @@ #include #include +#include +#include #define VCPU_ID 5 +#define SHINFO_REGION_GVA 0xc0000000ULL #define SHINFO_REGION_GPA 0xc0000000ULL #define SHINFO_REGION_SLOT 10 #define PAGE_SIZE 4096 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) +#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) + +#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) static struct kvm_vm *vm; #define XEN_HYPERCALL_MSR 0x40000000 +#define MIN_STEAL_TIME 50000 + struct pvclock_vcpu_time_info { u32 version; u32 pad0; @@ -43,11 +51,67 @@ struct pvclock_wall_clock { u32 nsec; } __attribute__((__packed__)); +struct vcpu_runstate_info { + uint32_t state; + uint64_t state_entry_time; + uint64_t time[4]; +}; + +#define RUNSTATE_running 0 +#define RUNSTATE_runnable 1 +#define RUNSTATE_blocked 2 +#define RUNSTATE_offline 3 + static void guest_code(void) { + struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; + + /* Test having the host set runstates manually */ + GUEST_SYNC(RUNSTATE_runnable); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); + GUEST_ASSERT(rs->state == 0); + + GUEST_SYNC(RUNSTATE_blocked); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); + GUEST_ASSERT(rs->state == 0); + + GUEST_SYNC(RUNSTATE_offline); + GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); + GUEST_ASSERT(rs->state == 0); + + /* Test runstate time adjust */ + GUEST_SYNC(4); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); + + /* Test runstate time set */ + GUEST_SYNC(5); + GUEST_ASSERT(rs->state_entry_time >= 0x8000); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); + + /* sched_yield() should result in some 'runnable' time */ + GUEST_SYNC(6); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); + GUEST_DONE(); } +static long get_run_delay(void) +{ + char path[64]; + long val[2]; + FILE *fp; + + sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); + fp = fopen(path, "r"); + fscanf(fp, "%ld %ld ", &val[0], &val[1]); + fclose(fp); + + return val[1]; +} + static int cmp_timespec(struct timespec *a, struct timespec *b) { if (a->tv_sec > b->tv_sec) @@ -66,12 +130,14 @@ int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; - if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & - KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { + int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); + if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available"); exit(KSFT_SKIP); } + bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); + clock_gettime(CLOCK_REALTIME, &min_ts); vm = vm_create_default(VCPU_ID, 0, (void *) guest_code); @@ -80,6 +146,7 @@ int main(int argc, char *argv[]) /* Map a region for the shared_info page */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); + virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0); struct kvm_xen_hvm_config hvmc = { .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, @@ -111,6 +178,17 @@ int main(int argc, char *argv[]) }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); + if (do_runstate_tests) { + struct kvm_xen_vcpu_attr st = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + .u.gpa = RUNSTATE_ADDR, + }; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); + } + + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);; + rs->state = 0x5a; + for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; @@ -126,8 +204,56 @@ int main(int argc, char *argv[]) case UCALL_ABORT: TEST_FAIL("%s", (const char *)uc.args[0]); /* NOT REACHED */ - case UCALL_SYNC: + case UCALL_SYNC: { + struct kvm_xen_vcpu_attr rst; + long rundelay; + + /* If no runstate support, bail out early */ + if (!do_runstate_tests) + goto done; + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + + switch (uc.args[1]) { + case RUNSTATE_running...RUNSTATE_offline: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; + rst.u.runstate.state = uc.args[1]; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + case 4: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; + memset(&rst.u, 0, sizeof(rst.u)); + rst.u.runstate.state = (uint64_t)-1; + rst.u.runstate.time_blocked = + 0x5a - rs->time[RUNSTATE_blocked]; + rst.u.runstate.time_offline = + 0x6b6b - rs->time[RUNSTATE_offline]; + rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - + rst.u.runstate.time_offline; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + + case 5: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; + memset(&rst.u, 0, sizeof(rst.u)); + rst.u.runstate.state = RUNSTATE_running; + rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; + rst.u.runstate.time_blocked = 0x6b6b; + rst.u.runstate.time_offline = 0x5a; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + case 6: + /* Yield until scheduler delay exceeds target */ + rundelay = get_run_delay() + MIN_STEAL_TIME; + do { + sched_yield(); + } while (get_run_delay() < rundelay); + break; + } break; + } case UCALL_DONE: goto done; default: @@ -162,6 +288,33 @@ int main(int argc, char *argv[]) TEST_ASSERT(ti2->version && !(ti2->version & 1), "Bad time_info version %x", ti->version); + if (do_runstate_tests) { + /* + * Fetch runstate and check sanity. Strictly speaking in the + * general case we might not expect the numbers to be identical + * but in this case we know we aren't running the vCPU any more. + */ + struct kvm_xen_vcpu_attr rst = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, + }; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); + + TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + } kvm_vm_free(vm); return 0; } -- cgit v1.2.3-59-g8ed1b From 9e46f6c6c959d9bb45445c2e8f04a75324a0dfd0 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Tue, 2 Mar 2021 12:51:31 -0600 Subject: KVM: SVM: Clear the CR4 register on reset This problem was reported on a SVM guest while executing kexec. Kexec fails to load the new kernel when the PCID feature is enabled. When kexec starts loading the new kernel, it starts the process by resetting the vCPU's and then bringing each vCPU online one by one. The vCPU reset is supposed to reset all the register states before the vCPUs are brought online. However, the CR4 register is not reset during this process. If this register is already setup during the last boot, all the flags can remain intact. The X86_CR4_PCIDE bit can only be enabled in long mode. So, it must be enabled much later in SMP initialization. Having the X86_CR4_PCIDE bit set during SMP boot can cause a boot failures. Fix the issue by resetting the CR4 register in init_vmcb(). Signed-off-by: Babu Moger Message-Id: <161471109108.30811.6392805173629704166.stgit@bmoger-ubuntu> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c636021b066b..baee91c1e936 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1200,6 +1200,7 @@ static void init_vmcb(struct vcpu_svm *svm) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); + svm_set_cr4(&svm->vcpu, 0); svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED); -- cgit v1.2.3-59-g8ed1b From 7db688e99c0f770ae73e0f1f3fb67f9b64266445 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 2 Mar 2021 12:58:50 +0300 Subject: pstore/ram: Rate-limit "uncorrectable error in header" message There is a quite huge "uncorrectable error in header" flood in KMSG on a clean system boot since there is no pstore buffer saved in RAM. Let's silence the redundant noisy messages by rate-limiting the printk message. Now there are maximum 10 messages printed repeatedly instead of 35+. Signed-off-by: Dmitry Osipenko Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210302095850.30894-1-digetx@gmail.com --- fs/pstore/ram_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index aa8e0b65ff1a..fff363bfd484 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -246,7 +246,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz, pr_info("error in header, %d\n", numerr); prz->corrected_bytes += numerr; } else if (numerr < 0) { - pr_info("uncorrectable error in header\n"); + pr_info_ratelimited("uncorrectable error in header\n"); prz->bad_blocks++; } -- cgit v1.2.3-59-g8ed1b From 999340d51174ce4141dd723105d4cef872b13ee9 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Mon, 22 Feb 2021 13:58:40 +0000 Subject: ftrace: Have recordmcount use w8 to read relp->r_info in arm64_is_fake_mcount On little endian system, Use aarch64_be(gcc v7.3) downloaded from linaro.org to build image with CONFIG_CPU_BIG_ENDIAN = y, CONFIG_FTRACE = y, CONFIG_DYNAMIC_FTRACE = y. gcc will create symbols of _mcount but recordmcount can not create mcount_loc for *.o. aarch64_be-linux-gnu-objdump -r fs/namei.o | grep mcount 00000000000000d0 R_AARCH64_CALL26 _mcount ... 0000000000007190 R_AARCH64_CALL26 _mcount The reason is than funciton arm64_is_fake_mcount can not work correctly. A symbol of _mcount in *.o compiled with big endian compiler likes: 00 00 00 2d 00 00 01 1b w(rp->r_info) will return 0x2d instead of 0x011b. Because w() takes uint32_t as parameter, which truncates rp->r_info. Use w8() instead w() to read relp->r_info Link: https://lkml.kernel.org/r/20210222135840.56250-1-chenjun102@huawei.com Fixes: ea0eada45632 ("recordmcount: only record relocation of type R_AARCH64_CALL26 on arm64.") Acked-by: Will Deacon Signed-off-by: Chen Jun Signed-off-by: Steven Rostedt (VMware) --- scripts/recordmcount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index b9c2ee7ab43f..cce12e1971d8 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -438,7 +438,7 @@ static int arm_is_fake_mcount(Elf32_Rel const *rp) static int arm64_is_fake_mcount(Elf64_Rel const *rp) { - return ELF64_R_TYPE(w(rp->r_info)) != R_AARCH64_CALL26; + return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26; } /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. -- cgit v1.2.3-59-g8ed1b From f5f4fc4649ae542b1a25670b17aaf3cbb6187acc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 Mar 2021 17:22:11 -0700 Subject: ia64: don't call handle_signal() unless there's actually a signal queued Sergei and John both reported that ia64 failed to boot in 5.11, and it was related to signals. Turns out the ia64 signal handling is a bit odd, it doesn't check the return value of get_signal() for whether there's a signal to deliver or not. With the introduction of TIF_NOTIFY_SIGNAL, then task_work could trigger it. Fix it by only calling handle_signal() if we actually have a real signal to deliver. This brings it in line with all other archs, too. Fixes: b269c229b0e8 ("ia64: add support for TIF_NOTIFY_SIGNAL") Reported-by: Sergei Trofimovich Reported-by: John Paul Adrian Glaubitz Tested-by: Sergei Trofimovich Signed-off-by: Jens Axboe --- arch/ia64/kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index e67b22fc3c60..c1b299760bf7 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) * need to push through a forced SIGSEGV. */ while (1) { - get_signal(&ksig); + if (!get_signal(&ksig)) + break; /* * get_signal() may have run a debugger (via notify_parent()) -- cgit v1.2.3-59-g8ed1b From caf6912f3f4af7232340d500a4a2008f81b93f14 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 Mar 2021 14:53:21 -0700 Subject: swap: fix swapfile read/write offset We're not factoring in the start of the file for where to write and read the swapfile, which leads to very unfortunate side effects of writing where we should not be... Fixes: 48d15436fde6 ("mm: remove get_swap_bio") Signed-off-by: Jens Axboe --- include/linux/swap.h | 1 + mm/page_io.c | 5 ----- mm/swapfile.c | 13 +++++++++++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 32f665b1ee85..4cc6ec3bf0ab 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -485,6 +485,7 @@ struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); +sector_t swap_page_sector(struct page *page); static inline void put_swap_device(struct swap_info_struct *si) { diff --git a/mm/page_io.c b/mm/page_io.c index 485fa5cca4a2..c493ce9ebcf5 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -254,11 +254,6 @@ out: return ret; } -static sector_t swap_page_sector(struct page *page) -{ - return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9); -} - static inline void count_swpout_vm_event(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/swapfile.c b/mm/swapfile.c index f039745989d2..084a5b9a18e5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -219,6 +219,19 @@ offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) BUG(); } +sector_t swap_page_sector(struct page *page) +{ + struct swap_info_struct *sis = page_swap_info(page); + struct swap_extent *se; + sector_t sector; + pgoff_t offset; + + offset = __page_file_index(page); + se = offset_to_swap_extent(sis, offset); + sector = se->start_block + (offset - se->start_page); + return sector << (PAGE_SHIFT - 9); +} + /* * swap allocation tell device that a cluster of swap can now be discarded, * to allow the swap device to optimize its wear-levelling. -- cgit v1.2.3-59-g8ed1b From 8310b77b48c5558c140e7a57a702e7819e62f04e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 25 Feb 2021 16:34:43 +0100 Subject: Xen/gnttab: handle p2m update errors on a per-slot basis Bailing immediately from set_foreign_p2m_mapping() upon a p2m updating error leaves the full batch in an ambiguous state as far as the caller is concerned. Instead flags respective slots as bad, unmapping what was mapped there right away. HYPERVISOR_grant_table_op()'s return value and the individual unmap slots' status fields get used only for a one-time - there's not much we can do in case of a failure. Note that there's no GNTST_enomem or alike, so GNTST_general_error gets used. The map ops' handle fields get overwritten just to be on the safe side. This is part of XSA-367. Cc: Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/96cccf5d-e756-5f53-b91a-ea269bfb9be0@suse.com Signed-off-by: Juergen Gross --- arch/arm/xen/p2m.c | 35 +++++++++++++++++++++++++++++++---- arch/x86/xen/p2m.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 7 deletions(-) diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index fd6e3aafe272..acb464547a54 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -93,12 +93,39 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, int i; for (i = 0; i < count; i++) { + struct gnttab_unmap_grant_ref unmap; + int rc; + if (map_ops[i].status) continue; - if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, - map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) { - return -ENOMEM; - } + if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, + map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) + continue; + + /* + * Signal an error for this slot. This in turn requires + * immediate unmapping. + */ + map_ops[i].status = GNTST_general_error; + unmap.host_addr = map_ops[i].host_addr, + unmap.handle = map_ops[i].handle; + map_ops[i].handle = ~0; + if (map_ops[i].flags & GNTMAP_device_map) + unmap.dev_bus_addr = map_ops[i].dev_bus_addr; + else + unmap.dev_bus_addr = 0; + + /* + * Pre-populate the status field, to be recognizable in + * the log message below. + */ + unmap.status = 1; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + &unmap, 1); + if (rc || unmap.status != GNTST_okay) + pr_err_once("gnttab unmap failed: rc=%d st=%d\n", + rc, unmap.status); } return 0; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b5949e5a83ec..9545b8df5315 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -710,6 +710,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, for (i = 0; i < count; i++) { unsigned long mfn, pfn; + struct gnttab_unmap_grant_ref unmap[2]; + int rc; /* Do not add to override if the map failed. */ if (map_ops[i].status != GNTST_okay || @@ -727,10 +729,46 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { - ret = -ENOMEM; - goto out; + if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) + continue; + + /* + * Signal an error for this slot. This in turn requires + * immediate unmapping. + */ + map_ops[i].status = GNTST_general_error; + unmap[0].host_addr = map_ops[i].host_addr, + unmap[0].handle = map_ops[i].handle; + map_ops[i].handle = ~0; + if (map_ops[i].flags & GNTMAP_device_map) + unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr; + else + unmap[0].dev_bus_addr = 0; + + if (kmap_ops) { + kmap_ops[i].status = GNTST_general_error; + unmap[1].host_addr = kmap_ops[i].host_addr, + unmap[1].handle = kmap_ops[i].handle; + kmap_ops[i].handle = ~0; + if (kmap_ops[i].flags & GNTMAP_device_map) + unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr; + else + unmap[1].dev_bus_addr = 0; } + + /* + * Pre-populate both status fields, to be recognizable in + * the log message below. + */ + unmap[0].status = 1; + unmap[1].status = 1; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap, 1 + !!kmap_ops); + if (rc || unmap[0].status != GNTST_okay || + unmap[1].status != GNTST_okay) + pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n", + rc, unmap[0].status, unmap[1].status); } out: -- cgit v1.2.3-59-g8ed1b From 2991397d23ec597405b116d96de3813420bdcbc3 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 25 Feb 2021 16:35:15 +0100 Subject: xen-netback: respect gnttab_map_refs()'s return value Commit 3194a1746e8a ("xen-netback: don't "handle" error by BUG()") dropped respective a BUG_ON() without noticing that with this the variable's value wouldn't be consumed anymore. With gnttab_set_map_op() setting all status fields to a non-zero value, in case of an error no slot should have a status of GNTST_okay (zero). This is part of XSA-367. Cc: Reported-by: kernel test robot Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/d933f495-619a-0086-5fb4-1ec3cf81a8fc@suse.com Signed-off-by: Juergen Gross --- drivers/net/xen-netback/netback.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index e5c73f819662..6afb5ca999c2 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1343,11 +1343,21 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return 0; gnttab_batch_copy(queue->tx_copy_ops, nr_cops); - if (nr_mops != 0) + if (nr_mops != 0) { ret = gnttab_map_refs(queue->tx_map_ops, NULL, queue->pages_to_map, nr_mops); + if (ret) { + unsigned int i; + + netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n", + nr_mops, ret); + for (i = 0; i < nr_mops; ++i) + WARN_ON_ONCE(queue->tx_map_ops[i].status == + GNTST_okay); + } + } work_done = xenvif_tx_submit(queue); -- cgit v1.2.3-59-g8ed1b From 882213990d32fd224340a4533f6318dd152be4b2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 24 Feb 2021 16:03:08 +0100 Subject: xen: fix p2m size in dom0 for disabled memory hotplug case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 9e2369c06c8a18 ("xen: add helpers to allocate unpopulated memory") foreign mappings are using guest physical addresses allocated via ZONE_DEVICE functionality. This will result in problems for the case of no balloon memory hotplug being configured, as the p2m list will only cover the initial memory size of the domain. Any ZONE_DEVICE allocated address will be outside the p2m range and thus a mapping can't be established with that memory address. Fix that by extending the p2m size for that case. At the same time add a check for a to be created mapping to be within the p2m limits in order to detect errors early. While changing a comment, remove some 32-bit leftovers. This is XSA-369. Fixes: 9e2369c06c8a18 ("xen: add helpers to allocate unpopulated memory") Cc: # 5.9 Reported-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/page.h | 12 ++++++++++++ arch/x86/xen/p2m.c | 10 ++++++---- arch/x86/xen/setup.c | 25 +++---------------------- 3 files changed, 21 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a162e559753..7068e4bb057d 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -86,6 +86,18 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, } #endif +/* + * The maximum amount of extra memory compared to the base size. The + * main scaling factor is the size of struct page. At extreme ratios + * of base:extra, all the base memory can be filled with page + * structures for the extra memory, leaving no space for anything + * else. + * + * 10x seems like a reasonable balance between scaling flexibility and + * leaving a practically usable system. + */ +#define XEN_EXTRA_MEM_RATIO (10) + /* * Helper functions to write or read unsigned long values to/from * memory, when the access may fault. diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 9545b8df5315..a3cc33091f46 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -416,6 +416,9 @@ void __init xen_vmalloc_p2m_tree(void) xen_p2m_last_pfn = xen_max_p2m_pfn; p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; + if (!p2m_limit && IS_ENABLED(CONFIG_XEN_UNPOPULATED_ALLOC)) + p2m_limit = xen_start_info->nr_pages * XEN_EXTRA_MEM_RATIO; + vm.flags = VM_ALLOC; vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), PMD_SIZE * PMDS_PER_MID_PAGE); @@ -652,10 +655,9 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) pte_t *ptep; unsigned int level; - if (unlikely(pfn >= xen_p2m_size)) { - BUG_ON(mfn != INVALID_P2M_ENTRY); - return true; - } + /* Only invalid entries allowed above the highest p2m covered frame. */ + if (unlikely(pfn >= xen_p2m_size)) + return mfn == INVALID_P2M_ENTRY; /* * The interface requires atomic updates on p2m elements. diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 7eab14d56369..1a3b75652fa4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -59,18 +59,6 @@ static struct { } xen_remap_buf __initdata __aligned(PAGE_SIZE); static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; -/* - * The maximum amount of extra memory compared to the base size. The - * main scaling factor is the size of struct page. At extreme ratios - * of base:extra, all the base memory can be filled with page - * structures for the extra memory, leaving no space for anything - * else. - * - * 10x seems like a reasonable balance between scaling flexibility and - * leaving a practically usable system. - */ -#define EXTRA_MEM_RATIO (10) - static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB); static void __init xen_parse_512gb(void) @@ -790,20 +778,13 @@ char * __init xen_memory_setup(void) extra_pages += max_pages - max_pfn; /* - * Clamp the amount of extra memory to a EXTRA_MEM_RATIO - * factor the base size. On non-highmem systems, the base - * size is the full initial memory allocation; on highmem it - * is limited to the max size of lowmem, so that it doesn't - * get completely filled. + * Clamp the amount of extra memory to a XEN_EXTRA_MEM_RATIO + * factor the base size. * * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. - * - * In principle there could be a problem in lowmem systems if - * the initial memory is also very large with respect to - * lowmem, but we won't try to deal with that here. */ - extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), + extra_pages = min3(XEN_EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), extra_pages, max_pages - max_pfn); i = 0; addr = xen_e820_table.entries[0].addr; -- cgit v1.2.3-59-g8ed1b From 73e7161eab5dee98114987239ec9c87fe8034ddb Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Tue, 2 Mar 2021 19:04:14 +0100 Subject: ALSA: hda/realtek: Add quirk for Intel NUC 10 This adds a new SND_PCI_QUIRK(...) and applies it to the Intel NUC 10 devices. This fixes the issue of the devices not having audio input and output on the headset jack because the kernel does not recognize when something is plugged in. The new quirk was inspired by the quirk for the Intel NUC 8 devices, but it turned out that the NUC 10 uses another pin. This information was acquired by black box testing likely pins. Co-developed-by: Eckhart Mohr Signed-off-by: Eckhart Mohr Signed-off-by: Werner Sembach Cc: Link: https://lore.kernel.org/r/20210302180414.23194-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2b7ddb3baf93..7c24d13181e1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6396,6 +6396,7 @@ enum { ALC269_FIXUP_LEMOTE_A1802, ALC269_FIXUP_LEMOTE_A190X, ALC256_FIXUP_INTEL_NUC8_RUGGED, + ALC256_FIXUP_INTEL_NUC10, ALC255_FIXUP_XIAOMI_HEADSET_MIC, ALC274_FIXUP_HP_MIC, ALC274_FIXUP_HP_HEADSET_MIC, @@ -7783,6 +7784,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC256_FIXUP_INTEL_NUC10] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC255_FIXUP_XIAOMI_HEADSET_MIC] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -8236,6 +8246,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), + SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), #if 0 /* Below is a quirk table taken from the old code. -- cgit v1.2.3-59-g8ed1b From 77516d25f54912a7baedeeac1b1b828b6f285152 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Mar 2021 13:59:12 +0300 Subject: rsxx: Return -EFAULT if copy_to_user() fails The copy_to_user() function returns the number of bytes remaining but we want to return -EFAULT to the user if it can't complete the copy. The "st" variable only holds zero on success or negative error codes on failure so the type should be int. Fixes: 36f988e978f8 ("rsxx: Adding in debugfs entries.") Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- drivers/block/rsxx/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 63f549889f87..5ac1881396af 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -165,15 +165,17 @@ static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf, { struct rsxx_cardinfo *card = file_inode(fp)->i_private; char *buf; - ssize_t st; + int st; buf = kzalloc(cnt, GFP_KERNEL); if (!buf) return -ENOMEM; st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1); - if (!st) - st = copy_to_user(ubuf, buf, cnt); + if (!st) { + if (copy_to_user(ubuf, buf, cnt)) + st = -EFAULT; + } kfree(buf); if (st) return st; -- cgit v1.2.3-59-g8ed1b From 26af17722a07597d3e556eda92c6fce8d528bc9f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 3 Mar 2021 15:23:46 +0100 Subject: ALSA: hda/realtek: Apply dual codec quirks for MSI Godlike X570 board There is another MSI board (1462:cc34) that has dual Realtek codecs, and we need to apply the existing quirk for fixing the conflicts of Master control. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211743 Cc: Link: https://lore.kernel.org/r/20210303142346.28182-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7c24d13181e1..b47504fa8dfd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2532,6 +2532,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), + SND_PCI_QUIRK(0x1462, 0xcc34, "MSI Godlike X570", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), -- cgit v1.2.3-59-g8ed1b From 232a37ea3aee9cb37bbc154fb1440a66ae4743f4 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 1 Mar 2021 23:24:16 +0200 Subject: tpm: Remove unintentional dump_stack() call Somewhere along the line, probably during a rebase, an unintentional dump_stack() got included. Revert this change. Reported-by: Rikard Falkeborn Fixes: 90cba8d20f8b ("tpm/ppi: Constify static struct attribute_group") Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm-chip.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 19e23fcc6bc8..ddaeceb7e109 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -278,8 +278,6 @@ static void tpm_devs_release(struct device *dev) { struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs); - dump_stack(); - /* release the master device reference */ put_device(&chip->dev); } -- cgit v1.2.3-59-g8ed1b From f91803998cf60fbbd4f10d24def676bf8b2a1a7e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Mar 2021 09:42:13 +0200 Subject: RDMA/mlx5: Set correct kernel-doc identifier The W=1 allmodconfig build produces the following warning: drivers/infiniband/hw/mlx5/odp.c:1086: warning: wrong kernel-doc identifier on line: * Parse a series of data segments for page fault handling. Fix it by changing /** to be /* as it is written in kernel-doc documentation. Fixes: 5e769e444d26 ("RDMA/hw/mlx5/odp: Fix formatting and add missing descriptions in 'pagefault_data_segments()'") Link: https://lore.kernel.org/r/20210302074214.1054299-2-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/odp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 374698186662..b103555b1f5d 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -1082,7 +1082,7 @@ end: return ret ? ret : npages; } -/** +/* * Parse a series of data segments for page fault handling. * * @dev: Pointer to mlx5 IB device -- cgit v1.2.3-59-g8ed1b From cca7f12b939bd75f3a5e2b0fa20e3de67d1d33b1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 2 Mar 2021 09:42:14 +0200 Subject: RDMA/uverbs: Fix kernel-doc warning of _uverbs_alloc Fix the following W=1 compilation warning: drivers/infiniband/core/uverbs_ioctl.c:108: warning: expecting prototype for uverbs_alloc(). Prototype was for _uverbs_alloc() instead Fixes: 461bb2eee4e1 ("IB/uverbs: Add a simple allocator to uverbs_attr_bundle") Link: https://lore.kernel.org/r/20210302074214.1054299-3-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index e47c5949013f..ff047eb024ab 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -91,7 +91,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, } /** - * uverbs_alloc() - Quickly allocate memory for use with a bundle + * _uverbs_alloc() - Quickly allocate memory for use with a bundle * @bundle: The bundle * @size: Number of bytes to allocate * @flags: Allocator flags -- cgit v1.2.3-59-g8ed1b From ff70784ab9f89e78e67d5d172bf7644de673f61f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 2 Mar 2021 15:35:48 +0200 Subject: ACPI: bus: Constify is_acpi_node() and friends (part 2) Commit 8b9d6802583a ("ACPI: Constify acpi_bus helper functions, switch to macros") only changed functions for CONFIG_ACPI=y case. This part adjusts the rest. Fixes: 8b9d6802583a ("ACPI: Constify acpi_bus helper functions, switch to macros") Signed-off-by: Andy Shevchenko Reviewed-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3c5757d539ab..9f432411e988 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -746,12 +746,12 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) static inline void acpi_dev_put(struct acpi_device *adev) {} -static inline bool is_acpi_node(struct fwnode_handle *fwnode) +static inline bool is_acpi_node(const struct fwnode_handle *fwnode) { return false; } -static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) +static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode) { return false; } @@ -761,7 +761,7 @@ static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwno return NULL; } -static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) +static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode) { return false; } -- cgit v1.2.3-59-g8ed1b From 31ada99bdd1b4d6b80462eeb87d383f374409e2a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 25 Feb 2021 10:21:49 -0500 Subject: drm/amdgpu: Only check for S0ix if AMD_PMC is configured The S0ix check only makes sense if the AMD PMC driver is present. We need to use the legacy S3 pathes when the PMC driver is not present. Reviewed-by: Prike Liang Reviewed-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 8155c54392c8..36a741d63ddc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -903,10 +903,11 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev) */ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { +#if defined(CONFIG_AMD_PMC) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) return true; } - +#endif return false; } -- cgit v1.2.3-59-g8ed1b From 0c61ac8134ffc851681ce5d4bd60d97c3d5aed27 Mon Sep 17 00:00:00 2001 From: "Asher.Song" Date: Wed, 24 Feb 2021 18:41:34 +0800 Subject: drm/amdgpu:disable VCN for Navi12 SKU Navi12 0x7360/C7 SKU has no video support, so remove it. Reviewed-by: Guchun Chen Signed-off-by: Asher.Song Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/nv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 160fa5f59805..c625c5d8ed89 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -558,7 +558,8 @@ static bool nv_is_headless_sku(struct pci_dev *pdev) { if ((pdev->device == 0x731E && (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || - (pdev->device == 0x7340 && pdev->revision == 0xC9)) + (pdev->device == 0x7340 && pdev->revision == 0xC9) || + (pdev->device == 0x7360 && pdev->revision == 0xC7)) return true; return false; } @@ -634,7 +635,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + if (!nv_is_headless_sku(adev->pdev)) + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); break; -- cgit v1.2.3-59-g8ed1b From e3746696e78f2185633ae9b47c40fabf88bdcf99 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Feb 2021 12:21:07 -0500 Subject: drm/amdgpu/pm: make unsupported power profile messages debug Making them an error confuses users and the errors are harmless as not all asics support all profiles. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1488 Acked-by: Nirmoy Das Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 45564a776e9b..9f0d03ae3109 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1322,7 +1322,7 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, CMN2ASIC_MAPPING_WORKLOAD, profile_mode); if (workload_type < 0) { - dev_err(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode); + dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 093b01159408..d77e4a16e420 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -810,7 +810,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, CMN2ASIC_MAPPING_WORKLOAD, profile_mode); if (workload_type < 0) { - dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n", + dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n", profile_mode); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 5faa509f0dba..5493388fcb10 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -844,7 +844,7 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u * TODO: If some case need switch to powersave/default power mode * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving. */ - dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode); + dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode); return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From 992ace410c32955eb5b2cee602ea68ac9557e35b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Feb 2021 15:46:59 -0500 Subject: drm/amdgpu/swsmu/vangogh: Only use RLCPowerNotify msg for disable Per discussions with PMFW team, the driver only needs to notify the PMFW when the RLC is disabled. The RLC FW will notify the PMFW directly when it's enabled. Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index d77e4a16e420..7ddbaecb11c2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1685,9 +1685,9 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en) uint32_t feature_mask[2]; int ret = 0; - if (adev->pm.fw_version >= 0x43f1700) + if (adev->pm.fw_version >= 0x43f1700 && !en) ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify, - en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL); + RLC_STATUS_OFF, NULL); bitmap_zero(feature->enabled, feature->feature_num); bitmap_zero(feature->supported, feature->feature_num); -- cgit v1.2.3-59-g8ed1b From 6efda1671312e8432216ee8b106e71fa3102e1d3 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 19 Feb 2021 16:18:47 +0800 Subject: drm/amd/pm: correct Arcturus mmTHM_BACO_CNTL register address Arcturus has a different register address from other SMU V11 ASICs. Signed-off-by: Evan Quan Acked-by: Guchun Chen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 90585461a56e..a6211858ead4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -78,6 +78,9 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_smc.bin"); #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xC000 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0xE +#define mmTHM_BACO_CNTL_ARCT 0xA7 +#define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0 + static int link_width[] = {0, 1, 2, 4, 8, 12, 16}; static int link_speed[] = {25, 50, 80, 160}; @@ -1532,9 +1535,15 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) break; default: if (!ras || !ras->supported) { - data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); - data |= 0x80000000; - WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); + if (adev->asic_type == CHIP_ARCTURUS) { + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT); + data |= 0x80000000; + WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT, data); + } else { + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); + data |= 0x80000000; + WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); + } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 0, NULL); } else { -- cgit v1.2.3-59-g8ed1b From 25951362db7b3791488ec45bf56c0043f107b94b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 1 Mar 2021 10:42:50 -0500 Subject: drm/amdgpu: enable BACO runpm by default on sienna cichlid and navy flounder It works fine and was only disabled because primary GPUs don't enter runpm if there is a console bound to the fbdev due to the kmap. This will at least allow runpm on secondary cards. Reviewed-by: Evan Quan Reviewed-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 3c37cf1ae8b7..64beb3399604 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -173,8 +173,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: /* enable runpm if runpm=1 */ if (amdgpu_runtime_pm > 0) adev->runpm = true; -- cgit v1.2.3-59-g8ed1b From b934dd9b44e8ad180b3203ce7d6df3133453ee91 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 2 Mar 2021 14:05:09 +0000 Subject: drm/amd/display: fix the return of the uninitialized value in ret Currently if stream->signal is neither SIGNAL_TYPE_DISPLAY_PORT_MST or SIGNAL_TYPE_DISPLAY_PORT then variable ret is uninitialized and this is checked for > 0 at the end of the function. Ret should be initialized, I believe setting it to zero is a correct default. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: bd0c064c161c ("drm/amd/display: Add return code instead of boolean for future use") Reviewed-by: Harry Wentland Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 5159399f8239..5750818db8f6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -530,7 +530,7 @@ bool dm_helpers_dp_write_dsc_enable( { uint8_t enable_dsc = enable ? 1 : 0; struct amdgpu_dm_connector *aconnector; - uint8_t ret; + uint8_t ret = 0; if (!stream) return false; -- cgit v1.2.3-59-g8ed1b From 1aa46901ee51c1c5779b3b239ea0374a50c6d9ff Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 2 Mar 2021 15:54:00 +0800 Subject: drm/amdgpu: fix parameter error of RREG32_PCIE() in amdgpu_regs_pcie the register offset isn't needed division by 4 to pass RREG32_PCIE() Signed-off-by: Kevin Wang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0a25fecf488a..43059ead733b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -357,7 +357,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = RREG32_PCIE(*pos >> 2); + value = RREG32_PCIE(*pos); r = put_user(value, (uint32_t *)buf); if (r) { pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -424,7 +424,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user return r; } - WREG32_PCIE(*pos >> 2, value); + WREG32_PCIE(*pos, value); result += 4; buf += 4; -- cgit v1.2.3-59-g8ed1b From 140456f994195b568ecd7fc2287a34eadffef3ca Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 17 Feb 2021 17:30:04 +0300 Subject: iommu/amd: Fix sleeping in atomic in increase_address_space() increase_address_space() calls get_zeroed_page(gfp) under spin_lock with disabled interrupts. gfp flags passed to increase_address_space() may allow sleeping, so it comes to this: BUG: sleeping function called from invalid context at mm/page_alloc.c:4342 in_atomic(): 1, irqs_disabled(): 1, pid: 21555, name: epdcbbf1qnhbsd8 Call Trace: dump_stack+0x66/0x8b ___might_sleep+0xec/0x110 __alloc_pages_nodemask+0x104/0x300 get_zeroed_page+0x15/0x40 iommu_map_page+0xdd/0x3e0 amd_iommu_map+0x50/0x70 iommu_map+0x106/0x220 vfio_iommu_type1_ioctl+0x76e/0x950 [vfio_iommu_type1] do_vfs_ioctl+0xa3/0x6f0 ksys_ioctl+0x66/0x70 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4e/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by moving get_zeroed_page() out of spin_lock/unlock section. Fixes: 754265bcab ("iommu/amd: Fix race in increase_address_space()") Signed-off-by: Andrey Ryabinin Acked-by: Will Deacon Cc: Link: https://lore.kernel.org/r/20210217143004.19165-1-arbn@yandex-team.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 1c4961e05c12..bb0ee5c9fde7 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -182,6 +182,10 @@ static bool increase_address_space(struct protection_domain *domain, bool ret = true; u64 *pte; + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + spin_lock_irqsave(&domain->lock, flags); if (address <= PM_LEVEL_SIZE(domain->iop.mode)) @@ -191,10 +195,6 @@ static bool increase_address_space(struct protection_domain *domain, if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) goto out; - pte = (void *)get_zeroed_page(gfp); - if (!pte) - goto out; - *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); domain->iop.root = pte; @@ -208,10 +208,12 @@ static bool increase_address_space(struct protection_domain *domain, */ amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); + pte = NULL; ret = true; out: spin_unlock_irqrestore(&domain->lock, flags); + free_page((unsigned long)pte); return ret; } -- cgit v1.2.3-59-g8ed1b From 765a9d1d02b2f5996b05f5f65faa8a634adbe763 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 18 Feb 2021 14:07:02 -0800 Subject: iommu/tegra-smmu: Fix mc errors on tegra124-nyan Commit 25938c73cd79 ("iommu/tegra-smmu: Rework tegra_smmu_probe_device()") removed certain hack in the tegra_smmu_probe() by relying on IOMMU core to of_xlate SMMU's SID per device, so as to get rid of tegra_smmu_find() and tegra_smmu_configure() that are typically done in the IOMMU core also. This approach works for both existing devices that have DT nodes and other devices (like PCI device) that don't exist in DT, on Tegra210 and Tegra3 upon testing. However, Page Fault errors are reported on tegra124-Nyan: tegra-mc 70019000.memory-controller: display0a: read @0xfe056b40: EMEM address decode error (SMMU translation error [--S]) tegra-mc 70019000.memory-controller: display0a: read @0xfe056b40: Page fault (SMMU translation error [--S]) After debugging, I found that the mentioned commit changed some function callback sequence of tegra-smmu's, resulting in enabling SMMU for display client before display driver gets initialized. I couldn't reproduce exact same issue on Tegra210 as Tegra124 (arm-32) differs at arch-level code. Actually this Page Fault is a known issue, as on most of Tegra platforms, display gets enabled by the bootloader for the splash screen feature, so it keeps filling the framebuffer memory. A proper fix to this issue is to 1:1 linear map the framebuffer memory to IOVA space so the SMMU will have the same address as the physical address in its page table. Yet, Thierry has been working on the solution above for a year, and it hasn't merged. Therefore, let's partially revert the mentioned commit to fix the errors. The reason why we do a partial revert here is that we can still set priv in ->of_xlate() callback for PCI devices. Meanwhile, devices existing in DT, like display, will go through tegra_smmu_configure() at the stage of bus_set_iommu() when SMMU gets probed(), as what it did before we merged the mentioned commit. Once we have the linear map solution for framebuffer memory, this change can be cleaned away. [Big thank to Guillaume who reported and helped debugging/verification] Fixes: 25938c73cd79 ("iommu/tegra-smmu: Rework tegra_smmu_probe_device()") Reported-by: Guillaume Tucker Signed-off-by: Nicolin Chen Tested-by: Guillaume Tucker Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20210218220702.1962-1-nicoleotsuka@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 72 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4a3f095a1c26..97eb62f667d2 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -798,10 +798,70 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova); } +static struct tegra_smmu *tegra_smmu_find(struct device_node *np) +{ + struct platform_device *pdev; + struct tegra_mc *mc; + + pdev = of_find_device_by_node(np); + if (!pdev) + return NULL; + + mc = platform_get_drvdata(pdev); + if (!mc) + return NULL; + + return mc->smmu; +} + +static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, + struct of_phandle_args *args) +{ + const struct iommu_ops *ops = smmu->iommu.ops; + int err; + + err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); + if (err < 0) { + dev_err(dev, "failed to initialize fwspec: %d\n", err); + return err; + } + + err = ops->of_xlate(dev, args); + if (err < 0) { + dev_err(dev, "failed to parse SW group ID: %d\n", err); + iommu_fwspec_free(dev); + return err; + } + + return 0; +} + static struct iommu_device *tegra_smmu_probe_device(struct device *dev) { - struct tegra_smmu *smmu = dev_iommu_priv_get(dev); + struct device_node *np = dev->of_node; + struct tegra_smmu *smmu = NULL; + struct of_phandle_args args; + unsigned int index = 0; + int err; + + while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index, + &args) == 0) { + smmu = tegra_smmu_find(args.np); + if (smmu) { + err = tegra_smmu_configure(smmu, dev, &args); + of_node_put(args.np); + if (err < 0) + return ERR_PTR(err); + + break; + } + + of_node_put(args.np); + index++; + } + + smmu = dev_iommu_priv_get(dev); if (!smmu) return ERR_PTR(-ENODEV); @@ -1028,6 +1088,16 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, if (!smmu) return ERR_PTR(-ENOMEM); + /* + * This is a bit of a hack. Ideally we'd want to simply return this + * value. However the IOMMU registration process will attempt to add + * all devices to the IOMMU when bus_set_iommu() is called. In order + * not to rely on global variables to track the IOMMU instance, we + * set it here so that it can be looked up from the .probe_device() + * callback via the IOMMU device's .drvdata field. + */ + mc->smmu = smmu; + size = BITS_TO_LONGS(soc->num_asids) * sizeof(long); smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From 82c3cefb9f1652e7470f442ff96c613e8c8ed8f4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 25 Feb 2021 14:14:54 +0800 Subject: iommu: Don't use lazy flush for untrusted device The lazy IOTLB flushing setup leaves a time window, in which the device can still access some system memory, which has already been unmapped by the device driver. It's not suitable for untrusted devices. A malicious device might use this to attack the system by obtaining data that it shouldn't obtain. Fixes: c588072bba6b5 ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210225061454.2864009-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f659395e7959..65234e383d6b 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -311,6 +311,11 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) domain->ops->flush_iotlb_all(domain); } +static bool dev_is_untrusted(struct device *dev) +{ + return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -365,8 +370,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, init_iova_domain(iovad, 1UL << order, base_pfn); - if (!cookie->fq_domain && !iommu_domain_get_attr(domain, - DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) { + if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) && + !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && + attr) { if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, iommu_dma_entry_dtor)) pr_warn("iova flush queue initialization failed\n"); @@ -508,11 +514,6 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr, iova_align(iovad, size), dir, attrs); } -static bool dev_is_untrusted(struct device *dev) -{ - return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; -} - static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t size, int prot, u64 dma_mask) { -- cgit v1.2.3-59-g8ed1b From 444d66a23c1f1e4c4d12aed4812681d0ad835d60 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 27 Feb 2021 15:39:09 +0800 Subject: iommu/vt-d: Fix status code for Allocate/Free PASID command As per Intel vt-d spec, Rev 3.0 (section 10.4.45 "Virtual Command Response Register"), the status code of "No PASID available" error in response to the Allocate PASID command is 2, not 1. The same for "Invalid PASID" error in response to the Free PASID command. We will otherwise see confusing kernel log under the command failure from guest side. Fix it. Fixes: 24f27d32ab6b ("iommu/vt-d: Enlightened PASID allocation") Signed-off-by: Zenghui Yu Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210227073909.432-1-yuzenghui@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 97dfcffbf495..444c0bec221a 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -30,8 +30,8 @@ #define VCMD_VRSP_IP 0x1 #define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3) #define VCMD_VRSP_SC_SUCCESS 0 -#define VCMD_VRSP_SC_NO_PASID_AVAIL 1 -#define VCMD_VRSP_SC_INVALID_PASID 1 +#define VCMD_VRSP_SC_NO_PASID_AVAIL 2 +#define VCMD_VRSP_SC_INVALID_PASID 2 #define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff) #define VCMD_CMD_OPERAND(e) ((e) << 8) /* -- cgit v1.2.3-59-g8ed1b From fb3a1f6c745ccd896afadf6e2d6f073e871d38ba Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 09:47:20 -0700 Subject: io-wq: have manager wait for all workers to exit Instead of having to wait separately on workers and manager, just have the manager wait on the workers. We use an atomic_t for the reference here, as we need to start at 0 and allow increment from that. Since the number of workers is naturally capped by the allowed nr of processes, and that uses an int, there is no risk of overflow. Signed-off-by: Jens Axboe --- fs/io-wq.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 965022fe9961..1d01edada8aa 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -120,6 +120,9 @@ struct io_wq { refcount_t refs; struct completion done; + atomic_t worker_refs; + struct completion worker_done; + struct hlist_node cpuhp_node; pid_t task_pid; @@ -189,7 +192,8 @@ static void io_worker_exit(struct io_worker *worker) raw_spin_unlock_irq(&wqe->lock); kfree_rcu(worker, rcu); - io_wq_put(wqe->wq); + if (atomic_dec_and_test(&wqe->wq->worker_refs)) + complete(&wqe->wq->worker_done); } static inline bool io_wqe_run_queue(struct io_wqe *wqe) @@ -648,14 +652,15 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) init_completion(&worker->ref_done); init_completion(&worker->started); - refcount_inc(&wq->refs); + atomic_inc(&wq->worker_refs); if (index == IO_WQ_ACCT_BOUND) pid = io_wq_fork_thread(task_thread_bound, worker); else pid = io_wq_fork_thread(task_thread_unbound, worker); if (pid < 0) { - io_wq_put(wq); + if (atomic_dec_and_test(&wq->worker_refs)) + complete(&wq->worker_done); kfree(worker); return false; } @@ -736,6 +741,7 @@ static int io_wq_manager(void *data) { struct io_wq *wq = data; char buf[TASK_COMM_LEN]; + int node; sprintf(buf, "iou-mgr-%d", wq->task_pid); set_task_comm(current, buf); @@ -753,6 +759,15 @@ static int io_wq_manager(void *data) } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)); io_wq_check_workers(wq); + + rcu_read_lock(); + for_each_node(node) + io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL); + rcu_read_unlock(); + + /* we might not ever have created any workers */ + if (atomic_read(&wq->worker_refs)) + wait_for_completion(&wq->worker_done); wq->manager = NULL; io_wq_put(wq); do_exit(0); @@ -796,6 +811,7 @@ static int io_wq_fork_manager(struct io_wq *wq) if (wq->manager) return 0; + reinit_completion(&wq->worker_done); clear_bit(IO_WQ_BIT_EXIT, &wq->state); refcount_inc(&wq->refs); current->flags |= PF_IO_WORKER; @@ -1050,6 +1066,9 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) init_completion(&wq->done); refcount_set(&wq->refs, 1); + init_completion(&wq->worker_done); + atomic_set(&wq->worker_refs, 0); + ret = io_wq_fork_manager(wq); if (!ret) return wq; @@ -1077,11 +1096,6 @@ static void io_wq_destroy(struct io_wq *wq) if (wq->manager) wake_up_process(wq->manager); - rcu_read_lock(); - for_each_node(node) - io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL); - rcu_read_unlock(); - spin_lock_irq(&wq->hash->wait.lock); for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; -- cgit v1.2.3-59-g8ed1b From 613eeb600e3e636a1d3b3711dddaf2b134d5a32c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 09:52:02 -0700 Subject: io-wq: don't ask for a new worker if we're exiting If we're in the process of shutting down the async context, then don't create new workers if we already have at least the fixed one. Signed-off-by: Jens Axboe --- fs/io-wq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io-wq.c b/fs/io-wq.c index 1d01edada8aa..2f9d7ee12ee1 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -673,6 +673,8 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index) { struct io_wqe_acct *acct = &wqe->acct[index]; + if (acct->nr_workers && test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) + return false; /* if we have available workers or no work, no need */ if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe)) return false; -- cgit v1.2.3-59-g8ed1b From dbf996202e28c6b1eb30afad534abe45a691499e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 09:53:17 -0700 Subject: io-wq: rename wq->done completion to wq->started This is a leftover from a different use cases, it's used to wait for the manager to startup. Rename it as such. Signed-off-by: Jens Axboe --- fs/io-wq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 2f9d7ee12ee1..1e5b41614bd6 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -118,7 +118,7 @@ struct io_wq { struct io_wq_hash *hash; refcount_t refs; - struct completion done; + struct completion started; atomic_t worker_refs; struct completion worker_done; @@ -750,7 +750,7 @@ static int io_wq_manager(void *data) current->flags |= PF_IO_WORKER; wq->manager = current; - complete(&wq->done); + complete(&wq->started); do { set_current_state(TASK_INTERRUPTIBLE); @@ -820,7 +820,7 @@ static int io_wq_fork_manager(struct io_wq *wq) ret = io_wq_fork_thread(io_wq_manager, wq); current->flags &= ~PF_IO_WORKER; if (ret >= 0) { - wait_for_completion(&wq->done); + wait_for_completion(&wq->started); return 0; } @@ -1065,7 +1065,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) } wq->task_pid = current->pid; - init_completion(&wq->done); + init_completion(&wq->started); refcount_set(&wq->refs, 1); init_completion(&wq->worker_done); -- cgit v1.2.3-59-g8ed1b From d364d9e5db41678b77ed95c41e3ccaad9ab99ba0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 09:56:32 -0700 Subject: io-wq: wait for manager exit on wq destroy The manager waits for the workers, hence the manager is always valid if workers are running. Now also have wq destroy wait for the manager on exit, so we now everything is gone. Signed-off-by: Jens Axboe --- fs/io-wq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 1e5b41614bd6..7a1d51c1aca9 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -119,6 +119,7 @@ struct io_wq { refcount_t refs; struct completion started; + struct completion exited; atomic_t worker_refs; struct completion worker_done; @@ -771,6 +772,7 @@ static int io_wq_manager(void *data) if (atomic_read(&wq->worker_refs)) wait_for_completion(&wq->worker_done); wq->manager = NULL; + complete(&wq->exited); io_wq_put(wq); do_exit(0); } @@ -1066,6 +1068,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) wq->task_pid = current->pid; init_completion(&wq->started); + init_completion(&wq->exited); refcount_set(&wq->refs, 1); init_completion(&wq->worker_done); @@ -1095,8 +1098,10 @@ static void io_wq_destroy(struct io_wq *wq) cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); set_bit(IO_WQ_BIT_EXIT, &wq->state); - if (wq->manager) + if (wq->manager) { wake_up_process(wq->manager); + wait_for_completion(&wq->exited); + } spin_lock_irq(&wq->hash->wait.lock); for_each_node(node) { -- cgit v1.2.3-59-g8ed1b From 470ec4ed8c91b4db398ad607c700e9ce88365202 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 10:20:34 -0700 Subject: io-wq: fix double put of 'wq' in error path We are already freeing the wq struct in both spots, so don't put it and get it freed twice. Reported-by: syzbot+7bf785eedca35ca05501@syzkaller.appspotmail.com Fixes: 4fb6ac326204 ("io-wq: improve manager/worker handling over exec") Signed-off-by: Jens Axboe --- fs/io-wq.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 7a1d51c1aca9..f0b7e9ff63fa 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -826,7 +826,6 @@ static int io_wq_fork_manager(struct io_wq *wq) return 0; } - io_wq_put(wq); return ret; } @@ -1078,7 +1077,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) if (!ret) return wq; - io_wq_put(wq); io_wq_put_hash(data->hash); err: cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); -- cgit v1.2.3-59-g8ed1b From e54945ae947fb881212a4b97d5599a01bba6ad06 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 11:27:15 -0700 Subject: io_uring: SQPOLL stop error handling fixes If we fail to fork an SQPOLL worker, we can hit cancel, and hence attempted thread stop, with the thread already being stopped. Ensure we check for that. Also guard thread stop fully by the sqd mutex, just like we do for park. Signed-off-by: Jens Axboe --- fs/io_uring.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 4a088581b0f2..d55c9ab6314a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6793,9 +6793,9 @@ static int io_sq_thread(void *data) ctx->sqo_exec = 1; io_ring_set_wakeup_flag(ctx); } - mutex_unlock(&sqd->lock); complete(&sqd->exited); + mutex_unlock(&sqd->lock); do_exit(0); } @@ -7118,13 +7118,19 @@ static bool io_sq_thread_park(struct io_sq_data *sqd) static void io_sq_thread_stop(struct io_sq_data *sqd) { - if (!sqd->thread) + if (test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) return; - - set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); - WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)); - wake_up_process(sqd->thread); - wait_for_completion(&sqd->exited); + mutex_lock(&sqd->lock); + if (sqd->thread) { + set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)); + wake_up_process(sqd->thread); + mutex_unlock(&sqd->lock); + wait_for_completion(&sqd->exited); + WARN_ON_ONCE(sqd->thread); + } else { + mutex_unlock(&sqd->lock); + } } static void io_put_sq_data(struct io_sq_data *sqd) @@ -8867,6 +8873,11 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx) if (!io_sq_thread_park(sqd)) return; tctx = ctx->sq_data->thread->io_uring; + /* can happen on fork/alloc failure, just ignore that state */ + if (!tctx) { + io_sq_thread_unpark(sqd); + return; + } atomic_inc(&tctx->in_idle); do { -- cgit v1.2.3-59-g8ed1b From ba50a036f23c44608b1d903c34644a1acd5d21fa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 26 Feb 2021 15:47:56 +0000 Subject: io_uring: run fallback on cancellation io_uring_try_cancel_requests() matches not only current's requests, but also of other exiting tasks, so we need to actively cancel them and not just wait, especially since the function can be called on flush during do_exit() -> exit_files(). Even if it's not a problem for now, it's much nicer to know that the function tries to cancel everything it can. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d55c9ab6314a..9d6696ff5748 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8518,9 +8518,10 @@ static int io_remove_personalities(int id, void *p, void *data) return 0; } -static void io_run_ctx_fallback(struct io_ring_ctx *ctx) +static bool io_run_ctx_fallback(struct io_ring_ctx *ctx) { struct callback_head *work, *head, *next; + bool executed = false; do { do { @@ -8537,7 +8538,10 @@ static void io_run_ctx_fallback(struct io_ring_ctx *ctx) work = next; cond_resched(); } while (work); + executed = true; } while (1); + + return executed; } static void io_ring_exit_work(struct work_struct *work) @@ -8677,6 +8681,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, ret |= io_poll_remove_all(ctx, task, files); ret |= io_kill_timeouts(ctx, task, files); ret |= io_run_task_work(); + ret |= io_run_ctx_fallback(ctx); io_cqring_overflow_flush(ctx, true, task, files); if (!ret) break; -- cgit v1.2.3-59-g8ed1b From 8629397e6e2753bb4cc62ba48a12e1d4d912b6a4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 13:46:49 -0700 Subject: io_uring: don't use complete_all() on SQPOLL thread exit We want to reuse this completion, and a single complete should do just fine. Ensure that we park ourselves first if requested, as that is what lead to the initial deadlock in this area. If we've got someone attempting to park us, then we can't proceed without having them finish first. Fixes: 37d1e2e3642e ("io_uring: move SQPOLL thread io-wq forked worker") Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9d6696ff5748..904bf0fecc36 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6783,10 +6783,13 @@ static int io_sq_thread(void *data) io_run_task_work(); + if (io_sq_thread_should_park(sqd)) + io_sq_thread_parkme(sqd); + /* * Clear thread under lock so that concurrent parks work correctly */ - complete_all(&sqd->completion); + complete(&sqd->completion); mutex_lock(&sqd->lock); sqd->thread = NULL; list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { -- cgit v1.2.3-59-g8ed1b From afcc4015d1bf5659b8c722aff679e9b8c41ee156 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 13:48:19 -0700 Subject: io-wq: provide an io_wq_put_and_exit() helper If we put the io-wq from io_uring, we really want it to exit. Provide a helper that does that for us. Couple that with not having the manager hold a reference to the 'wq' and the normal SQPOLL exit will tear down the io-wq context appropriate. On the io-wq side, our wq context is per task, so only the task itself is manipulating ->manager and hence it's safe to check and clear without any extra locking. We just need to ensure that the manager task stays around, in case it exits. Signed-off-by: Jens Axboe --- fs/io-wq.c | 29 +++++++++++++++++++---------- fs/io-wq.h | 1 + fs/io_uring.c | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index f0b7e9ff63fa..1407ba74ffc3 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -749,7 +749,7 @@ static int io_wq_manager(void *data) sprintf(buf, "iou-mgr-%d", wq->task_pid); set_task_comm(current, buf); current->flags |= PF_IO_WORKER; - wq->manager = current; + wq->manager = get_task_struct(current); complete(&wq->started); @@ -771,9 +771,7 @@ static int io_wq_manager(void *data) /* we might not ever have created any workers */ if (atomic_read(&wq->worker_refs)) wait_for_completion(&wq->worker_done); - wq->manager = NULL; complete(&wq->exited); - io_wq_put(wq); do_exit(0); } @@ -816,8 +814,6 @@ static int io_wq_fork_manager(struct io_wq *wq) return 0; reinit_completion(&wq->worker_done); - clear_bit(IO_WQ_BIT_EXIT, &wq->state); - refcount_inc(&wq->refs); current->flags |= PF_IO_WORKER; ret = io_wq_fork_thread(io_wq_manager, wq); current->flags &= ~PF_IO_WORKER; @@ -1089,6 +1085,16 @@ err_wq: return ERR_PTR(ret); } +static void io_wq_destroy_manager(struct io_wq *wq) +{ + if (wq->manager) { + wake_up_process(wq->manager); + wait_for_completion(&wq->exited); + put_task_struct(wq->manager); + wq->manager = NULL; + } +} + static void io_wq_destroy(struct io_wq *wq) { int node; @@ -1096,10 +1102,7 @@ static void io_wq_destroy(struct io_wq *wq) cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); set_bit(IO_WQ_BIT_EXIT, &wq->state); - if (wq->manager) { - wake_up_process(wq->manager); - wait_for_completion(&wq->exited); - } + io_wq_destroy_manager(wq); spin_lock_irq(&wq->hash->wait.lock); for_each_node(node) { @@ -1112,7 +1115,6 @@ static void io_wq_destroy(struct io_wq *wq) io_wq_put_hash(wq->hash); kfree(wq->wqes); kfree(wq); - } void io_wq_put(struct io_wq *wq) @@ -1121,6 +1123,13 @@ void io_wq_put(struct io_wq *wq) io_wq_destroy(wq); } +void io_wq_put_and_exit(struct io_wq *wq) +{ + set_bit(IO_WQ_BIT_EXIT, &wq->state); + io_wq_destroy_manager(wq); + io_wq_put(wq); +} + static bool io_wq_worker_affinity(struct io_worker *worker, void *data) { struct task_struct *task = worker->task; diff --git a/fs/io-wq.h b/fs/io-wq.h index b6ca12b60c35..f6ef433df8a8 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -114,6 +114,7 @@ struct io_wq_data { struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); void io_wq_put(struct io_wq *wq); +void io_wq_put_and_exit(struct io_wq *wq); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_hash_work(struct io_wq_work *work, void *val); diff --git a/fs/io_uring.c b/fs/io_uring.c index 904bf0fecc36..cb65e54c1b09 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8857,7 +8857,7 @@ void __io_uring_files_cancel(struct files_struct *files) if (files) { io_uring_remove_task_files(tctx); if (tctx->io_wq) { - io_wq_put(tctx->io_wq); + io_wq_put_and_exit(tctx->io_wq); tctx->io_wq = NULL; } } -- cgit v1.2.3-59-g8ed1b From 1d5f360dd1a3c04e00a52af74dd84fdb0e1d454b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Feb 2021 14:54:16 -0700 Subject: io_uring: fix race condition in task_work add and clear We clear the bit marking the ctx task_work as active after having run the queued work, but we really should be clearing it before. Otherwise we can hit a tiny race ala: CPU0 CPU1 io_task_work_add() tctx_task_work() run_work add_to_list test_and_set_bit clear_bit already set and CPU0 will return thinking the task_work is queued, while in reality it's already being run. If we hit the condition after __tctx_task_work() found no more work, but before we've cleared the bit, then we'll end up thinking it's queued and will be run. In reality it is queued, but we didn't queue the ctx task_work to ensure that it gets run. Fixes: 7cbf1722d5fc ("io_uring: provide FIFO ordering for task_work") Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index cb65e54c1b09..83973f6b3c0a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1893,10 +1893,10 @@ static void tctx_task_work(struct callback_head *cb) { struct io_uring_task *tctx = container_of(cb, struct io_uring_task, task_work); + clear_bit(0, &tctx->task_state); + while (__tctx_task_work(tctx)) cond_resched(); - - clear_bit(0, &tctx->task_state); } static int io_task_work_add(struct task_struct *tsk, struct io_kiocb *req, -- cgit v1.2.3-59-g8ed1b From ef8eaa4e65facb1f51a64dbb4f5500134622c67c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 27 Feb 2021 11:16:45 +0000 Subject: io_uring: warn on not destroyed io-wq Make sure that we killed an io-wq by the time a task is dead. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 83973f6b3c0a..796b6d1f72f9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7843,6 +7843,8 @@ void __io_uring_free(struct task_struct *tsk) struct io_uring_task *tctx = tsk->io_uring; WARN_ON_ONCE(!xa_empty(&tctx->xa)); + WARN_ON_ONCE(tctx->io_wq); + percpu_counter_destroy(&tctx->inflight); kfree(tctx); tsk->io_uring = NULL; -- cgit v1.2.3-59-g8ed1b From 8452d4a674b0e59bd53baef0b30b018690dde594 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 27 Feb 2021 11:16:46 +0000 Subject: io_uring: destroy io-wq on exec Destroy current's io-wq backend and tctx on __io_uring_task_cancel(), aka exec(). Looks it's not strictly necessary, because it will be done at some point when the task dies and changes of creds/files/etc. are handled, but better to do that earlier to free io-wq and not potentially lock previous mm and other resources for the time being. It's safe to do because we wait for all requests of the current task to complete, so no request will use tctx afterwards. Note, that io_uring_files_cancel() may leave some requests for later reaping, so it leaves tctx intact, that's ok as the task is dying anyway. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 ++++++++++--------- include/linux/io_uring.h | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 796b6d1f72f9..73d1bd8db1bb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8835,13 +8835,17 @@ static void io_uring_del_task_file(struct file *file) fput(file); } -static void io_uring_remove_task_files(struct io_uring_task *tctx) +static void io_uring_clean_tctx(struct io_uring_task *tctx) { struct file *file; unsigned long index; xa_for_each(&tctx->xa, index, file) io_uring_del_task_file(file); + if (tctx->io_wq) { + io_wq_put_and_exit(tctx->io_wq); + tctx->io_wq = NULL; + } } void __io_uring_files_cancel(struct files_struct *files) @@ -8856,13 +8860,8 @@ void __io_uring_files_cancel(struct files_struct *files) io_uring_cancel_task_requests(file->private_data, files); atomic_dec(&tctx->in_idle); - if (files) { - io_uring_remove_task_files(tctx); - if (tctx->io_wq) { - io_wq_put_and_exit(tctx->io_wq); - tctx->io_wq = NULL; - } - } + if (files) + io_uring_clean_tctx(tctx); } static s64 tctx_inflight(struct io_uring_task *tctx) @@ -8954,7 +8953,9 @@ void __io_uring_task_cancel(void) atomic_dec(&tctx->in_idle); - io_uring_remove_task_files(tctx); + io_uring_clean_tctx(tctx); + /* all current's requests should be gone, we can kill tctx */ + __io_uring_free(current); } static int io_uring_flush(struct file *file, void *data) diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 51ede771cd99..7cb7bd0e334c 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -38,7 +38,7 @@ void __io_uring_free(struct task_struct *tsk); static inline void io_uring_task_cancel(void) { - if (current->io_uring && !xa_empty(¤t->io_uring->xa)) + if (current->io_uring) __io_uring_task_cancel(); } static inline void io_uring_files_cancel(struct files_struct *files) -- cgit v1.2.3-59-g8ed1b From 4010fec41fd9fc5ca6956b958d14b32e41aded48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 27 Feb 2021 15:04:18 -0700 Subject: io_uring: remove unused argument 'tsk' from io_req_caches_free() We prune the full cache regardless, get rid of the dead argument. Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 73d1bd8db1bb..acbf3c7264b5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8395,7 +8395,7 @@ static void io_req_cache_free(struct list_head *list, struct task_struct *tsk) } } -static void io_req_caches_free(struct io_ring_ctx *ctx, struct task_struct *tsk) +static void io_req_caches_free(struct io_ring_ctx *ctx) { struct io_submit_state *submit_state = &ctx->submit_state; struct io_comp_state *cs = &ctx->submit_state.comp; @@ -8455,7 +8455,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) percpu_ref_exit(&ctx->refs); free_uid(ctx->user); - io_req_caches_free(ctx, NULL); + io_req_caches_free(ctx); if (ctx->hash_map) io_wq_put_hash(ctx->hash_map); kfree(ctx->cancel_hash); @@ -8969,7 +8969,7 @@ static int io_uring_flush(struct file *file, void *data) if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) { io_uring_cancel_task_requests(ctx, NULL); - io_req_caches_free(ctx, current); + io_req_caches_free(ctx); } io_run_ctx_fallback(ctx); -- cgit v1.2.3-59-g8ed1b From 1575f21a09206e914b81dace0add693346d97594 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 27 Feb 2021 15:20:49 -0700 Subject: io_uring: kill unnecessary REQ_F_WORK_INITIALIZED checks We're no longer checking anything that requires the work item to be initialized, as we're not carrying any file related state there. Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index acbf3c7264b5..1dd30a15ea6a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1080,8 +1080,6 @@ static bool io_match_task(struct io_kiocb *head, return true; io_for_each_link(req, head) { - if (!(req->flags & REQ_F_WORK_INITIALIZED)) - continue; if (req->file && req->file->f_op == &io_uring_fops) return true; if (req->task->files == files) @@ -1800,15 +1798,7 @@ static void io_fail_links(struct io_kiocb *req) trace_io_uring_fail_link(req, link); io_cqring_fill_event(link, -ECANCELED); - /* - * It's ok to free under spinlock as they're not linked anymore, - * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on - * work.fs->lock. - */ - if (link->flags & REQ_F_WORK_INITIALIZED) - io_put_req_deferred(link, 2); - else - io_double_put_req(link); + io_put_req_deferred(link, 2); link = nxt; } io_commit_cqring(ctx); -- cgit v1.2.3-59-g8ed1b From 5730b27e84fdb37353c7cc2b11c24a4f9d73626e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 27 Feb 2021 15:57:30 -0700 Subject: io_uring: move cred assignment into io_issue_sqe() If we move it in there, then we no longer have to care about it in io-wq. This means we can drop the cred handling in io-wq, and we can drop the REQ_F_WORK_INITIALIZED flag and async init functions as that was the last user of it since we moved to the new workers. Then we can also drop io_wq_work->creds, and just hold the personality u16 in there instead. Suggested-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io-wq.c | 26 --------------------- fs/io-wq.h | 2 +- fs/io_uring.c | 75 +++++++++++++++++------------------------------------------ 3 files changed, 22 insertions(+), 81 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 1407ba74ffc3..946826beefe6 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -52,9 +52,6 @@ struct io_worker { struct io_wq_work *cur_work; spinlock_t lock; - const struct cred *cur_creds; - const struct cred *saved_creds; - struct completion ref_done; struct completion started; @@ -180,11 +177,6 @@ static void io_worker_exit(struct io_worker *worker) worker->flags = 0; preempt_enable(); - if (worker->saved_creds) { - revert_creds(worker->saved_creds); - worker->cur_creds = worker->saved_creds = NULL; - } - raw_spin_lock_irq(&wqe->lock); if (flags & IO_WORKER_F_FREE) hlist_nulls_del_rcu(&worker->nulls_node); @@ -326,10 +318,6 @@ static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) worker->flags |= IO_WORKER_F_FREE; hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); } - if (worker->saved_creds) { - revert_creds(worker->saved_creds); - worker->cur_creds = worker->saved_creds = NULL; - } } static inline unsigned int io_get_work_hash(struct io_wq_work *work) @@ -404,18 +392,6 @@ static void io_flush_signals(void) } } -static void io_wq_switch_creds(struct io_worker *worker, - struct io_wq_work *work) -{ - const struct cred *old_creds = override_creds(work->creds); - - worker->cur_creds = work->creds; - if (worker->saved_creds) - put_cred(old_creds); /* creds set by previous switch */ - else - worker->saved_creds = old_creds; -} - static void io_assign_current_work(struct io_worker *worker, struct io_wq_work *work) { @@ -465,8 +441,6 @@ get_next: unsigned int hash = io_get_work_hash(work); next_hashed = wq_next_work(work); - if (work->creds && worker->cur_creds != work->creds) - io_wq_switch_creds(worker, work); wq->do_work(work); io_assign_current_work(worker, NULL); diff --git a/fs/io-wq.h b/fs/io-wq.h index f6ef433df8a8..42f0be64a84d 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -79,8 +79,8 @@ static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work { struct io_wq_work_node list; - const struct cred *creds; unsigned flags; + unsigned short personality; }; static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1dd30a15ea6a..d48be0ccc590 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -688,7 +688,6 @@ enum { REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, REQ_F_NO_FILE_TABLE_BIT, - REQ_F_WORK_INITIALIZED_BIT, REQ_F_LTIMEOUT_ACTIVE_BIT, REQ_F_COMPLETE_INLINE_BIT, @@ -730,8 +729,6 @@ enum { REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), /* doesn't need file table for this request */ REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), - /* io_wq_work is initialized */ - REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), /* linked timeout is active, i.e. prepared by link's head */ REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT), /* completion is deferred through io_comp_state */ @@ -1094,24 +1091,6 @@ static inline void req_set_fail_links(struct io_kiocb *req) req->flags |= REQ_F_FAIL_LINK; } -static inline void __io_req_init_async(struct io_kiocb *req) -{ - memset(&req->work, 0, sizeof(req->work)); - req->flags |= REQ_F_WORK_INITIALIZED; -} - -/* - * Note: must call io_req_init_async() for the first time you - * touch any members of io_wq_work. - */ -static inline void io_req_init_async(struct io_kiocb *req) -{ - if (req->flags & REQ_F_WORK_INITIALIZED) - return; - - __io_req_init_async(req); -} - static void io_ring_ctx_ref_free(struct percpu_ref *ref) { struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); @@ -1196,13 +1175,6 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq) static void io_req_clean_work(struct io_kiocb *req) { - if (!(req->flags & REQ_F_WORK_INITIALIZED)) - return; - - if (req->work.creds) { - put_cred(req->work.creds); - req->work.creds = NULL; - } if (req->flags & REQ_F_INFLIGHT) { struct io_ring_ctx *ctx = req->ctx; struct io_uring_task *tctx = req->task->io_uring; @@ -1215,8 +1187,6 @@ static void io_req_clean_work(struct io_kiocb *req) if (atomic_read(&tctx->in_idle)) wake_up(&tctx->wait); } - - req->flags &= ~REQ_F_WORK_INITIALIZED; } static void io_req_track_inflight(struct io_kiocb *req) @@ -1224,7 +1194,6 @@ static void io_req_track_inflight(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; if (!(req->flags & REQ_F_INFLIGHT)) { - io_req_init_async(req); req->flags |= REQ_F_INFLIGHT; spin_lock_irq(&ctx->inflight_lock); @@ -1238,8 +1207,6 @@ static void io_prep_async_work(struct io_kiocb *req) const struct io_op_def *def = &io_op_defs[req->opcode]; struct io_ring_ctx *ctx = req->ctx; - io_req_init_async(req); - if (req->flags & REQ_F_FORCE_ASYNC) req->work.flags |= IO_WQ_WORK_CONCURRENT; @@ -1250,8 +1217,6 @@ static void io_prep_async_work(struct io_kiocb *req) if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } - if (!req->work.creds) - req->work.creds = get_current_cred(); } static void io_prep_async_link(struct io_kiocb *req) @@ -3578,7 +3543,6 @@ static int __io_splice_prep(struct io_kiocb *req, * Splice operation will be punted aync, and here need to * modify io_wq_work.flags, so initialize io_wq_work firstly. */ - io_req_init_async(req); req->work.flags |= IO_WQ_WORK_UNBOUND; } @@ -5935,8 +5899,22 @@ static void __io_clean_op(struct io_kiocb *req) static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; + const struct cred *creds = NULL; int ret; + if (req->work.personality) { + const struct cred *new_creds; + + if (!(issue_flags & IO_URING_F_NONBLOCK)) + mutex_lock(&ctx->uring_lock); + new_creds = idr_find(&ctx->personality_idr, req->work.personality); + if (!(issue_flags & IO_URING_F_NONBLOCK)) + mutex_unlock(&ctx->uring_lock); + if (!new_creds) + return -EINVAL; + creds = override_creds(new_creds); + } + switch (req->opcode) { case IORING_OP_NOP: ret = io_nop(req, issue_flags); @@ -6043,6 +6021,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) break; } + if (creds) + revert_creds(creds); + if (ret) return ret; @@ -6206,18 +6187,10 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) static void __io_queue_sqe(struct io_kiocb *req) { struct io_kiocb *linked_timeout = io_prep_linked_timeout(req); - const struct cred *old_creds = NULL; int ret; - if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds && - req->work.creds != current_cred()) - old_creds = override_creds(req->work.creds); - ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); - if (old_creds) - revert_creds(old_creds); - /* * We async punt it if the file wasn't marked NOWAIT, or if the file * doesn't support non-blocking read/write attempts @@ -6304,7 +6277,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, { struct io_submit_state *state; unsigned int sqe_flags; - int id, ret = 0; + int ret = 0; req->opcode = READ_ONCE(sqe->opcode); /* same numerical values with corresponding REQ_F_*, safe to copy */ @@ -6336,15 +6309,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, !io_op_defs[req->opcode].buffer_select) return -EOPNOTSUPP; - id = READ_ONCE(sqe->personality); - if (id) { - __io_req_init_async(req); - req->work.creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!req->work.creds)) - return -EINVAL; - get_cred(req->work.creds); - } - + req->work.list.next = NULL; + req->work.flags = 0; + req->work.personality = READ_ONCE(sqe->personality); state = &ctx->submit_state; /* -- cgit v1.2.3-59-g8ed1b From 914390bcfdd6351a4d308da7f43294476ea7d3bf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 28 Feb 2021 12:22:37 -0700 Subject: io_uring: kill unnecessary io_run_ctx_fallback() in io_ring_exit_work() We already run the fallback task_work in io_uring_try_cancel_requests(), no need to duplicate at ring exit explicitly. Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d48be0ccc590..7cad82b51eca 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8519,7 +8519,6 @@ static void io_ring_exit_work(struct work_struct *work) */ do { io_uring_try_cancel_requests(ctx, NULL, NULL); - io_run_ctx_fallback(ctx); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); } -- cgit v1.2.3-59-g8ed1b From 0d30b3e7eea94cc818fadf2ac0dd189c616028f8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 28 Feb 2021 12:23:27 -0700 Subject: io_uring: kill io_uring_flush() This was always a weird work-around or file referencing, and we don't need it anymore. Get rid of it. Signed-off-by: Jens Axboe --- fs/io_uring.c | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7cad82b51eca..766df21769a8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8914,52 +8914,6 @@ void __io_uring_task_cancel(void) __io_uring_free(current); } -static int io_uring_flush(struct file *file, void *data) -{ - struct io_uring_task *tctx = current->io_uring; - struct io_ring_ctx *ctx = file->private_data; - - /* Ignore helper thread files exit */ - if (current->flags & PF_IO_WORKER) - return 0; - - if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) { - io_uring_cancel_task_requests(ctx, NULL); - io_req_caches_free(ctx); - } - - io_run_ctx_fallback(ctx); - - if (!tctx) - return 0; - - /* we should have cancelled and erased it before PF_EXITING */ - WARN_ON_ONCE((current->flags & PF_EXITING) && - xa_load(&tctx->xa, (unsigned long)file)); - - /* - * fput() is pending, will be 2 if the only other ref is our potential - * task file note. If the task is exiting, drop regardless of count. - */ - if (atomic_long_read(&file->f_count) != 2) - return 0; - - if (ctx->flags & IORING_SETUP_SQPOLL) { - /* there is only one file note, which is owned by sqo_task */ - WARN_ON_ONCE(ctx->sqo_task != current && - xa_load(&tctx->xa, (unsigned long)file)); - /* sqo_dead check is for when this happens after cancellation */ - WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead && - !xa_load(&tctx->xa, (unsigned long)file)); - - io_disable_sqo_submit(ctx); - } - - if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current) - io_uring_del_task_file(file); - return 0; -} - static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff, size_t sz) { @@ -9291,7 +9245,6 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) static const struct file_operations io_uring_fops = { .release = io_uring_release, - .flush = io_uring_flush, .mmap = io_uring_mmap, #ifndef CONFIG_MMU .get_unmapped_area = io_uring_nommu_get_unmapped_area, -- cgit v1.2.3-59-g8ed1b From 2c32395d8111037ae2cb8cab883e80bcdbb70713 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 28 Feb 2021 22:04:53 +0000 Subject: io_uring: fix __tctx_task_work() ctx race There is an unlikely but possible race using a freed context. That's because req->task_work.func() can free a request, but we won't necessarily find a completion in submit_state.comp and so all ctx refs may be put by the time we do mutex_lock(&ctx->uring_ctx); There are several reasons why it can miss going through submit_state.comp: 1) req->task_work.func() didn't complete it itself, but punted to iowq (e.g. reissue) and it got freed later, or a similar situation with it overflowing and getting flushed by someone else, or being submitted to IRQ completion, 2) As we don't hold the uring_lock, someone else can do io_submit_flush_completions() and put our ref. 3) Bugs and code obscurities, e.g. failing to propagate issue_flags properly. One example is as follows CPU1 | CPU2 ======================================================================= @req->task_work.func() | -> @req overflwed, | so submit_state.comp,nr==0 | | flush overflows, and free @req | ctx refs == 0, free it ctx is dead, but we do | lock + flush + unlock | So take a ctx reference for each new ctx we see in __tctx_task_work(), and do release it until we do all our flushing. Fixes: 65453d1efbd2 ("io_uring: enable req cache for task_work items") Reported-by: syzbot+a157ac7c03a56397f553@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov [axboe: fold in my one-liner and fix ref mismatch] Signed-off-by: Jens Axboe --- fs/io_uring.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 766df21769a8..62a73543ab86 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1800,6 +1800,18 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) return __io_req_find_next(req); } +static void ctx_flush_and_put(struct io_ring_ctx *ctx) +{ + if (!ctx) + return; + if (ctx->submit_state.comp.nr) { + mutex_lock(&ctx->uring_lock); + io_submit_flush_completions(&ctx->submit_state.comp, ctx); + mutex_unlock(&ctx->uring_lock); + } + percpu_ref_put(&ctx->refs); +} + static bool __tctx_task_work(struct io_uring_task *tctx) { struct io_ring_ctx *ctx = NULL; @@ -1817,30 +1829,20 @@ static bool __tctx_task_work(struct io_uring_task *tctx) node = list.first; while (node) { struct io_wq_work_node *next = node->next; - struct io_ring_ctx *this_ctx; struct io_kiocb *req; req = container_of(node, struct io_kiocb, io_task_work.node); - this_ctx = req->ctx; - req->task_work.func(&req->task_work); - node = next; - - if (!ctx) { - ctx = this_ctx; - } else if (ctx != this_ctx) { - mutex_lock(&ctx->uring_lock); - io_submit_flush_completions(&ctx->submit_state.comp, ctx); - mutex_unlock(&ctx->uring_lock); - ctx = this_ctx; + if (req->ctx != ctx) { + ctx_flush_and_put(ctx); + ctx = req->ctx; + percpu_ref_get(&ctx->refs); } - } - if (ctx && ctx->submit_state.comp.nr) { - mutex_lock(&ctx->uring_lock); - io_submit_flush_completions(&ctx->submit_state.comp, ctx); - mutex_unlock(&ctx->uring_lock); + req->task_work.func(&req->task_work); + node = next; } + ctx_flush_and_put(ctx); return list.first != NULL; } -- cgit v1.2.3-59-g8ed1b From 28c4721b80a702462fb77373c23428ee698fa5dd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 28 Feb 2021 22:04:54 +0000 Subject: io_uring: replace cmpxchg in fallback with xchg io_run_ctx_fallback() can use xchg() instead of cmpxchg(). It's simpler and faster. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 62a73543ab86..57c7833ba62b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8484,15 +8484,11 @@ static int io_remove_personalities(int id, void *p, void *data) static bool io_run_ctx_fallback(struct io_ring_ctx *ctx) { - struct callback_head *work, *head, *next; + struct callback_head *work, *next; bool executed = false; do { - do { - head = NULL; - work = READ_ONCE(ctx->exit_task_work); - } while (cmpxchg(&ctx->exit_task_work, work, head) != work); - + work = xchg(&ctx->exit_task_work, NULL); if (!work) break; -- cgit v1.2.3-59-g8ed1b From 3ebba796fa251d042be42b929a2d916ee5c34a49 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 28 Feb 2021 15:32:18 -0700 Subject: io_uring: ensure that SQPOLL thread is started for exit If we create it in a disabled state because IORING_SETUP_R_DISABLED is set on ring creation, we need to ensure that we've kicked the thread if we're exiting before it's been explicitly disabled. Otherwise we can run into a deadlock where exit is waiting go park the SQPOLL thread, but the SQPOLL thread itself is waiting to get a signal to start. That results in the below trace of both tasks hung, waiting on each other: INFO: task syz-executor458:8401 blocked for more than 143 seconds. Not tainted 5.11.0-next-20210226-syzkaller #0 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor458 state:D stack:27536 pid: 8401 ppid: 8400 flags:0x00004004 Call Trace: context_switch kernel/sched/core.c:4324 [inline] __schedule+0x90c/0x21a0 kernel/sched/core.c:5075 schedule+0xcf/0x270 kernel/sched/core.c:5154 schedule_timeout+0x1db/0x250 kernel/time/timer.c:1868 do_wait_for_common kernel/sched/completion.c:85 [inline] __wait_for_common kernel/sched/completion.c:106 [inline] wait_for_common kernel/sched/completion.c:117 [inline] wait_for_completion+0x168/0x270 kernel/sched/completion.c:138 io_sq_thread_park fs/io_uring.c:7115 [inline] io_sq_thread_park+0xd5/0x130 fs/io_uring.c:7103 io_uring_cancel_task_requests+0x24c/0xd90 fs/io_uring.c:8745 __io_uring_files_cancel+0x110/0x230 fs/io_uring.c:8840 io_uring_files_cancel include/linux/io_uring.h:47 [inline] do_exit+0x299/0x2a60 kernel/exit.c:780 do_group_exit+0x125/0x310 kernel/exit.c:922 __do_sys_exit_group kernel/exit.c:933 [inline] __se_sys_exit_group kernel/exit.c:931 [inline] __x64_sys_exit_group+0x3a/0x50 kernel/exit.c:931 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x43e899 RSP: 002b:00007ffe89376d48 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 00000000004af2f0 RCX: 000000000043e899 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffffffffffffffc0 R09: 0000000010000000 R10: 0000000000008011 R11: 0000000000000246 R12: 00000000004af2f0 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000001 INFO: task iou-sqp-8401:8402 can't die for more than 143 seconds. task:iou-sqp-8401 state:D stack:30272 pid: 8402 ppid: 8400 flags:0x00004004 Call Trace: context_switch kernel/sched/core.c:4324 [inline] __schedule+0x90c/0x21a0 kernel/sched/core.c:5075 schedule+0xcf/0x270 kernel/sched/core.c:5154 schedule_timeout+0x1db/0x250 kernel/time/timer.c:1868 do_wait_for_common kernel/sched/completion.c:85 [inline] __wait_for_common kernel/sched/completion.c:106 [inline] wait_for_common kernel/sched/completion.c:117 [inline] wait_for_completion+0x168/0x270 kernel/sched/completion.c:138 io_sq_thread+0x27d/0x1ae0 fs/io_uring.c:6717 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 INFO: task iou-sqp-8401:8402 blocked for more than 143 seconds. Reported-by: syzbot+fb5458330b4442f2090d@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 57c7833ba62b..94b1a0f48fed 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7916,6 +7916,7 @@ static void io_sq_offload_start(struct io_ring_ctx *ctx) { struct io_sq_data *sqd = ctx->sq_data; + ctx->flags &= ~IORING_SETUP_R_DISABLED; if (ctx->flags & IORING_SETUP_SQPOLL) complete(&sqd->startup); } @@ -8692,6 +8693,8 @@ static void io_disable_sqo_submit(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); ctx->sqo_dead = 1; + if (ctx->flags & IORING_SETUP_R_DISABLED) + io_sq_offload_start(ctx); mutex_unlock(&ctx->uring_lock); /* make sure callers enter the ring to get error */ @@ -9637,10 +9640,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx) if (ctx->restrictions.registered) ctx->restricted = 1; - ctx->flags &= ~IORING_SETUP_R_DISABLED; - io_sq_offload_start(ctx); - return 0; } -- cgit v1.2.3-59-g8ed1b From 1c3b3e6527e57156bf4082f11c2151957560fe6a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 28 Feb 2021 16:07:30 -0700 Subject: io_uring: ignore double poll add on the same waitqueue head syzbot reports a deadlock, attempting to lock the same spinlock twice: ============================================ WARNING: possible recursive locking detected 5.11.0-syzkaller #0 Not tainted -------------------------------------------- swapper/1/0 is trying to acquire lock: ffff88801b2b1130 (&runtime->sleep){..-.}-{2:2}, at: spin_lock include/linux/spinlock.h:354 [inline] ffff88801b2b1130 (&runtime->sleep){..-.}-{2:2}, at: io_poll_double_wake+0x25f/0x6a0 fs/io_uring.c:4960 but task is already holding lock: ffff88801b2b3130 (&runtime->sleep){..-.}-{2:2}, at: __wake_up_common_lock+0xb4/0x130 kernel/sched/wait.c:137 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&runtime->sleep); lock(&runtime->sleep); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by swapper/1/0: #0: ffff888147474908 (&group->lock){..-.}-{2:2}, at: _snd_pcm_stream_lock_irqsave+0x9f/0xd0 sound/core/pcm_native.c:170 #1: ffff88801b2b3130 (&runtime->sleep){..-.}-{2:2}, at: __wake_up_common_lock+0xb4/0x130 kernel/sched/wait.c:137 stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.11.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0xfa/0x151 lib/dump_stack.c:120 print_deadlock_bug kernel/locking/lockdep.c:2829 [inline] check_deadlock kernel/locking/lockdep.c:2872 [inline] validate_chain kernel/locking/lockdep.c:3661 [inline] __lock_acquire.cold+0x14c/0x3b4 kernel/locking/lockdep.c:4900 lock_acquire kernel/locking/lockdep.c:5510 [inline] lock_acquire+0x1ab/0x730 kernel/locking/lockdep.c:5475 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:354 [inline] io_poll_double_wake+0x25f/0x6a0 fs/io_uring.c:4960 __wake_up_common+0x147/0x650 kernel/sched/wait.c:108 __wake_up_common_lock+0xd0/0x130 kernel/sched/wait.c:138 snd_pcm_update_state+0x46a/0x540 sound/core/pcm_lib.c:203 snd_pcm_update_hw_ptr0+0xa75/0x1a50 sound/core/pcm_lib.c:464 snd_pcm_period_elapsed+0x160/0x250 sound/core/pcm_lib.c:1805 dummy_hrtimer_callback+0x94/0x1b0 sound/drivers/dummy.c:378 __run_hrtimer kernel/time/hrtimer.c:1519 [inline] __hrtimer_run_queues+0x609/0xe40 kernel/time/hrtimer.c:1583 hrtimer_run_softirq+0x17b/0x360 kernel/time/hrtimer.c:1600 __do_softirq+0x29b/0x9f6 kernel/softirq.c:345 invoke_softirq kernel/softirq.c:221 [inline] __irq_exit_rcu kernel/softirq.c:422 [inline] irq_exit_rcu+0x134/0x200 kernel/softirq.c:434 sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:632 RIP: 0010:native_save_fl arch/x86/include/asm/irqflags.h:29 [inline] RIP: 0010:arch_local_save_flags arch/x86/include/asm/irqflags.h:70 [inline] RIP: 0010:arch_irqs_disabled arch/x86/include/asm/irqflags.h:137 [inline] RIP: 0010:acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline] RIP: 0010:acpi_idle_do_entry+0x1c9/0x250 drivers/acpi/processor_idle.c:516 Code: dd 38 6e f8 84 db 75 ac e8 54 32 6e f8 e8 0f 1c 74 f8 e9 0c 00 00 00 e8 45 32 6e f8 0f 00 2d 4e 4a c5 00 e8 39 32 6e f8 fb f4 <9c> 5b 81 e3 00 02 00 00 fa 31 ff 48 89 de e8 14 3a 6e f8 48 85 db RSP: 0018:ffffc90000d47d18 EFLAGS: 00000293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8880115c3780 RSI: ffffffff89052537 RDI: 0000000000000000 RBP: ffff888141127064 R08: 0000000000000001 R09: 0000000000000001 R10: ffffffff81794168 R11: 0000000000000000 R12: 0000000000000001 R13: ffff888141127000 R14: ffff888141127064 R15: ffff888143331804 acpi_idle_enter+0x361/0x500 drivers/acpi/processor_idle.c:647 cpuidle_enter_state+0x1b1/0xc80 drivers/cpuidle/cpuidle.c:237 cpuidle_enter+0x4a/0xa0 drivers/cpuidle/cpuidle.c:351 call_cpuidle kernel/sched/idle.c:158 [inline] cpuidle_idle_call kernel/sched/idle.c:239 [inline] do_idle+0x3e1/0x590 kernel/sched/idle.c:300 cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:397 start_secondary+0x274/0x350 arch/x86/kernel/smpboot.c:272 secondary_startup_64_no_verify+0xb0/0xbb which is due to the driver doing poll_wait() twice on the same wait_queue_head. That is perfectly valid, but from checking the rest of the kernel tree, it's the only driver that does this. We can handle this just fine, we just need to ignore the second addition as we'll get woken just fine on the first one. Cc: stable@vger.kernel.org # 5.8+ Fixes: 18bceab101ad ("io_uring: allow POLL_ADD with double poll_wait() users") Reported-by: syzbot+28abd693db9e92c160d8@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 94b1a0f48fed..eb4bc8967178 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4959,6 +4959,9 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, pt->error = -EINVAL; return; } + /* double add on the same waitqueue head, ignore */ + if (poll->head == head) + return; poll = kmalloc(sizeof(*poll), GFP_ATOMIC); if (!poll) { pt->error = -ENOMEM; -- cgit v1.2.3-59-g8ed1b From 70aacfe66136809d7f080f89c492c278298719f4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 1 Mar 2021 13:02:15 +0000 Subject: io_uring: kill sqo_dead and sqo submission halting As SQPOLL task doesn't poke into ->sqo_task anymore, there is no need to kill the sqo when the master task exits. Before it was necessary to avoid races accessing sqo_task->files with removing them. Signed-off-by: Pavel Begunkov [axboe: don't forget to enable SQPOLL before exit, if started disabled] Signed-off-by: Jens Axboe --- fs/io_uring.c | 45 ++++++++------------------------------------- 1 file changed, 8 insertions(+), 37 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index eb4bc8967178..6090a380e903 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -338,7 +338,6 @@ struct io_ring_ctx { unsigned int drain_next: 1; unsigned int eventfd_async: 1; unsigned int restricted: 1; - unsigned int sqo_dead: 1; unsigned int sqo_exec: 1; /* @@ -1967,7 +1966,7 @@ static void __io_req_task_submit(struct io_kiocb *req) /* ctx stays valid until unlock, even if we drop all ours ctx->refs */ mutex_lock(&ctx->uring_lock); - if (!ctx->sqo_dead && !(current->flags & PF_EXITING) && !current->in_execve) + if (!(current->flags & PF_EXITING) && !current->in_execve) __io_queue_sqe(req); else __io_req_task_cancel(req, -EFAULT); @@ -6578,8 +6577,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (!list_empty(&ctx->iopoll_list)) io_do_iopoll(ctx, &nr_events, 0); - if (to_submit && !ctx->sqo_dead && - likely(!percpu_ref_is_dying(&ctx->refs))) + if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs))) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); } @@ -7818,7 +7816,7 @@ static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx) clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); reinit_completion(&sqd->completion); - ctx->sqo_dead = ctx->sqo_exec = 0; + ctx->sqo_exec = 0; sqd->task_pid = current->pid; current->flags |= PF_IO_WORKER; ret = io_wq_fork_thread(io_sq_thread, sqd); @@ -8529,10 +8527,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); - - if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead)) - ctx->sqo_dead = 1; - /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) @@ -8692,19 +8686,6 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, } } -static void io_disable_sqo_submit(struct io_ring_ctx *ctx) -{ - mutex_lock(&ctx->uring_lock); - ctx->sqo_dead = 1; - if (ctx->flags & IORING_SETUP_R_DISABLED) - io_sq_offload_start(ctx); - mutex_unlock(&ctx->uring_lock); - - /* make sure callers enter the ring to get error */ - if (ctx->rings) - io_ring_set_wakeup_flag(ctx); -} - /* * We need to iteratively cancel requests, in case a request has dependent * hard links. These persist even for failure of cancelations, hence keep @@ -8717,7 +8698,11 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, bool did_park = false; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { - io_disable_sqo_submit(ctx); + /* never started, nothing to cancel */ + if (ctx->flags & IORING_SETUP_R_DISABLED) { + io_sq_offload_start(ctx); + return; + } did_park = io_sq_thread_park(ctx->sq_data); if (did_park) { task = ctx->sq_data->thread; @@ -8838,7 +8823,6 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx) if (!sqd) return; - io_disable_sqo_submit(ctx); if (!io_sq_thread_park(sqd)) return; tctx = ctx->sq_data->thread->io_uring; @@ -8883,7 +8867,6 @@ void __io_uring_task_cancel(void) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - /* trigger io_disable_sqo_submit() */ if (tctx->sqpoll) { struct file *file; unsigned long index; @@ -8996,22 +8979,14 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) do { if (!io_sqring_full(ctx)) break; - prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); - if (unlikely(ctx->sqo_dead)) { - ret = -EOWNERDEAD; - goto out; - } - if (!io_sqring_full(ctx)) break; - schedule(); } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); -out: return ret; } @@ -9093,8 +9068,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, ctx->sqo_exec = 0; } ret = -EOWNERDEAD; - if (unlikely(ctx->sqo_dead)) - goto out; if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) { @@ -9466,7 +9439,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, */ ret = io_uring_install_fd(ctx, file); if (ret < 0) { - io_disable_sqo_submit(ctx); /* fput will clean it up */ fput(file); return ret; @@ -9475,7 +9447,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: - io_disable_sqo_submit(ctx); io_ring_ctx_wait_and_kill(ctx); return ret; } -- cgit v1.2.3-59-g8ed1b From 16270893d71219816513a255e6c3163bc7224ce4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 1 Mar 2021 13:02:16 +0000 Subject: io_uring: remove sqo_task Now, sqo_task is used only for a warning that is not interesting anymore since sqo_dead is gone, remove all of that including ctx->sqo_task. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6090a380e903..f060dcc1cc86 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -379,11 +379,6 @@ struct io_ring_ctx { struct io_rings *rings; - /* - * For SQPOLL usage - */ - struct task_struct *sqo_task; - /* Only used for accounting purposes */ struct mm_struct *mm_account; @@ -8747,10 +8742,6 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) fput(file); return ret; } - - /* one and only SQPOLL file note, held by sqo_task */ - WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && - current != ctx->sqo_task); } tctx->last = file; } @@ -9376,7 +9367,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, ctx->compat = in_compat_syscall(); if (!capable(CAP_IPC_LOCK)) ctx->user = get_uid(current_user()); - ctx->sqo_task = current; /* * This is just grabbed for accounting purposes. When a process exits, -- cgit v1.2.3-59-g8ed1b From dc7bbc9ef361bea331bf5258a35abcdef619d44d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 Mar 2021 09:09:56 -0700 Subject: io-wq: fix error path leak of buffered write hash map The 'err' path should include the hash put, we already grabbed a reference once we get that far. Fixes: e941894eae31 ("io-wq: make buffered file write hashed work map per-ctx") Signed-off-by: Jens Axboe --- fs/io-wq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 946826beefe6..dc430381b694 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -1047,8 +1047,8 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) if (!ret) return wq; - io_wq_put_hash(data->hash); err: + io_wq_put_hash(data->hash); cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); for_each_node(node) kfree(wq->wqes[node]); -- cgit v1.2.3-59-g8ed1b From 3e6a0d3c7571ce3ed0d25c5c32543a54a7ebcd75 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 Mar 2021 13:56:00 -0700 Subject: io_uring: fix -EAGAIN retry with IOPOLL We no longer revert the iovec on -EIOCBQUEUED, see commit ab2125df921d, and this started causing issues for IOPOLL on devies that run out of request slots. Turns out what outside of needing a revert for those, we also had a bug where we didn't properly setup retry inside the submission path. That could cause re-import of the iovec, if any, and that could lead to spurious results if the application had those allocated on the stack. Catch -EAGAIN retry and make the iovec stable for IOPOLL, just like we do for !IOPOLL retries. Cc: # 5.9+ Reported-by: Abaci Robot Reported-by: Xiaoguang Wang Signed-off-by: Jens Axboe --- fs/io_uring.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f060dcc1cc86..361befaae28f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2423,23 +2423,32 @@ static bool io_resubmit_prep(struct io_kiocb *req) return false; return !io_setup_async_rw(req, iovec, inline_vecs, &iter, false); } -#endif -static bool io_rw_reissue(struct io_kiocb *req) +static bool io_rw_should_reissue(struct io_kiocb *req) { -#ifdef CONFIG_BLOCK umode_t mode = file_inode(req->file)->i_mode; + struct io_ring_ctx *ctx = req->ctx; if (!S_ISBLK(mode) && !S_ISREG(mode)) return false; - if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker()) + if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && + !(ctx->flags & IORING_SETUP_IOPOLL))) return false; /* * If ref is dying, we might be running poll reap from the exit work. * Don't attempt to reissue from that path, just let it fail with * -EAGAIN. */ - if (percpu_ref_is_dying(&req->ctx->refs)) + if (percpu_ref_is_dying(&ctx->refs)) + return false; + return true; +} +#endif + +static bool io_rw_reissue(struct io_kiocb *req) +{ +#ifdef CONFIG_BLOCK + if (!io_rw_should_reissue(req)) return false; lockdep_assert_held(&req->ctx->uring_lock); @@ -2482,6 +2491,19 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); +#ifdef CONFIG_BLOCK + /* Rewind iter, if we have one. iopoll path resubmits as usual */ + if (res == -EAGAIN && io_rw_should_reissue(req)) { + struct io_async_rw *rw = req->async_data; + + if (rw) + iov_iter_revert(&rw->iter, + req->result - iov_iter_count(&rw->iter)); + else if (!io_resubmit_prep(req)) + res = -EIO; + } +#endif + if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); @@ -3230,6 +3252,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) ret = io_iter_do_read(req, iter); if (ret == -EIOCBQUEUED) { + if (req->async_data) + iov_iter_revert(iter, io_size - iov_iter_count(iter)); goto out_free; } else if (ret == -EAGAIN) { /* IOPOLL retry should happen for io-wq threads */ @@ -3361,6 +3385,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) /* no retry on NONBLOCK nor RWF_NOWAIT */ if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) goto done; + if (ret2 == -EIOCBQUEUED && req->async_data) + iov_iter_revert(iter, io_size - iov_iter_count(iter)); if (!force_nonblock || ret2 != -EAGAIN) { /* IOPOLL retry should happen for io-wq threads */ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN) -- cgit v1.2.3-59-g8ed1b From 64c7212391e778949aa3055fb3863439417ddba9 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 1 Mar 2021 18:20:45 +0000 Subject: io_uring: choose right tctx->io_wq for try cancel When we cancel SQPOLL, @task in io_uring_try_cancel_requests() will differ from current. Use the right tctx from passed in @task, and don't forget that it can be NULL when the io_uring ctx exits. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 361befaae28f..d6c5465b372b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8636,7 +8636,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, struct files_struct *files) { struct io_task_cancel cancel = { .task = task, .files = files, }; - struct io_uring_task *tctx = current->io_uring; + struct task_struct *tctx_task = task ?: current; + struct io_uring_task *tctx = tctx_task->io_uring; while (1) { enum io_wq_cancel cret; -- cgit v1.2.3-59-g8ed1b From f85c310ac376ce81a954507315ff11be4ddbf214 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 1 Mar 2021 18:20:46 +0000 Subject: io_uring: inline io_req_clean_work() Inline io_req_clean_work(), less code and easier to analyse tctx dependencies and refs usage. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d6c5465b372b..afa0e91488e6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1167,22 +1167,6 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq) return false; } -static void io_req_clean_work(struct io_kiocb *req) -{ - if (req->flags & REQ_F_INFLIGHT) { - struct io_ring_ctx *ctx = req->ctx; - struct io_uring_task *tctx = req->task->io_uring; - unsigned long flags; - - spin_lock_irqsave(&ctx->inflight_lock, flags); - list_del(&req->inflight_entry); - spin_unlock_irqrestore(&ctx->inflight_lock, flags); - req->flags &= ~REQ_F_INFLIGHT; - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); - } -} - static void io_req_track_inflight(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; @@ -1671,7 +1655,19 @@ static void io_dismantle_req(struct io_kiocb *req) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); if (req->fixed_rsrc_refs) percpu_ref_put(req->fixed_rsrc_refs); - io_req_clean_work(req); + + if (req->flags & REQ_F_INFLIGHT) { + struct io_ring_ctx *ctx = req->ctx; + struct io_uring_task *tctx = req->task->io_uring; + unsigned long flags; + + spin_lock_irqsave(&ctx->inflight_lock, flags); + list_del(&req->inflight_entry); + spin_unlock_irqrestore(&ctx->inflight_lock, flags); + req->flags &= ~REQ_F_INFLIGHT; + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); + } } static inline void io_put_task(struct task_struct *task, int nr) -- cgit v1.2.3-59-g8ed1b From ebf936670721be805a9cb87781a5ee9271ba4633 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 1 Mar 2021 18:20:47 +0000 Subject: io_uring: inline __io_queue_async_work() __io_queue_async_work() is only called from io_queue_async_work(), inline it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index afa0e91488e6..840b73db9c3d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1205,7 +1205,7 @@ static void io_prep_async_link(struct io_kiocb *req) io_prep_async_work(cur); } -static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req) +static void io_queue_async_work(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; struct io_kiocb *link = io_prep_linked_timeout(req); @@ -1216,18 +1216,9 @@ static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req) trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, &req->work, req->flags); - io_wq_enqueue(tctx->io_wq, &req->work); - return link; -} - -static void io_queue_async_work(struct io_kiocb *req) -{ - struct io_kiocb *link; - /* init ->work of the whole link before punting */ io_prep_async_link(req); - link = __io_queue_async_work(req); - + io_wq_enqueue(tctx->io_wq, &req->work); if (link) io_queue_linked_timeout(link); } -- cgit v1.2.3-59-g8ed1b From b23fcf477f85164f3b33b2e8c2c99b2ec61ba902 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 1 Mar 2021 18:20:48 +0000 Subject: io_uring: remove extra in_idle wake up io_dismantle_req() is always followed by io_put_task(), which already do proper in_idle wake ups, so we can skip waking the owner task in io_dismantle_req(). The rules are simpler now, do io_put_task() shortly after ending a request, and it will be fine. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 840b73db9c3d..26e83cabf3bf 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1649,18 +1649,16 @@ static void io_dismantle_req(struct io_kiocb *req) if (req->flags & REQ_F_INFLIGHT) { struct io_ring_ctx *ctx = req->ctx; - struct io_uring_task *tctx = req->task->io_uring; unsigned long flags; spin_lock_irqsave(&ctx->inflight_lock, flags); list_del(&req->inflight_entry); spin_unlock_irqrestore(&ctx->inflight_lock, flags); req->flags &= ~REQ_F_INFLIGHT; - if (atomic_read(&tctx->in_idle)) - wake_up(&tctx->wait); } } +/* must to be called somewhat shortly after putting a request */ static inline void io_put_task(struct task_struct *task, int nr) { struct io_uring_task *tctx = task->io_uring; -- cgit v1.2.3-59-g8ed1b From e4b4a13f494120c475580927864cc1dd96f595d1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 Mar 2021 18:36:25 -0700 Subject: io_uring: ensure that threads freeze on suspend Alex reports that his system fails to suspend using 5.12-rc1, with the following dump: [ 240.650300] PM: suspend entry (deep) [ 240.650748] Filesystems sync: 0.000 seconds [ 240.725605] Freezing user space processes ... [ 260.739483] Freezing of tasks failed after 20.013 seconds (3 tasks refusing to freeze, wq_busy=0): [ 260.739497] task:iou-mgr-446 state:S stack: 0 pid: 516 ppid: 439 flags:0x00004224 [ 260.739504] Call Trace: [ 260.739507] ? sysvec_apic_timer_interrupt+0xb/0x81 [ 260.739515] ? pick_next_task_fair+0x197/0x1cde [ 260.739519] ? sysvec_reschedule_ipi+0x2f/0x6a [ 260.739522] ? asm_sysvec_reschedule_ipi+0x12/0x20 [ 260.739525] ? __schedule+0x57/0x6d6 [ 260.739529] ? del_timer_sync+0xb9/0x115 [ 260.739533] ? schedule+0x63/0xd5 [ 260.739536] ? schedule_timeout+0x219/0x356 [ 260.739540] ? __next_timer_interrupt+0xf1/0xf1 [ 260.739544] ? io_wq_manager+0x73/0xb1 [ 260.739549] ? io_wq_create+0x262/0x262 [ 260.739553] ? ret_from_fork+0x22/0x30 [ 260.739557] task:iou-mgr-517 state:S stack: 0 pid: 522 ppid: 439 flags:0x00004224 [ 260.739561] Call Trace: [ 260.739563] ? sysvec_apic_timer_interrupt+0xb/0x81 [ 260.739566] ? pick_next_task_fair+0x16f/0x1cde [ 260.739569] ? sysvec_apic_timer_interrupt+0xb/0x81 [ 260.739571] ? asm_sysvec_apic_timer_interrupt+0x12/0x20 [ 260.739574] ? __schedule+0x5b7/0x6d6 [ 260.739578] ? del_timer_sync+0x70/0x115 [ 260.739581] ? schedule_timeout+0x211/0x356 [ 260.739585] ? __next_timer_interrupt+0xf1/0xf1 [ 260.739588] ? io_wq_check_workers+0x15/0x11f [ 260.739592] ? io_wq_manager+0x69/0xb1 [ 260.739596] ? io_wq_create+0x262/0x262 [ 260.739600] ? ret_from_fork+0x22/0x30 [ 260.739603] task:iou-wrk-517 state:S stack: 0 pid: 523 ppid: 439 flags:0x00004224 [ 260.739607] Call Trace: [ 260.739609] ? __schedule+0x5b7/0x6d6 [ 260.739614] ? schedule+0x63/0xd5 [ 260.739617] ? schedule_timeout+0x219/0x356 [ 260.739621] ? __next_timer_interrupt+0xf1/0xf1 [ 260.739624] ? task_thread.isra.0+0x148/0x3af [ 260.739628] ? task_thread_unbound+0xa/0xa [ 260.739632] ? task_thread_bound+0x7/0x7 [ 260.739636] ? ret_from_fork+0x22/0x30 [ 260.739647] OOM killer enabled. [ 260.739648] Restarting tasks ... done. [ 260.740077] PM: suspend exit Play nice and ensure that any thread we create will call try_to_freeze() at an opportune time so that memory suspend can proceed. For the io-wq worker threads, mark them as PF_NOFREEZE. They could potentially be blocked for a long time. Reported-by: Alex Xu (Hello71) Tested-by: Alex Xu (Hello71) Signed-off-by: Jens Axboe --- fs/io-wq.c | 3 +++ fs/io_uring.c | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index dc430381b694..acffc85d1a93 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "../kernel/sched/sched.h" #include "io-wq.h" @@ -263,6 +264,7 @@ static void io_wqe_dec_running(struct io_worker *worker) static void io_worker_start(struct io_worker *worker) { + current->flags |= PF_NOFREEZE; worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); io_wqe_inc_running(worker); complete(&worker->started); @@ -731,6 +733,7 @@ static int io_wq_manager(void *data) set_current_state(TASK_INTERRUPTIBLE); io_wq_check_workers(wq); schedule_timeout(HZ); + try_to_freeze(); if (fatal_signal_pending(current)) set_bit(IO_WQ_BIT_EXIT, &wq->state); } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)); diff --git a/fs/io_uring.c b/fs/io_uring.c index 26e83cabf3bf..b1734efdc7e8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -74,13 +74,11 @@ #include #include #include -#include #include #include #include #include -#include -#include +#include #define CREATE_TRACE_POINTS #include @@ -6736,6 +6734,7 @@ static int io_sq_thread(void *data) io_ring_set_wakeup_flag(ctx); schedule(); + try_to_freeze(); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_clear_wakeup_flag(ctx); } -- cgit v1.2.3-59-g8ed1b From f01272541d2cd7b7f24909d63ea2b028a6a66293 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 3 Mar 2021 15:47:04 -0700 Subject: io-wq: ensure all pending work is canceled on exit If we race on shutting down the io-wq, then we should ensure that any work that was queued after workers shutdown is canceled. Harden the add work check a bit too, checking for IO_WQ_BIT_EXIT and cancel if it's set. Add a WARN_ON() for having any work before we kill the io-wq context. Reported-by: syzbot+91b4b56ead187d35c9d3@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 42 +++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index acffc85d1a93..19f18389ead2 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -129,6 +129,17 @@ struct io_wq { static enum cpuhp_state io_wq_online; +struct io_cb_cancel_data { + work_cancel_fn *fn; + void *data; + int nr_running; + int nr_pending; + bool cancel_all; +}; + +static void io_wqe_cancel_pending_work(struct io_wqe *wqe, + struct io_cb_cancel_data *match); + static bool io_worker_get(struct io_worker *worker) { return refcount_inc_not_zero(&worker->ref); @@ -713,6 +724,23 @@ static void io_wq_check_workers(struct io_wq *wq) } } +static bool io_wq_work_match_all(struct io_wq_work *work, void *data) +{ + return true; +} + +static void io_wq_cancel_pending(struct io_wq *wq) +{ + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_all, + .cancel_all = true, + }; + int node; + + for_each_node(node) + io_wqe_cancel_pending_work(wq->wqes[node], &match); +} + /* * Manager thread. Tasked with creating new workers, if we need them. */ @@ -748,6 +776,8 @@ static int io_wq_manager(void *data) /* we might not ever have created any workers */ if (atomic_read(&wq->worker_refs)) wait_for_completion(&wq->worker_done); + + io_wq_cancel_pending(wq); complete(&wq->exited); do_exit(0); } @@ -809,7 +839,8 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) unsigned long flags; /* Can only happen if manager creation fails after exec */ - if (unlikely(io_wq_fork_manager(wqe->wq))) { + if (io_wq_fork_manager(wqe->wq) || + test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) { work->flags |= IO_WQ_WORK_CANCEL; wqe->wq->do_work(work); return; @@ -845,14 +876,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val) work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); } -struct io_cb_cancel_data { - work_cancel_fn *fn; - void *data; - int nr_running; - int nr_pending; - bool cancel_all; -}; - static bool io_wq_worker_cancel(struct io_worker *worker, void *data) { struct io_cb_cancel_data *match = data; @@ -1086,6 +1109,7 @@ static void io_wq_destroy(struct io_wq *wq) struct io_wqe *wqe = wq->wqes[node]; list_del_init(&wqe->wait.entry); + WARN_ON_ONCE(!wq_list_empty(&wqe->work_list)); kfree(wqe); } spin_unlock_irq(&wq->hash->wait.lock); -- cgit v1.2.3-59-g8ed1b From 70d443d8463339869f371e77fa594b850f374565 Mon Sep 17 00:00:00 2001 From: "Yordan Karadzhov (VMware)" Date: Thu, 4 Mar 2021 11:23:48 +0200 Subject: tracing: Remove duplicate declaration from trace.h A declaration of function "int trace_empty(struct trace_iterator *iter)" shows up twice in the header file kernel/trace/trace.h Link: https://lkml.kernel.org/r/20210304092348.208033-1-y.karadz@gmail.com Signed-off-by: Yordan Karadzhov (VMware) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dec13ff66077..a6446c03cfbc 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -605,7 +605,6 @@ void trace_graph_function(struct trace_array *tr, void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); -int trace_empty(struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace); int trace_graph_entry(struct ftrace_graph_ent *trace); -- cgit v1.2.3-59-g8ed1b From 69268094a1c16f3f44b369f9da78ce98bab5f244 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 2 Mar 2021 09:49:28 +0100 Subject: tracing: Fix help text of TRACEPOINT_BENCHMARK in Kconfig It's "cond_resched()" not "cond_sched()". Link: https://lkml.kernel.org/r/1863065.aFVDpXsuPd@devpool47 Signed-off-by: Rolf Eike Beer Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9c266b93cbc0..7fa82778c3e6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -694,7 +694,7 @@ config TRACEPOINT_BENCHMARK help This option creates the tracepoint "benchmark:benchmark_event". When the tracepoint is enabled, it kicks off a kernel thread that - goes into an infinite loop (calling cond_sched() to let other tasks + goes into an infinite loop (calling cond_resched() to let other tasks run), and calls the tracepoint. Each iteration will record the time it took to write to the tracepoint and the next iteration that data will be passed to the tracepoint itself. That is, the tracepoint -- cgit v1.2.3-59-g8ed1b From 6f6be606e763f2da9fc21de00538c97fe4ca1492 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 3 Mar 2021 18:03:52 -0500 Subject: ring-buffer: Force before_stamp and write_stamp to be different on discard Part of the logic of the new time stamp code depends on the before_stamp and the write_stamp to be different if the write_stamp does not match the last event on the buffer, as it will be used to calculate the delta of the next event written on the buffer. The discard logic depends on this, as the next event to come in needs to inject a full timestamp as it can not rely on the last event timestamp in the buffer because it is unknown due to events after it being discarded. But by changing the write_stamp back to the time before it, it forces the next event to use a full time stamp, instead of relying on it. The issue came when a full time stamp was used for the event, and rb_time_delta() returns zero in that case. The update to the write_stamp (which subtracts delta) made it not change. Then when the event is removed from the buffer, because the before_stamp and write_stamp still match, the next event written would calculate its delta from the write_stamp, but that would be wrong as the write_stamp is of the time of the event that was discarded. In the case that the delta change being made to write_stamp is zero, set the before_stamp to zero as well, and this will force the next event to inject a full timestamp and not use the current write_stamp. Cc: stable@vger.kernel.org Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b9dad3500041..342f49c3ccc5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2814,6 +2814,17 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, write_stamp, write_stamp - delta)) return 0; + /* + * It's possible that the event time delta is zero + * (has the same time stamp as the previous event) + * in which case write_stamp and before_stamp could + * be the same. In such a case, force before_stamp + * to be different than write_stamp. It doesn't + * matter what it is, as long as its different. + */ + if (!delta) + rb_time_set(&cpu_buffer->before_stamp, 0); + /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume -- cgit v1.2.3-59-g8ed1b From 6549de1fe34162d7ace8b870ae11ca6cae5b8609 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 3 Mar 2021 18:23:40 -0500 Subject: ring-buffer: Add a little more information and a WARN when time stamp going backwards is detected When the CONFIG_RING_BUFFER_VALIDATE_TIME_DELTAS is enabled, and the time stamps are detected as not being valid, it reports information about the write stamp, but does not show the before_stamp which is still useful information. Also, it should give a warning once, such that tests detect this happening. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 342f49c3ccc5..68744c51517e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3318,9 +3318,13 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer, goto out; } atomic_inc(&cpu_buffer->record_disabled); - pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld after:%lld\n", - cpu_buffer->cpu, - ts + info->delta, info->ts, info->delta, info->after); + /* There's some cases in boot up that this can happen */ + WARN_ON_ONCE(system_state != SYSTEM_BOOTING); + pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s\n", + cpu_buffer->cpu, + ts + info->delta, info->ts, info->delta, + info->before, info->after, + full ? " (full)" : ""); dump_buffer_page(bpage, info, tail); atomic_dec(&ts_dump); /* Do not re-enable checking */ -- cgit v1.2.3-59-g8ed1b From f40fc799afc598b3d130d5a0ada994c9d4fb6cf8 Mon Sep 17 00:00:00 2001 From: Vamshi K Sthambamkadi Date: Thu, 4 Mar 2021 15:15:24 +0530 Subject: tracing: Fix memory leak in __create_synth_event() kmemleak report: unreferenced object 0xc5a6f708 (size 8): comm "ftracetest", pid 1209, jiffies 4294911500 (age 6.816s) hex dump (first 8 bytes): 00 c1 3d 60 14 83 1f 8a ..=`.... backtrace: [] __kmalloc_track_caller+0x2a6/0x460 [<7d3d60a6>] kstrndup+0x37/0x70 [<45a0e739>] argv_split+0x1c/0x120 [] __create_synth_event+0x192/0xb00 [<0708b8a3>] create_synth_event+0xbb/0x150 [<3d1941e1>] create_dyn_event+0x5c/0xb0 [<5cf8b9e3>] trace_parse_run_command+0xa7/0x140 [<04deb2ef>] dyn_event_write+0x10/0x20 [<8779ac95>] vfs_write+0xa9/0x3c0 [] ksys_write+0x89/0xc0 [] __ia32_sys_write+0x15/0x20 [<7ce02d85>] __do_fast_syscall_32+0x45/0x80 [] do_fast_syscall_32+0x29/0x60 [<2467454a>] do_SYSENTER_32+0x15/0x20 [<9beaa61d>] entry_SYSENTER_32+0xa9/0xfc unreferenced object 0xc5a6f078 (size 8): comm "ftracetest", pid 1209, jiffies 4294911500 (age 6.816s) hex dump (first 8 bytes): 08 f7 a6 c5 00 00 00 00 ........ backtrace: [] __kmalloc+0x2b6/0x470 [] argv_split+0x82/0x120 [] __create_synth_event+0x192/0xb00 [<0708b8a3>] create_synth_event+0xbb/0x150 [<3d1941e1>] create_dyn_event+0x5c/0xb0 [<5cf8b9e3>] trace_parse_run_command+0xa7/0x140 [<04deb2ef>] dyn_event_write+0x10/0x20 [<8779ac95>] vfs_write+0xa9/0x3c0 [] ksys_write+0x89/0xc0 [] __ia32_sys_write+0x15/0x20 [<7ce02d85>] __do_fast_syscall_32+0x45/0x80 [] do_fast_syscall_32+0x29/0x60 [<2467454a>] do_SYSENTER_32+0x15/0x20 [<9beaa61d>] entry_SYSENTER_32+0xa9/0xfc In __create_synth_event(), while iterating field/type arguments, the argv_split() will return array of atleast 2 elements even when zero arguments(argc=0) are passed. for e.g. when there is double delimiter or string ends with delimiter To fix call argv_free() even when argc=0. Link: https://lkml.kernel.org/r/20210304094521.GA1826@cosmos Signed-off-by: Vamshi K Sthambamkadi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_synth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 2979a96595b4..8d71e6c83f10 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1225,8 +1225,10 @@ static int __create_synth_event(const char *name, const char *raw_fields) goto err; } - if (!argc) + if (!argc) { + argv_free(argv); continue; + } n_fields_this_loop = 0; consumed = 0; -- cgit v1.2.3-59-g8ed1b From ee666a185558ac9a929e53b902a568442ed62416 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 Mar 2021 10:49:35 -0500 Subject: tracing: Skip selftests if tracing is disabled If tracing is disabled for some reason (traceoff_on_warning, command line, etc), the ftrace selftests are guaranteed to fail, as their results are defined by trace data in the ring buffers. If the ring buffers are turned off, the tests will fail, due to lack of data. Because tracing being disabled is for a specific reason (warning, user decided to, etc), it does not make sense to enable tracing to run the self tests, as the test output may corrupt the reason for the tracing to be disabled. Instead, simply skip the self tests and report that they are being skipped due to tracing being disabled. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e295c413580e..eccb4e1187cc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1929,6 +1929,12 @@ static int run_tracer_selftest(struct tracer *type) if (!selftests_can_run) return save_selftest(type); + if (!tracing_is_on()) { + pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", + type->name); + return 0; + } + /* * Run a selftest on this tracer. * Here we reset the trace buffer, and set the current -- cgit v1.2.3-59-g8ed1b From f9f344479d8b40b3b001c913fb992d85d19261d0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 26 Feb 2021 14:09:15 -0500 Subject: tracing: Fix comment about the trace_event_call flags In the declaration of the struct trace_event_call, the flags has the bits defined in the comment above it. But these bits are also defined by the TRACE_EVENT_FL_* enums just above the declaration of the struct. As the comment about the flags in the struct has become stale and incorrect, just replace it with a reference to the TRACE_EVENT_FL_* enum above. Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 7077fec653bb..28e7af1406f2 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -349,15 +349,8 @@ struct trace_event_call { struct event_filter *filter; void *mod; void *data; - /* - * bit 0: filter_active - * bit 1: allow trace by non root (cap any) - * bit 2: failed to apply filter - * bit 3: trace internal event (do not enable) - * bit 4: Event was enabled by module - * bit 5: use call filter rather than file filter - * bit 6: Event is a tracepoint - */ + + /* See the TRACE_EVENT_FL_* flags above */ int flags; /* static flags of different events */ #ifdef CONFIG_PERF_EVENTS -- cgit v1.2.3-59-g8ed1b From d734492a14a2da6e7bcce8cf66436a9cf4e51ddf Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 3 Mar 2021 17:55:46 +0900 Subject: btrfs: zoned: use sector_t for zone sectors We need to use sector_t for zone_sectors, or it would set the zone size to zero when the size >= 4GB (= 2^24 sectors) by shifting the zone_sectors value by SECTOR_SHIFT. We're assuming zones sizes up to 8GiB. Fixes: 5b316468983d ("btrfs: get zone information of zoned block devices") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 9a5cf153da89..43948bd40e02 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -269,7 +269,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) sector_t sector = 0; struct blk_zone *zones = NULL; unsigned int i, nreported = 0, nr_zones; - unsigned int zone_sectors; + sector_t zone_sectors; char *model, *emulated; int ret; @@ -658,7 +658,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, u64 *bytenr_ret) { struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES]; - unsigned int zone_sectors; + sector_t zone_sectors; u32 sb_zone; int ret; u8 zone_sectors_shift; -- cgit v1.2.3-59-g8ed1b From badae9c86979c459bd7d895d6d7ddc7a01131ff7 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 3 Mar 2021 17:55:48 +0900 Subject: btrfs: zoned: do not account freed region of read-only block group as zone_unusable We migrate zone unusable bytes to read-only bytes when a block group is set to read-only, and account all the free region as bytes_readonly. Thus, we should not increase block_group->zone_unusable when the block group is read-only. Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones") Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 711a6a751ae9..9988decd5717 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2555,7 +2555,12 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, to_unusable = size - to_free; ctl->free_space += to_free; - block_group->zone_unusable += to_unusable; + /* + * If the block group is read-only, we should account freed space into + * bytes_readonly. + */ + if (!block_group->ro) + block_group->zone_unusable += to_unusable; spin_unlock(&ctl->tree_lock); if (!used) { spin_lock(&block_group->lock); -- cgit v1.2.3-59-g8ed1b From a14e5ec66a7a66e57b24e2469f9212a78460207e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 23 Feb 2021 21:21:20 +0100 Subject: dm bufio: subtract the number of initial sectors in dm_bufio_get_device_size dm_bufio_get_device_size returns the device size in blocks. Before returning the value, we must subtract the nubmer of starting sectors. The number of starting sectors may not be divisible by block size. Note that currently, no target is using dm_bufio_set_sector_offset and dm_bufio_get_device_size simultaneously, so this change has no effect. However, an upcoming dm-verity-fec fix needs this change. Signed-off-by: Mikulas Patocka Reviewed-by: Milan Broz Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index fce4cbf9529d..50f3e673729c 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1526,6 +1526,10 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) { sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT; + if (s >= c->start) + s -= c->start; + else + s = 0; if (likely(c->sectors_per_block_bits >= 0)) s >>= c->sectors_per_block_bits; else -- cgit v1.2.3-59-g8ed1b From df7b59ba9245c4a3115ebaa905e3e5719a3810da Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 23 Feb 2021 21:21:21 +0100 Subject: dm verity: fix FEC for RS roots unaligned to block size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optional Forward Error Correction (FEC) code in dm-verity uses Reed-Solomon code and should support roots from 2 to 24. The error correction parity bytes (of roots lengths per RS block) are stored on a separate device in sequence without any padding. Currently, to access FEC device, the dm-verity-fec code uses dm-bufio client with block size set to verity data block (usually 4096 or 512 bytes). Because this block size is not divisible by some (most!) of the roots supported lengths, data repair cannot work for partially stored parity bytes. This fix changes FEC device dm-bufio block size to "roots << SECTOR_SHIFT" where we can be sure that the full parity data is always available. (There cannot be partial FEC blocks because parity must cover whole sectors.) Because the optional FEC starting offset could be unaligned to this new block size, we have to use dm_bufio_set_sector_offset() to configure it. The problem is easily reproduced using veritysetup, e.g. for roots=13: # create verity device with RS FEC dd if=/dev/urandom of=data.img bs=4096 count=8 status=none veritysetup format data.img hash.img --fec-device=fec.img --fec-roots=13 | awk '/^Root hash/{ print $3 }' >roothash # create an erasure that should be always repairable with this roots setting dd if=/dev/zero of=data.img conv=notrunc bs=1 count=8 seek=4088 status=none # try to read it through dm-verity veritysetup open data.img test hash.img --fec-device=fec.img --fec-roots=13 $(cat roothash) dd if=/dev/mapper/test of=/dev/null bs=4096 status=noxfer # wait for possible recursive recovery in kernel udevadm settle veritysetup close test With this fix, errors are properly repaired. device-mapper: verity-fec: 7:1: FEC 0: corrected 8 errors ... Without it, FEC code usually ends on unrecoverable failure in RS decoder: device-mapper: verity-fec: 7:1: FEC 0: failed to correct: -74 ... This problem is present in all kernels since the FEC code's introduction (kernel 4.5). It is thought that this problem is not visible in Android ecosystem because it always uses a default RS roots=2. Depends-on: a14e5ec66a7a ("dm bufio: subtract the number of initial sectors in dm_bufio_get_device_size") Signed-off-by: Milan Broz Tested-by: Jérôme Carretero Reviewed-by: Sami Tolvanen Cc: stable@vger.kernel.org # 4.5+ Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-fec.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index fb41b4f23c48..66f4c6398f67 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -61,19 +61,18 @@ static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio, static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, unsigned *offset, struct dm_buffer **buf) { - u64 position, block; + u64 position, block, rem; u8 *res; position = (index + rsb) * v->fec->roots; - block = position >> v->data_dev_block_bits; - *offset = (unsigned)(position - (block << v->data_dev_block_bits)); + block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem); + *offset = (unsigned)rem; - res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); + res = dm_bufio_read(v->fec->bufio, block, buf); if (IS_ERR(res)) { DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", v->data_dev->name, (unsigned long long)rsb, - (unsigned long long)(v->fec->start + block), - PTR_ERR(res)); + (unsigned long long)block, PTR_ERR(res)); *buf = NULL; } @@ -155,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, /* read the next block when we run out of parity bytes */ offset += v->fec->roots; - if (offset >= 1 << v->data_dev_block_bits) { + if (offset >= v->fec->roots << SECTOR_SHIFT) { dm_bufio_release(buf); par = fec_read_parity(v, rsb, block_offset, &offset, &buf); @@ -674,7 +673,7 @@ int verity_fec_ctr(struct dm_verity *v) { struct dm_verity_fec *f = v->fec; struct dm_target *ti = v->ti; - u64 hash_blocks; + u64 hash_blocks, fec_blocks; int ret; if (!verity_fec_is_enabled(v)) { @@ -744,15 +743,17 @@ int verity_fec_ctr(struct dm_verity *v) } f->bufio = dm_bufio_client_create(f->dev->bdev, - 1 << v->data_dev_block_bits, + f->roots << SECTOR_SHIFT, 1, 0, NULL, NULL); if (IS_ERR(f->bufio)) { ti->error = "Cannot initialize FEC bufio client"; return PTR_ERR(f->bufio); } - if (dm_bufio_get_device_size(f->bufio) < - ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) { + dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT)); + + fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT); + if (dm_bufio_get_device_size(f->bufio) < fec_blocks) { ti->error = "FEC device is too small"; return -E2BIG; } -- cgit v1.2.3-59-g8ed1b From b05a1bcd40184f12f2cd87db79e871aa8c17563f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 4 Mar 2021 13:59:24 +0000 Subject: io_uring: cancel-match based on flags Instead of going into request internals, like checking req->file->f_op, do match them based on REQ_F_INFLIGHT, it's set only when we want it to be reliably cancelled. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b1734efdc7e8..fb4abea1e5d6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -703,7 +703,7 @@ enum { /* fail rest of links */ REQ_F_FAIL_LINK = BIT(REQ_F_FAIL_LINK_BIT), - /* on inflight list */ + /* on inflight list, should be cancelled and waited on exit reliably */ REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), /* read/write uses file position */ REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), @@ -1069,7 +1069,7 @@ static bool io_match_task(struct io_kiocb *head, return true; io_for_each_link(req, head) { - if (req->file && req->file->f_op == &io_uring_fops) + if (req->flags & REQ_F_INFLIGHT) return true; if (req->task->files == files) return true; -- cgit v1.2.3-59-g8ed1b From dd59a3d595cc10230ded4c8b727b096e16bceeb5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 4 Mar 2021 13:59:25 +0000 Subject: io_uring: reliably cancel linked timeouts Linked timeouts are fired asynchronously (i.e. soft-irq), and use generic cancellation paths to do its stuff, including poking into io-wq. The problem is that it's racy to access tctx->io_wq, as io_uring_task_cancel() and others may be happening at this exact moment. Mark linked timeouts with REQ_F_INLIFGHT for now, making sure there are no timeouts before io-wq destraction. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index fb4abea1e5d6..e55369555e5c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5500,6 +5500,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, data->mode = io_translate_timeout_mode(flags); hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode); + io_req_track_inflight(req); return 0; } -- cgit v1.2.3-59-g8ed1b From cc440e8738e5c875297ac0e90316745093be7e28 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Mar 2021 12:21:05 -0700 Subject: kernel: provide create_io_thread() helper Provide a generic helper for setting up an io_uring worker. Returns a task_struct so that the caller can do whatever setup is needed, then call wake_up_new_task() to kick it into gear. Add a kernel_clone_args member, io_thread, which tells copy_process() to mark the task with PF_IO_WORKER. Signed-off-by: Jens Axboe --- include/linux/sched/task.h | 2 ++ kernel/fork.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index c0f71f2e7160..ef02be869cf2 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -31,6 +31,7 @@ struct kernel_clone_args { /* Number of elements in *set_tid */ size_t set_tid_size; int cgroup; + int io_thread; struct cgroup *cgrp; struct css_set *cset; }; @@ -82,6 +83,7 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern pid_t kernel_clone(struct kernel_clone_args *kargs); +struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node); struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); diff --git a/kernel/fork.c b/kernel/fork.c index d66cd1014211..d3171e8e88e5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1940,6 +1940,8 @@ static __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; + if (args->io_thread) + p->flags |= PF_IO_WORKER; /* * This _must_ happen before we call free_task(), i.e. before we jump @@ -2410,6 +2412,34 @@ struct mm_struct *copy_init_mm(void) return dup_mm(NULL, &init_mm); } +/* + * This is like kernel_clone(), but shaved down and tailored to just + * creating io_uring workers. It returns a created task, or an error pointer. + * The returned task is inactive, and the caller must fire it up through + * wake_up_new_task(p). All signals are blocked in the created task. + */ +struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) +{ + unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| + CLONE_IO; + struct kernel_clone_args args = { + .flags = ((lower_32_bits(flags) | CLONE_VM | + CLONE_UNTRACED) & ~CSIGNAL), + .exit_signal = (lower_32_bits(flags) & CSIGNAL), + .stack = (unsigned long)fn, + .stack_size = (unsigned long)arg, + .io_thread = 1, + }; + struct task_struct *tsk; + + tsk = copy_process(NULL, 0, node, &args); + if (!IS_ERR(tsk)) { + sigfillset(&tsk->blocked); + sigdelsetmask(&tsk->blocked, sigmask(SIGKILL)); + } + return tsk; +} + /* * Ok, this is the main fork-routine. * -- cgit v1.2.3-59-g8ed1b From 688e8128b7a92df982709a4137ea4588d16f24aa Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Tue, 23 Feb 2021 13:06:24 -0800 Subject: scsi: iscsi: Restrict sessions and handles to admin capabilities Protect the iSCSI transport handle, available in sysfs, by requiring CAP_SYS_ADMIN to read it. Also protect the netlink socket by restricting reception of messages to ones sent with CAP_SYS_ADMIN. This disables normal users from being able to end arbitrary iSCSI sessions. Cc: stable@vger.kernel.org Reported-by: Adam Nichols Reviewed-by: Chris Leech Reviewed-by: Mike Christie Signed-off-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 969d24d580e2..69a1f55499ef 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -132,6 +132,9 @@ show_transport_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_internal *priv = dev_to_iscsi_internal(dev); + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; return sprintf(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport)); } static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL); @@ -3621,6 +3624,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) struct iscsi_cls_conn *conn; struct iscsi_endpoint *ep = NULL; + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE) *group = ISCSI_NL_GRP_UIP; else -- cgit v1.2.3-59-g8ed1b From ec98ea7070e94cc25a422ec97d1421e28d97b7ee Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 Feb 2021 18:00:17 -0800 Subject: scsi: iscsi: Ensure sysfs attributes are limited to PAGE_SIZE As the iSCSI parameters are exported back through sysfs, it should be enforcing that they never are more than PAGE_SIZE (which should be more than enough) before accepting updates through netlink. Change all iSCSI sysfs attributes to use sysfs_emit(). Cc: stable@vger.kernel.org Reported-by: Adam Nichols Reviewed-by: Lee Duncan Reviewed-by: Greg Kroah-Hartman Reviewed-by: Mike Christie Signed-off-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 148 ++++++++++++++++++------------------ drivers/scsi/scsi_transport_iscsi.c | 25 +++--- 2 files changed, 90 insertions(+), 83 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 7ad11e42306d..04633e5157e9 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3430,125 +3430,125 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session, switch(param) { case ISCSI_PARAM_FAST_ABORT: - len = sprintf(buf, "%d\n", session->fast_abort); + len = sysfs_emit(buf, "%d\n", session->fast_abort); break; case ISCSI_PARAM_ABORT_TMO: - len = sprintf(buf, "%d\n", session->abort_timeout); + len = sysfs_emit(buf, "%d\n", session->abort_timeout); break; case ISCSI_PARAM_LU_RESET_TMO: - len = sprintf(buf, "%d\n", session->lu_reset_timeout); + len = sysfs_emit(buf, "%d\n", session->lu_reset_timeout); break; case ISCSI_PARAM_TGT_RESET_TMO: - len = sprintf(buf, "%d\n", session->tgt_reset_timeout); + len = sysfs_emit(buf, "%d\n", session->tgt_reset_timeout); break; case ISCSI_PARAM_INITIAL_R2T_EN: - len = sprintf(buf, "%d\n", session->initial_r2t_en); + len = sysfs_emit(buf, "%d\n", session->initial_r2t_en); break; case ISCSI_PARAM_MAX_R2T: - len = sprintf(buf, "%hu\n", session->max_r2t); + len = sysfs_emit(buf, "%hu\n", session->max_r2t); break; case ISCSI_PARAM_IMM_DATA_EN: - len = sprintf(buf, "%d\n", session->imm_data_en); + len = sysfs_emit(buf, "%d\n", session->imm_data_en); break; case ISCSI_PARAM_FIRST_BURST: - len = sprintf(buf, "%u\n", session->first_burst); + len = sysfs_emit(buf, "%u\n", session->first_burst); break; case ISCSI_PARAM_MAX_BURST: - len = sprintf(buf, "%u\n", session->max_burst); + len = sysfs_emit(buf, "%u\n", session->max_burst); break; case ISCSI_PARAM_PDU_INORDER_EN: - len = sprintf(buf, "%d\n", session->pdu_inorder_en); + len = sysfs_emit(buf, "%d\n", session->pdu_inorder_en); break; case ISCSI_PARAM_DATASEQ_INORDER_EN: - len = sprintf(buf, "%d\n", session->dataseq_inorder_en); + len = sysfs_emit(buf, "%d\n", session->dataseq_inorder_en); break; case ISCSI_PARAM_DEF_TASKMGMT_TMO: - len = sprintf(buf, "%d\n", session->def_taskmgmt_tmo); + len = sysfs_emit(buf, "%d\n", session->def_taskmgmt_tmo); break; case ISCSI_PARAM_ERL: - len = sprintf(buf, "%d\n", session->erl); + len = sysfs_emit(buf, "%d\n", session->erl); break; case ISCSI_PARAM_TARGET_NAME: - len = sprintf(buf, "%s\n", session->targetname); + len = sysfs_emit(buf, "%s\n", session->targetname); break; case ISCSI_PARAM_TARGET_ALIAS: - len = sprintf(buf, "%s\n", session->targetalias); + len = sysfs_emit(buf, "%s\n", session->targetalias); break; case ISCSI_PARAM_TPGT: - len = sprintf(buf, "%d\n", session->tpgt); + len = sysfs_emit(buf, "%d\n", session->tpgt); break; case ISCSI_PARAM_USERNAME: - len = sprintf(buf, "%s\n", session->username); + len = sysfs_emit(buf, "%s\n", session->username); break; case ISCSI_PARAM_USERNAME_IN: - len = sprintf(buf, "%s\n", session->username_in); + len = sysfs_emit(buf, "%s\n", session->username_in); break; case ISCSI_PARAM_PASSWORD: - len = sprintf(buf, "%s\n", session->password); + len = sysfs_emit(buf, "%s\n", session->password); break; case ISCSI_PARAM_PASSWORD_IN: - len = sprintf(buf, "%s\n", session->password_in); + len = sysfs_emit(buf, "%s\n", session->password_in); break; case ISCSI_PARAM_IFACE_NAME: - len = sprintf(buf, "%s\n", session->ifacename); + len = sysfs_emit(buf, "%s\n", session->ifacename); break; case ISCSI_PARAM_INITIATOR_NAME: - len = sprintf(buf, "%s\n", session->initiatorname); + len = sysfs_emit(buf, "%s\n", session->initiatorname); break; case ISCSI_PARAM_BOOT_ROOT: - len = sprintf(buf, "%s\n", session->boot_root); + len = sysfs_emit(buf, "%s\n", session->boot_root); break; case ISCSI_PARAM_BOOT_NIC: - len = sprintf(buf, "%s\n", session->boot_nic); + len = sysfs_emit(buf, "%s\n", session->boot_nic); break; case ISCSI_PARAM_BOOT_TARGET: - len = sprintf(buf, "%s\n", session->boot_target); + len = sysfs_emit(buf, "%s\n", session->boot_target); break; case ISCSI_PARAM_AUTO_SND_TGT_DISABLE: - len = sprintf(buf, "%u\n", session->auto_snd_tgt_disable); + len = sysfs_emit(buf, "%u\n", session->auto_snd_tgt_disable); break; case ISCSI_PARAM_DISCOVERY_SESS: - len = sprintf(buf, "%u\n", session->discovery_sess); + len = sysfs_emit(buf, "%u\n", session->discovery_sess); break; case ISCSI_PARAM_PORTAL_TYPE: - len = sprintf(buf, "%s\n", session->portal_type); + len = sysfs_emit(buf, "%s\n", session->portal_type); break; case ISCSI_PARAM_CHAP_AUTH_EN: - len = sprintf(buf, "%u\n", session->chap_auth_en); + len = sysfs_emit(buf, "%u\n", session->chap_auth_en); break; case ISCSI_PARAM_DISCOVERY_LOGOUT_EN: - len = sprintf(buf, "%u\n", session->discovery_logout_en); + len = sysfs_emit(buf, "%u\n", session->discovery_logout_en); break; case ISCSI_PARAM_BIDI_CHAP_EN: - len = sprintf(buf, "%u\n", session->bidi_chap_en); + len = sysfs_emit(buf, "%u\n", session->bidi_chap_en); break; case ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL: - len = sprintf(buf, "%u\n", session->discovery_auth_optional); + len = sysfs_emit(buf, "%u\n", session->discovery_auth_optional); break; case ISCSI_PARAM_DEF_TIME2WAIT: - len = sprintf(buf, "%d\n", session->time2wait); + len = sysfs_emit(buf, "%d\n", session->time2wait); break; case ISCSI_PARAM_DEF_TIME2RETAIN: - len = sprintf(buf, "%d\n", session->time2retain); + len = sysfs_emit(buf, "%d\n", session->time2retain); break; case ISCSI_PARAM_TSID: - len = sprintf(buf, "%u\n", session->tsid); + len = sysfs_emit(buf, "%u\n", session->tsid); break; case ISCSI_PARAM_ISID: - len = sprintf(buf, "%02x%02x%02x%02x%02x%02x\n", + len = sysfs_emit(buf, "%02x%02x%02x%02x%02x%02x\n", session->isid[0], session->isid[1], session->isid[2], session->isid[3], session->isid[4], session->isid[5]); break; case ISCSI_PARAM_DISCOVERY_PARENT_IDX: - len = sprintf(buf, "%u\n", session->discovery_parent_idx); + len = sysfs_emit(buf, "%u\n", session->discovery_parent_idx); break; case ISCSI_PARAM_DISCOVERY_PARENT_TYPE: if (session->discovery_parent_type) - len = sprintf(buf, "%s\n", + len = sysfs_emit(buf, "%s\n", session->discovery_parent_type); else - len = sprintf(buf, "\n"); + len = sysfs_emit(buf, "\n"); break; default: return -ENOSYS; @@ -3580,16 +3580,16 @@ int iscsi_conn_get_addr_param(struct sockaddr_storage *addr, case ISCSI_PARAM_CONN_ADDRESS: case ISCSI_HOST_PARAM_IPADDRESS: if (sin) - len = sprintf(buf, "%pI4\n", &sin->sin_addr.s_addr); + len = sysfs_emit(buf, "%pI4\n", &sin->sin_addr.s_addr); else - len = sprintf(buf, "%pI6\n", &sin6->sin6_addr); + len = sysfs_emit(buf, "%pI6\n", &sin6->sin6_addr); break; case ISCSI_PARAM_CONN_PORT: case ISCSI_PARAM_LOCAL_PORT: if (sin) - len = sprintf(buf, "%hu\n", be16_to_cpu(sin->sin_port)); + len = sysfs_emit(buf, "%hu\n", be16_to_cpu(sin->sin_port)); else - len = sprintf(buf, "%hu\n", + len = sysfs_emit(buf, "%hu\n", be16_to_cpu(sin6->sin6_port)); break; default: @@ -3608,88 +3608,88 @@ int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn, switch(param) { case ISCSI_PARAM_PING_TMO: - len = sprintf(buf, "%u\n", conn->ping_timeout); + len = sysfs_emit(buf, "%u\n", conn->ping_timeout); break; case ISCSI_PARAM_RECV_TMO: - len = sprintf(buf, "%u\n", conn->recv_timeout); + len = sysfs_emit(buf, "%u\n", conn->recv_timeout); break; case ISCSI_PARAM_MAX_RECV_DLENGTH: - len = sprintf(buf, "%u\n", conn->max_recv_dlength); + len = sysfs_emit(buf, "%u\n", conn->max_recv_dlength); break; case ISCSI_PARAM_MAX_XMIT_DLENGTH: - len = sprintf(buf, "%u\n", conn->max_xmit_dlength); + len = sysfs_emit(buf, "%u\n", conn->max_xmit_dlength); break; case ISCSI_PARAM_HDRDGST_EN: - len = sprintf(buf, "%d\n", conn->hdrdgst_en); + len = sysfs_emit(buf, "%d\n", conn->hdrdgst_en); break; case ISCSI_PARAM_DATADGST_EN: - len = sprintf(buf, "%d\n", conn->datadgst_en); + len = sysfs_emit(buf, "%d\n", conn->datadgst_en); break; case ISCSI_PARAM_IFMARKER_EN: - len = sprintf(buf, "%d\n", conn->ifmarker_en); + len = sysfs_emit(buf, "%d\n", conn->ifmarker_en); break; case ISCSI_PARAM_OFMARKER_EN: - len = sprintf(buf, "%d\n", conn->ofmarker_en); + len = sysfs_emit(buf, "%d\n", conn->ofmarker_en); break; case ISCSI_PARAM_EXP_STATSN: - len = sprintf(buf, "%u\n", conn->exp_statsn); + len = sysfs_emit(buf, "%u\n", conn->exp_statsn); break; case ISCSI_PARAM_PERSISTENT_PORT: - len = sprintf(buf, "%d\n", conn->persistent_port); + len = sysfs_emit(buf, "%d\n", conn->persistent_port); break; case ISCSI_PARAM_PERSISTENT_ADDRESS: - len = sprintf(buf, "%s\n", conn->persistent_address); + len = sysfs_emit(buf, "%s\n", conn->persistent_address); break; case ISCSI_PARAM_STATSN: - len = sprintf(buf, "%u\n", conn->statsn); + len = sysfs_emit(buf, "%u\n", conn->statsn); break; case ISCSI_PARAM_MAX_SEGMENT_SIZE: - len = sprintf(buf, "%u\n", conn->max_segment_size); + len = sysfs_emit(buf, "%u\n", conn->max_segment_size); break; case ISCSI_PARAM_KEEPALIVE_TMO: - len = sprintf(buf, "%u\n", conn->keepalive_tmo); + len = sysfs_emit(buf, "%u\n", conn->keepalive_tmo); break; case ISCSI_PARAM_LOCAL_PORT: - len = sprintf(buf, "%u\n", conn->local_port); + len = sysfs_emit(buf, "%u\n", conn->local_port); break; case ISCSI_PARAM_TCP_TIMESTAMP_STAT: - len = sprintf(buf, "%u\n", conn->tcp_timestamp_stat); + len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_stat); break; case ISCSI_PARAM_TCP_NAGLE_DISABLE: - len = sprintf(buf, "%u\n", conn->tcp_nagle_disable); + len = sysfs_emit(buf, "%u\n", conn->tcp_nagle_disable); break; case ISCSI_PARAM_TCP_WSF_DISABLE: - len = sprintf(buf, "%u\n", conn->tcp_wsf_disable); + len = sysfs_emit(buf, "%u\n", conn->tcp_wsf_disable); break; case ISCSI_PARAM_TCP_TIMER_SCALE: - len = sprintf(buf, "%u\n", conn->tcp_timer_scale); + len = sysfs_emit(buf, "%u\n", conn->tcp_timer_scale); break; case ISCSI_PARAM_TCP_TIMESTAMP_EN: - len = sprintf(buf, "%u\n", conn->tcp_timestamp_en); + len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_en); break; case ISCSI_PARAM_IP_FRAGMENT_DISABLE: - len = sprintf(buf, "%u\n", conn->fragment_disable); + len = sysfs_emit(buf, "%u\n", conn->fragment_disable); break; case ISCSI_PARAM_IPV4_TOS: - len = sprintf(buf, "%u\n", conn->ipv4_tos); + len = sysfs_emit(buf, "%u\n", conn->ipv4_tos); break; case ISCSI_PARAM_IPV6_TC: - len = sprintf(buf, "%u\n", conn->ipv6_traffic_class); + len = sysfs_emit(buf, "%u\n", conn->ipv6_traffic_class); break; case ISCSI_PARAM_IPV6_FLOW_LABEL: - len = sprintf(buf, "%u\n", conn->ipv6_flow_label); + len = sysfs_emit(buf, "%u\n", conn->ipv6_flow_label); break; case ISCSI_PARAM_IS_FW_ASSIGNED_IPV6: - len = sprintf(buf, "%u\n", conn->is_fw_assigned_ipv6); + len = sysfs_emit(buf, "%u\n", conn->is_fw_assigned_ipv6); break; case ISCSI_PARAM_TCP_XMIT_WSF: - len = sprintf(buf, "%u\n", conn->tcp_xmit_wsf); + len = sysfs_emit(buf, "%u\n", conn->tcp_xmit_wsf); break; case ISCSI_PARAM_TCP_RECV_WSF: - len = sprintf(buf, "%u\n", conn->tcp_recv_wsf); + len = sysfs_emit(buf, "%u\n", conn->tcp_recv_wsf); break; case ISCSI_PARAM_LOCAL_IPADDR: - len = sprintf(buf, "%s\n", conn->local_ipaddr); + len = sysfs_emit(buf, "%s\n", conn->local_ipaddr); break; default: return -ENOSYS; @@ -3707,13 +3707,13 @@ int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, switch (param) { case ISCSI_HOST_PARAM_NETDEV_NAME: - len = sprintf(buf, "%s\n", ihost->netdev); + len = sysfs_emit(buf, "%s\n", ihost->netdev); break; case ISCSI_HOST_PARAM_HWADDRESS: - len = sprintf(buf, "%s\n", ihost->hwaddress); + len = sysfs_emit(buf, "%s\n", ihost->hwaddress); break; case ISCSI_HOST_PARAM_INITIATOR_NAME: - len = sprintf(buf, "%s\n", ihost->initiatorname); + len = sysfs_emit(buf, "%s\n", ihost->initiatorname); break; default: return -ENOSYS; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 69a1f55499ef..c1eff85a8976 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -135,7 +135,8 @@ show_transport_handle(struct device *dev, struct device_attribute *attr, if (!capable(CAP_SYS_ADMIN)) return -EACCES; - return sprintf(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport)); + return sysfs_emit(buf, "%llu\n", + (unsigned long long)iscsi_handle(priv->iscsi_transport)); } static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL); @@ -145,7 +146,7 @@ show_transport_##name(struct device *dev, \ struct device_attribute *attr,char *buf) \ { \ struct iscsi_internal *priv = dev_to_iscsi_internal(dev); \ - return sprintf(buf, format"\n", priv->iscsi_transport->name); \ + return sysfs_emit(buf, format"\n", priv->iscsi_transport->name);\ } \ static DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL); @@ -186,7 +187,7 @@ static ssize_t show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); - return sprintf(buf, "%llu\n", (unsigned long long) ep->id); + return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id); } static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL); @@ -2883,6 +2884,9 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) struct iscsi_cls_session *session; int err = 0, value = 0; + if (ev->u.set_param.len > PAGE_SIZE) + return -EINVAL; + session = iscsi_session_lookup(ev->u.set_param.sid); conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid); if (!conn || !session) @@ -3030,6 +3034,9 @@ iscsi_set_host_param(struct iscsi_transport *transport, if (!transport->set_host_param) return -ENOSYS; + if (ev->u.set_host_param.len > PAGE_SIZE) + return -EINVAL; + shost = scsi_host_lookup(ev->u.set_host_param.host_no); if (!shost) { printk(KERN_ERR "set_host_param could not find host no %u\n", @@ -3963,7 +3970,7 @@ static ssize_t show_conn_state(struct device *dev, conn->state < ARRAY_SIZE(connection_state_names)) state = connection_state_names[conn->state]; - return sprintf(buf, "%s\n", state); + return sysfs_emit(buf, "%s\n", state); } static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state, NULL); @@ -4191,7 +4198,7 @@ show_priv_session_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); - return sprintf(buf, "%s\n", iscsi_session_state_name(session->state)); + return sysfs_emit(buf, "%s\n", iscsi_session_state_name(session->state)); } static ISCSI_CLASS_ATTR(priv_sess, state, S_IRUGO, show_priv_session_state, NULL); @@ -4200,7 +4207,7 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); - return sprintf(buf, "%d\n", session->creator); + return sysfs_emit(buf, "%d\n", session->creator); } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); @@ -4209,7 +4216,7 @@ show_priv_session_target_id(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); - return sprintf(buf, "%d\n", session->target_id); + return sysfs_emit(buf, "%d\n", session->target_id); } static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, show_priv_session_target_id, NULL); @@ -4222,8 +4229,8 @@ show_priv_session_##field(struct device *dev, \ struct iscsi_cls_session *session = \ iscsi_dev_to_session(dev->parent); \ if (session->field == -1) \ - return sprintf(buf, "off\n"); \ - return sprintf(buf, format"\n", session->field); \ + return sysfs_emit(buf, "off\n"); \ + return sysfs_emit(buf, format"\n", session->field); \ } #define iscsi_priv_session_attr_store(field) \ -- cgit v1.2.3-59-g8ed1b From f9dbdf97a5bd92b1a49cee3d591b55b11fd7a6d5 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Tue, 23 Feb 2021 21:39:01 -0800 Subject: scsi: iscsi: Verify lengths on passthrough PDUs Open-iSCSI sends passthrough PDUs over netlink, but the kernel should be verifying that the provided PDU header and data lengths fall within the netlink message to prevent accessing beyond that in memory. Cc: stable@vger.kernel.org Reported-by: Adam Nichols Reviewed-by: Lee Duncan Reviewed-by: Mike Christie Signed-off-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index c1eff85a8976..91074fd97f64 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3624,6 +3624,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) { int err = 0; u32 portid; + u32 pdu_len; struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_transport *transport = NULL; struct iscsi_internal *priv; @@ -3766,6 +3767,14 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) err = -EINVAL; break; case ISCSI_UEVENT_SEND_PDU: + pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); + + if ((ev->u.send_pdu.hdr_size > pdu_len) || + (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) { + err = -EINVAL; + break; + } + conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); if (conn) { mutex_lock(&conn_mutex); -- cgit v1.2.3-59-g8ed1b From 5e112d3fb89703a4981ded60561b5647db3693bf Mon Sep 17 00:00:00 2001 From: Julian Einwag Date: Tue, 16 Feb 2021 13:25:43 +0100 Subject: nvme-pci: mark Seagate Nytro XM1440 as QUIRK_NO_NS_DESC_LIST. The kernel fails to fully detect these SSDs, only the character devices are present: [ 10.785605] nvme nvme0: pci function 0000:04:00.0 [ 10.876787] nvme nvme1: pci function 0000:81:00.0 [ 13.198614] nvme nvme0: missing or invalid SUBNQN field. [ 13.198658] nvme nvme1: missing or invalid SUBNQN field. [ 13.206896] nvme nvme0: Shutdown timeout set to 20 seconds [ 13.215035] nvme nvme1: Shutdown timeout set to 20 seconds [ 13.225407] nvme nvme0: 16/0/0 default/read/poll queues [ 13.233602] nvme nvme1: 16/0/0 default/read/poll queues [ 13.239627] nvme nvme0: Identify Descriptors failed (8194) [ 13.246315] nvme nvme1: Identify Descriptors failed (8194) Adding the NVME_QUIRK_NO_NS_DESC_LIST fixes this problem. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=205679 Signed-off-by: Julian Einwag Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 38b0d694dfc9..65e01c34d024 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3234,7 +3234,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ -- cgit v1.2.3-59-g8ed1b From dc22c1c058b5c4fe967a20589e36f029ee42a706 Mon Sep 17 00:00:00 2001 From: Zoltán Böszörményi Date: Sun, 21 Feb 2021 06:12:16 +0100 Subject: nvme-pci: mark Kingston SKC2000 as not supporting the deepest power state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My 2TB SKC2000 showed the exact same symptoms that were provided in 538e4a8c57 ("nvme-pci: avoid the deepest sleep state on Kingston A2000 SSDs"), i.e. a complete NVME lockup that needed cold boot to get it back. According to some sources, the A2000 is simply a rebadged SKC2000 with a slightly optimized firmware. Adding the SKC2000 PCI ID to the quirk list with the same workaround as the A2000 made my laptop survive a 5 hours long Yocto bootstrap buildfest which reliably triggered the SSD lockup previously. Signed-off-by: Zoltán Böszörményi Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 65e01c34d024..8c5c3b5a579f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3266,6 +3266,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), -- cgit v1.2.3-59-g8ed1b From 6e6a6828c517fb6819479bf5187df5f39084eb9e Mon Sep 17 00:00:00 2001 From: Pascal Terjan Date: Tue, 23 Feb 2021 22:10:46 +0000 Subject: nvme-pci: add quirks for Lexar 256GB SSD Add the NVME_QUIRK_NO_NS_DESC_LIST and NVME_QUIRK_IGNORE_DEV_SUBNQN quirks for this buggy device. Reported and tested in https://bugs.mageia.org/show_bug.cgi?id=28417 Signed-off-by: Pascal Terjan Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8c5c3b5a579f..17ab3320d28b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3249,6 +3249,9 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ + .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ -- cgit v1.2.3-59-g8ed1b From 78570f8873c8cd44c12714c7fa7db2601ec5617d Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 12 Feb 2021 10:30:15 +0100 Subject: nvme-hwmon: Return error code when registration fails The hwmon pointer wont be NULL if the registration fails. Though the exit code path will assign it to ctrl->hwmon_device. Later nvme_hwmon_exit() will try to free the invalid pointer. Avoid this by returning the error code from hwmon_device_register_with_info(). Fixes: ed7770f66286 ("nvme/hwmon: rework to avoid devm allocation") Signed-off-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/hwmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 8f9e96986780..0a586d712920 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -248,6 +248,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl) if (IS_ERR(hwmon)) { dev_warn(dev, "Failed to instantiate hwmon device\n"); kfree(data); + return PTR_ERR(hwmon); } ctrl->hwmon_device = hwmon; return 0; -- cgit v1.2.3-59-g8ed1b From 32feb6de47242e54692eceab52cfae8616aa0518 Mon Sep 17 00:00:00 2001 From: Martin George Date: Thu, 11 Feb 2021 23:28:26 +0530 Subject: nvme-fabrics: fix kato initialization Currently kato is initialized to NVME_DEFAULT_KATO for both discovery & i/o controllers. This is a problem specifically for non-persistent discovery controllers since it always ends up with a non-zero kato value. Fix this by initializing kato to zero instead, and ensuring various controllers are assigned appropriate kato values as follows: non-persistent controllers - kato set to zero persistent controllers - kato set to NVMF_DEV_DISC_TMO (or any positive int via nvme-cli) i/o controllers - kato set to NVME_DEFAULT_KATO (or any positive int via nvme-cli) Signed-off-by: Martin George Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5dfd806fc2d2..604ab0e5a2ad 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -630,7 +630,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->queue_size = NVMF_DEF_QUEUE_SIZE; opts->nr_io_queues = num_online_cpus(); opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY; - opts->kato = NVME_DEFAULT_KATO; + opts->kato = 0; opts->duplicate_connect = false; opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO; opts->hdr_digest = false; @@ -893,6 +893,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->nr_write_queues = 0; opts->nr_poll_queues = 0; opts->duplicate_connect = true; + } else { + if (!opts->kato) + opts->kato = NVME_DEFAULT_KATO; } if (ctrl_loss_tmo < 0) { opts->max_reconnects = -1; -- cgit v1.2.3-59-g8ed1b From d9f273b7585c380d7a10d4b3187ddc2d37f2740b Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 17 Feb 2021 17:19:40 +0000 Subject: nvmet: model_number must be immutable once set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we have already established connection to nvmf target, it shouldn't be allowed to change the model_number. E.g. if someone will identify ctrl and get model_number of "my_model" later on will change the model_numbel via configfs to "my_new_model" this will break the NVMe specification for "Get Log Page – Persistent Event Log" that refers to Model Number as: "This field contains the same value as reported in the Model Number field of the Identify Controller data structure, bytes 63:24." Although it doesn't mentioned explicitly that this field can't be changed, we can assume it. So allow setting this field only once: using configfs or in the first identify ctrl operation. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 36 ++++++++++++++++++++--------- drivers/nvme/target/configfs.c | 50 +++++++++++++++++++---------------------- drivers/nvme/target/core.c | 2 +- drivers/nvme/target/nvmet.h | 7 +----- 4 files changed, 50 insertions(+), 45 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index bc6a774f2124..fe6b8aa90b53 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -313,27 +313,40 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); } -static void nvmet_id_set_model_number(struct nvme_id_ctrl *id, - struct nvmet_subsys *subsys) +static u16 nvmet_set_model_number(struct nvmet_subsys *subsys) { - const char *model = NVMET_DEFAULT_CTRL_MODEL; - struct nvmet_subsys_model *subsys_model; + u16 status = 0; + + mutex_lock(&subsys->lock); + if (!subsys->model_number) { + subsys->model_number = + kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL); + if (!subsys->model_number) + status = NVME_SC_INTERNAL; + } + mutex_unlock(&subsys->lock); - rcu_read_lock(); - subsys_model = rcu_dereference(subsys->model); - if (subsys_model) - model = subsys_model->number; - memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' '); - rcu_read_unlock(); + return status; } static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; + struct nvmet_subsys *subsys = ctrl->subsys; struct nvme_id_ctrl *id; u32 cmd_capsule_size; u16 status = 0; + /* + * If there is no model number yet, set it now. It will then remain + * stable for the life time of the subsystem. + */ + if (!subsys->model_number) { + status = nvmet_set_model_number(subsys); + if (status) + goto out; + } + id = kzalloc(sizeof(*id), GFP_KERNEL); if (!id) { status = NVME_SC_INTERNAL; @@ -347,7 +360,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) memset(id->sn, ' ', sizeof(id->sn)); bin2hex(id->sn, &ctrl->subsys->serial, min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); - nvmet_id_set_model_number(id, ctrl->subsys); + memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number, + strlen(subsys->model_number), ' '); memcpy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE), ' '); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 635a7cb45d0b..e5dbd1923b7b 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1118,16 +1118,12 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); - struct nvmet_subsys_model *subsys_model; - char *model = NVMET_DEFAULT_CTRL_MODEL; int ret; - rcu_read_lock(); - subsys_model = rcu_dereference(subsys->model); - if (subsys_model) - model = subsys_model->number; - ret = snprintf(page, PAGE_SIZE, "%s\n", model); - rcu_read_unlock(); + mutex_lock(&subsys->lock); + ret = snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number ? + subsys->model_number : NVMET_DEFAULT_CTRL_MODEL); + mutex_unlock(&subsys->lock); return ret; } @@ -1138,14 +1134,17 @@ static bool nvmet_is_ascii(const char c) return c >= 0x20 && c <= 0x7e; } -static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, - const char *page, size_t count) +static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, + const char *page, size_t count) { - struct nvmet_subsys *subsys = to_subsys(item); - struct nvmet_subsys_model *new_model; - char *new_model_number; int pos = 0, len; + if (subsys->model_number) { + pr_err("Can't set model number. %s is already assigned\n", + subsys->model_number); + return -EINVAL; + } + len = strcspn(page, "\n"); if (!len) return -EINVAL; @@ -1155,28 +1154,25 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, return -EINVAL; } - new_model_number = kmemdup_nul(page, len, GFP_KERNEL); - if (!new_model_number) + subsys->model_number = kmemdup_nul(page, len, GFP_KERNEL); + if (!subsys->model_number) return -ENOMEM; + return count; +} - new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL); - if (!new_model) { - kfree(new_model_number); - return -ENOMEM; - } - memcpy(new_model->number, new_model_number, len); +static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + ssize_t ret; down_write(&nvmet_config_sem); mutex_lock(&subsys->lock); - new_model = rcu_replace_pointer(subsys->model, new_model, - mutex_is_locked(&subsys->lock)); + ret = nvmet_subsys_attr_model_store_locked(subsys, page, count); mutex_unlock(&subsys->lock); up_write(&nvmet_config_sem); - kfree_rcu(new_model, rcuhead); - kfree(new_model_number); - - return count; + return ret; } CONFIGFS_ATTR(nvmet_subsys_, attr_model); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 67bbf0e3b507..be6fcdaf51a7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1532,7 +1532,7 @@ static void nvmet_subsys_free(struct kref *ref) nvmet_passthru_subsys_free(subsys); kfree(subsys->subsysnqn); - kfree_rcu(subsys->model, rcuhead); + kfree(subsys->model_number); kfree(subsys); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index cdfa537b1c0a..4b84edb49f22 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -208,11 +208,6 @@ struct nvmet_ctrl { bool pi_support; }; -struct nvmet_subsys_model { - struct rcu_head rcuhead; - char number[]; -}; - struct nvmet_subsys { enum nvme_subsys_type type; @@ -242,7 +237,7 @@ struct nvmet_subsys { struct config_group namespaces_group; struct config_group allowed_hosts_group; - struct nvmet_subsys_model __rcu *model; + char *model_number; #ifdef CONFIG_NVME_TARGET_PASSTHRU struct nvme_ctrl *passthru_ctrl; -- cgit v1.2.3-59-g8ed1b From 46fe18b16c4656969347fc0a3d83a034e47d9119 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Mar 2021 12:39:36 -0700 Subject: io_uring: move to using create_io_thread() This allows us to do task creation and setup without needing to use completions to try and synchronize with the starting thread. Get rid of the old io_wq_fork_thread() wrapper, and the 'wq' and 'worker' startup completion events - we can now do setup before the task is running. Signed-off-by: Jens Axboe --- fs/io-wq.c | 123 +++++++++++++++++----------------------------------------- fs/io-wq.h | 2 - fs/io_uring.c | 38 +++++++++--------- 3 files changed, 54 insertions(+), 109 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 19f18389ead2..d7cfe8fd282a 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -54,7 +54,6 @@ struct io_worker { spinlock_t lock; struct completion ref_done; - struct completion started; struct rcu_head rcu; }; @@ -116,7 +115,6 @@ struct io_wq { struct io_wq_hash *hash; refcount_t refs; - struct completion started; struct completion exited; atomic_t worker_refs; @@ -199,6 +197,7 @@ static void io_worker_exit(struct io_worker *worker) kfree_rcu(worker, rcu); if (atomic_dec_and_test(&wqe->wq->worker_refs)) complete(&wqe->wq->worker_done); + do_exit(0); } static inline bool io_wqe_run_queue(struct io_wqe *wqe) @@ -273,14 +272,6 @@ static void io_wqe_dec_running(struct io_worker *worker) io_wqe_wake_worker(wqe, acct); } -static void io_worker_start(struct io_worker *worker) -{ - current->flags |= PF_NOFREEZE; - worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); - io_wqe_inc_running(worker); - complete(&worker->started); -} - /* * Worker will start processing some work. Move it to the busy list, if * it's currently on the freelist @@ -489,8 +480,13 @@ static int io_wqe_worker(void *data) struct io_worker *worker = data; struct io_wqe *wqe = worker->wqe; struct io_wq *wq = wqe->wq; + char buf[TASK_COMM_LEN]; - io_worker_start(worker); + worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); + io_wqe_inc_running(worker); + + sprintf(buf, "iou-wrk-%d", wq->task_pid); + set_task_comm(current, buf); while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { set_current_state(TASK_INTERRUPTIBLE); @@ -565,67 +561,11 @@ void io_wq_worker_sleeping(struct task_struct *tsk) raw_spin_unlock_irq(&worker->wqe->lock); } -static int task_thread(void *data, int index) -{ - struct io_worker *worker = data; - struct io_wqe *wqe = worker->wqe; - struct io_wqe_acct *acct = &wqe->acct[index]; - struct io_wq *wq = wqe->wq; - char buf[TASK_COMM_LEN]; - - sprintf(buf, "iou-wrk-%d", wq->task_pid); - set_task_comm(current, buf); - - current->pf_io_worker = worker; - worker->task = current; - - set_cpus_allowed_ptr(current, cpumask_of_node(wqe->node)); - current->flags |= PF_NO_SETAFFINITY; - - raw_spin_lock_irq(&wqe->lock); - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); - list_add_tail_rcu(&worker->all_list, &wqe->all_list); - worker->flags |= IO_WORKER_F_FREE; - if (index == IO_WQ_ACCT_BOUND) - worker->flags |= IO_WORKER_F_BOUND; - if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) - worker->flags |= IO_WORKER_F_FIXED; - acct->nr_workers++; - raw_spin_unlock_irq(&wqe->lock); - - io_wqe_worker(data); - do_exit(0); -} - -static int task_thread_bound(void *data) -{ - return task_thread(data, IO_WQ_ACCT_BOUND); -} - -static int task_thread_unbound(void *data) -{ - return task_thread(data, IO_WQ_ACCT_UNBOUND); -} - -pid_t io_wq_fork_thread(int (*fn)(void *), void *arg) -{ - unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| - CLONE_IO|SIGCHLD; - struct kernel_clone_args args = { - .flags = ((lower_32_bits(flags) | CLONE_VM | - CLONE_UNTRACED) & ~CSIGNAL), - .exit_signal = (lower_32_bits(flags) & CSIGNAL), - .stack = (unsigned long)fn, - .stack_size = (unsigned long)arg, - }; - - return kernel_clone(&args); -} - static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) { + struct io_wqe_acct *acct = &wqe->acct[index]; struct io_worker *worker; - pid_t pid; + struct task_struct *tsk; __set_current_state(TASK_RUNNING); @@ -638,21 +578,33 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) worker->wqe = wqe; spin_lock_init(&worker->lock); init_completion(&worker->ref_done); - init_completion(&worker->started); atomic_inc(&wq->worker_refs); - if (index == IO_WQ_ACCT_BOUND) - pid = io_wq_fork_thread(task_thread_bound, worker); - else - pid = io_wq_fork_thread(task_thread_unbound, worker); - if (pid < 0) { + tsk = create_io_thread(io_wqe_worker, worker, wqe->node); + if (IS_ERR(tsk)) { if (atomic_dec_and_test(&wq->worker_refs)) complete(&wq->worker_done); kfree(worker); return false; } - wait_for_completion(&worker->started); + + tsk->pf_io_worker = worker; + worker->task = tsk; + set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node)); + tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY; + + raw_spin_lock_irq(&wqe->lock); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); + list_add_tail_rcu(&worker->all_list, &wqe->all_list); + worker->flags |= IO_WORKER_F_FREE; + if (index == IO_WQ_ACCT_BOUND) + worker->flags |= IO_WORKER_F_BOUND; + if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) + worker->flags |= IO_WORKER_F_FIXED; + acct->nr_workers++; + raw_spin_unlock_irq(&wqe->lock); + wake_up_new_task(tsk); return true; } @@ -696,6 +648,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe, static bool io_wq_worker_wake(struct io_worker *worker, void *data) { + set_notify_signal(worker->task); wake_up_process(worker->task); return false; } @@ -752,10 +705,6 @@ static int io_wq_manager(void *data) sprintf(buf, "iou-mgr-%d", wq->task_pid); set_task_comm(current, buf); - current->flags |= PF_IO_WORKER; - wq->manager = get_task_struct(current); - - complete(&wq->started); do { set_current_state(TASK_INTERRUPTIBLE); @@ -815,21 +764,20 @@ append: static int io_wq_fork_manager(struct io_wq *wq) { - int ret; + struct task_struct *tsk; if (wq->manager) return 0; reinit_completion(&wq->worker_done); - current->flags |= PF_IO_WORKER; - ret = io_wq_fork_thread(io_wq_manager, wq); - current->flags &= ~PF_IO_WORKER; - if (ret >= 0) { - wait_for_completion(&wq->started); + tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE); + if (!IS_ERR(tsk)) { + wq->manager = get_task_struct(tsk); + wake_up_new_task(tsk); return 0; } - return ret; + return PTR_ERR(tsk); } static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) @@ -1062,7 +1010,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) } wq->task_pid = current->pid; - init_completion(&wq->started); init_completion(&wq->exited); refcount_set(&wq->refs, 1); diff --git a/fs/io-wq.h b/fs/io-wq.h index 42f0be64a84d..5fbf7997149e 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -119,8 +119,6 @@ void io_wq_put_and_exit(struct io_wq *wq); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_hash_work(struct io_wq_work *work, void *val); -pid_t io_wq_fork_thread(int (*fn)(void *), void *arg); - static inline bool io_wq_is_hashed(struct io_wq_work *work) { return work->flags & IO_WQ_WORK_HASHED; diff --git a/fs/io_uring.c b/fs/io_uring.c index e55369555e5c..76e243c056b5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6668,7 +6668,6 @@ static int io_sq_thread(void *data) sprintf(buf, "iou-sqp-%d", sqd->task_pid); set_task_comm(current, buf); - sqd->thread = current; current->pf_io_worker = NULL; if (sqd->sq_cpu != -1) @@ -6677,8 +6676,6 @@ static int io_sq_thread(void *data) set_cpus_allowed_ptr(current, cpu_online_mask); current->flags |= PF_NO_SETAFFINITY; - complete(&sqd->completion); - wait_for_completion(&sqd->startup); while (!io_sq_thread_should_stop(sqd)) { @@ -7818,21 +7815,22 @@ void __io_uring_free(struct task_struct *tsk) static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx) { + struct task_struct *tsk; int ret; clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); reinit_completion(&sqd->completion); ctx->sqo_exec = 0; sqd->task_pid = current->pid; - current->flags |= PF_IO_WORKER; - ret = io_wq_fork_thread(io_sq_thread, sqd); - current->flags &= ~PF_IO_WORKER; - if (ret < 0) { - sqd->thread = NULL; - return ret; - } - wait_for_completion(&sqd->completion); - return io_uring_alloc_task_context(sqd->thread, ctx); + tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); + if (IS_ERR(tsk)) + return PTR_ERR(tsk); + ret = io_uring_alloc_task_context(tsk, ctx); + if (ret) + set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); + sqd->thread = tsk; + wake_up_new_task(tsk); + return ret; } static int io_sq_offload_create(struct io_ring_ctx *ctx, @@ -7855,6 +7853,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, fdput(f); } if (ctx->flags & IORING_SETUP_SQPOLL) { + struct task_struct *tsk; struct io_sq_data *sqd; ret = -EPERM; @@ -7896,15 +7895,16 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, } sqd->task_pid = current->pid; - current->flags |= PF_IO_WORKER; - ret = io_wq_fork_thread(io_sq_thread, sqd); - current->flags &= ~PF_IO_WORKER; - if (ret < 0) { - sqd->thread = NULL; + tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); + if (IS_ERR(tsk)) { + ret = PTR_ERR(tsk); goto err; } - wait_for_completion(&sqd->completion); - ret = io_uring_alloc_task_context(sqd->thread, ctx); + ret = io_uring_alloc_task_context(tsk, ctx); + if (ret) + set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); + sqd->thread = tsk; + wake_up_new_task(tsk); if (ret) goto err; } else if (p->flags & IORING_SETUP_SQ_AFF) { -- cgit v1.2.3-59-g8ed1b From ca0a26511c679a797f86589894a4523db36d833e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Mar 2021 17:15:48 -0700 Subject: io_uring: don't keep looping for more events if we can't flush overflow It doesn't make sense to wait for more events to come in, if we can't even flush the overflow we already have to the ring. Return -EBUSY for that condition, just like we do for attempts to submit with overflow pending. Cc: stable@vger.kernel.org # 5.11 Signed-off-by: Jens Axboe --- fs/io_uring.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 76e243c056b5..044170165402 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1451,18 +1451,22 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, return all_flushed; } -static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, +static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, struct files_struct *files) { + bool ret = true; + if (test_bit(0, &ctx->cq_check_overflow)) { /* iopoll syncs against uring_lock, not completion_lock */ if (ctx->flags & IORING_SETUP_IOPOLL) mutex_lock(&ctx->uring_lock); - __io_cqring_overflow_flush(ctx, force, tsk, files); + ret = __io_cqring_overflow_flush(ctx, force, tsk, files); if (ctx->flags & IORING_SETUP_IOPOLL) mutex_unlock(&ctx->uring_lock); } + + return ret; } static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) @@ -6883,11 +6887,16 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); trace_io_uring_cqring_wait(ctx, min_events); do { - io_cqring_overflow_flush(ctx, false, NULL, NULL); + /* if we can't even flush overflow, don't wait for more */ + if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) { + ret = -EBUSY; + break; + } prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE); ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); finish_wait(&ctx->wait, &iowq.wq); + cond_resched(); } while (ret > 0); restore_saved_sigmask_unless(ret == -EINTR); -- cgit v1.2.3-59-g8ed1b From b5b0ecb736f1ce1e68eb50613c0cfecff10198eb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 4 Mar 2021 21:02:58 -0700 Subject: io_uring: clear IOCB_WAITQ for non -EIOCBQUEUED return The callback can only be armed, if we get -EIOCBQUEUED returned. It's important that we clear the WAITQ bit for other cases, otherwise we can queue for async retry and filemap will assume that we're armed and return -EAGAIN instead of just blocking for the IO. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 044170165402..5762750c666c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3286,6 +3286,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) if (ret == -EIOCBQUEUED) return 0; /* we got some bytes, but not all. retry. */ + kiocb->ki_flags &= ~IOCB_WAITQ; } while (ret > 0 && ret < io_size); done: kiocb_done(kiocb, ret, issue_flags); -- cgit v1.2.3-59-g8ed1b From 09ca6c40c2024211657fdb2c50522a355610c3b7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 5 Mar 2021 08:14:08 -0700 Subject: io-wq: kill hashed waitqueue before manager exits If we race with shutting down the io-wq context and someone queueing a hashed entry, then we can exit the manager with it armed. If it then triggers after the manager has exited, we can have a use-after-free where io_wqe_hash_wake() attempts to wake a now gone manager process. Move the killing of the hashed write queue into the manager itself, so that we know we've killed it before the task exits. Fixes: e941894eae31 ("io-wq: make buffered file write hashed work map per-ctx") Signed-off-by: Jens Axboe --- fs/io-wq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index d7cfe8fd282a..28868eb4cd09 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -726,6 +726,11 @@ static int io_wq_manager(void *data) if (atomic_read(&wq->worker_refs)) wait_for_completion(&wq->worker_done); + spin_lock_irq(&wq->hash->wait.lock); + for_each_node(node) + list_del_init(&wq->wqes[node]->wait.entry); + spin_unlock_irq(&wq->hash->wait.lock); + io_wq_cancel_pending(wq); complete(&wq->exited); do_exit(0); @@ -1051,15 +1056,11 @@ static void io_wq_destroy(struct io_wq *wq) set_bit(IO_WQ_BIT_EXIT, &wq->state); io_wq_destroy_manager(wq); - spin_lock_irq(&wq->hash->wait.lock); for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; - - list_del_init(&wqe->wait.entry); WARN_ON_ONCE(!wq_list_empty(&wqe->work_list)); kfree(wqe); } - spin_unlock_irq(&wq->hash->wait.lock); io_wq_put_hash(wq->hash); kfree(wq->wqes); kfree(wq); -- cgit v1.2.3-59-g8ed1b From 86e0d6766cf909813474857bd22fdc04c97c0b36 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 5 Mar 2021 08:44:39 -0700 Subject: io_uring: make SQPOLL thread parking saner We have this weird true/false return from parking, and then some of the callers decide to look at that. It can lead to unbalanced parks and sqd locking. Have the callers check the thread status once it's parked. We know we have the lock at that point, so it's either valid or it's NULL. Fix race with parking on thread exit. We need to be careful here with ordering of the sdq->lock and the IO_SQ_THREAD_SHOULD_PARK bit. Rename sqd->completion to sqd->parked to reflect that this is the only thing this completion event doesn. Signed-off-by: Jens Axboe --- fs/io_uring.c | 65 +++++++++++++++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5762750c666c..d30cbf0f7b1c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -274,7 +274,7 @@ struct io_sq_data { unsigned long state; struct completion startup; - struct completion completion; + struct completion parked; struct completion exited; }; @@ -6656,7 +6656,7 @@ static void io_sq_thread_parkme(struct io_sq_data *sqd) * wait_task_inactive(). */ preempt_disable(); - complete(&sqd->completion); + complete(&sqd->parked); schedule_preempt_disabled(); preempt_enable(); } @@ -6751,14 +6751,18 @@ static int io_sq_thread(void *data) io_run_task_work(); - if (io_sq_thread_should_park(sqd)) - io_sq_thread_parkme(sqd); - /* - * Clear thread under lock so that concurrent parks work correctly + * Ensure that we park properly if racing with someone trying to park + * while we're exiting. If we fail to grab the lock, check park and + * park if necessary. The ordering with the park bit and the lock + * ensures that we catch this reliably. */ - complete(&sqd->completion); - mutex_lock(&sqd->lock); + if (!mutex_trylock(&sqd->lock)) { + if (io_sq_thread_should_park(sqd)) + io_sq_thread_parkme(sqd); + mutex_lock(&sqd->lock); + } + sqd->thread = NULL; list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { ctx->sqo_exec = 1; @@ -7067,29 +7071,25 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) static void io_sq_thread_unpark(struct io_sq_data *sqd) __releases(&sqd->lock) { - if (!sqd->thread) - return; if (sqd->thread == current) return; clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - wake_up_state(sqd->thread, TASK_PARKED); + if (sqd->thread) + wake_up_state(sqd->thread, TASK_PARKED); mutex_unlock(&sqd->lock); } -static bool io_sq_thread_park(struct io_sq_data *sqd) +static void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { if (sqd->thread == current) - return true; + return; + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); mutex_lock(&sqd->lock); - if (!sqd->thread) { - mutex_unlock(&sqd->lock); - return false; + if (sqd->thread) { + wake_up_process(sqd->thread); + wait_for_completion(&sqd->parked); } - set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - wake_up_process(sqd->thread); - wait_for_completion(&sqd->completion); - return true; } static void io_sq_thread_stop(struct io_sq_data *sqd) @@ -7185,7 +7185,7 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p) mutex_init(&sqd->lock); init_waitqueue_head(&sqd->wait); init_completion(&sqd->startup); - init_completion(&sqd->completion); + init_completion(&sqd->parked); init_completion(&sqd->exited); return sqd; } @@ -7829,7 +7829,7 @@ static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx) int ret; clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); - reinit_completion(&sqd->completion); + reinit_completion(&sqd->parked); ctx->sqo_exec = 0; sqd->task_pid = current->pid; tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); @@ -8712,7 +8712,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, struct files_struct *files) { struct task_struct *task = current; - bool did_park = false; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { /* never started, nothing to cancel */ @@ -8720,11 +8719,10 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, io_sq_offload_start(ctx); return; } - did_park = io_sq_thread_park(ctx->sq_data); - if (did_park) { - task = ctx->sq_data->thread; + io_sq_thread_park(ctx->sq_data); + task = ctx->sq_data->thread; + if (task) atomic_inc(&task->io_uring->in_idle); - } } io_cancel_defer_files(ctx, task, files); @@ -8733,10 +8731,10 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, if (!files) io_uring_try_cancel_requests(ctx, task, NULL); - if (did_park) { + if (task) atomic_dec(&task->io_uring->in_idle); + if (ctx->sq_data) io_sq_thread_unpark(ctx->sq_data); - } } /* @@ -8836,15 +8834,12 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx) if (!sqd) return; - if (!io_sq_thread_park(sqd)) - return; - tctx = ctx->sq_data->thread->io_uring; - /* can happen on fork/alloc failure, just ignore that state */ - if (!tctx) { + io_sq_thread_park(sqd); + if (!sqd->thread || !sqd->thread->io_uring) { io_sq_thread_unpark(sqd); return; } - + tctx = ctx->sq_data->thread->io_uring; atomic_inc(&tctx->in_idle); do { /* read completions before cancelations */ -- cgit v1.2.3-59-g8ed1b From e45cff58858883290c98f65d409839a7295c95f3 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 28 Feb 2021 22:35:14 +0000 Subject: io_uring: don't restrict issue_flags for io_openat 45d189c606292 ("io_uring: replace force_nonblock with flags") did something strange for io_openat() slicing all issue_flags but IO_URING_F_NONBLOCK. Not a bug for now, but better to just forward the flags. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d30cbf0f7b1c..92c25b5f1349 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3828,7 +3828,7 @@ err: static int io_openat(struct io_kiocb *req, unsigned int issue_flags) { - return io_openat2(req, issue_flags & IO_URING_F_NONBLOCK); + return io_openat2(req, issue_flags); } static int io_remove_buffers_prep(struct io_kiocb *req, -- cgit v1.2.3-59-g8ed1b From 21e27ac82db637d2f48f07b3777aae8e7ca52613 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 4 Mar 2021 13:20:49 -0600 Subject: RDMA/rxe: Fix missed IB reference counting in loopback When the noted patch below extending the reference taken by rxe_get_dev_from_net() in rxe_udp_encap_recv() until each skb is freed it was not matched by a reference in the loopback path resulting in underflows. Fixes: 899aba891cab ("RDMA/rxe: Fix FIXME in rxe_udp_encap_recv()") Link: https://lore.kernel.org/r/20210304192048.2958-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0701bd1ffd1a..01662727dca0 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -407,14 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } +/* fix up a send packet to match the packets + * received from UDP before looping them back + */ void rxe_loopback(struct sk_buff *skb) { + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + if (skb->protocol == htons(ETH_P_IP)) skb_pull(skb, sizeof(struct iphdr)); else skb_pull(skb, sizeof(struct ipv6hdr)); - rxe_rcv(skb); + if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev))) + kfree_skb(skb); + else + rxe_rcv(skb); } struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, -- cgit v1.2.3-59-g8ed1b From 5e4a7ccc965d951b0885875e903a32c6d4368573 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 4 Mar 2021 13:20:49 -0600 Subject: RDMA/rxe: Fix extra deref in rxe_rcv_mcast_pkt() rxe_rcv_mcast_pkt() dropped a reference to ib_device when no error occurred causing an underflow on the reference counter. This code is cleaned up to be clearer and easier to read. Fixes: 899aba891cab ("RDMA/rxe: Fix FIXME in rxe_udp_encap_recv()") Link: https://lore.kernel.org/r/20210304192048.2958-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_recv.c | 59 +++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 45d2f711bce2..7a49e27da23a 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -237,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) struct rxe_mc_elem *mce; struct rxe_qp *qp; union ib_gid dgid; - struct sk_buff *per_qp_skb; - struct rxe_pkt_info *per_qp_pkt; int err; if (skb->protocol == htons(ETH_P_IP)) @@ -250,10 +248,15 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) /* lookup mcast group corresponding to mgid, takes a ref */ mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); if (!mcg) - goto err1; /* mcast group not registered */ + goto drop; /* mcast group not registered */ spin_lock_bh(&mcg->mcg_lock); + /* this is unreliable datagram service so we let + * failures to deliver a multicast packet to a + * single QP happen and just move on and try + * the rest of them on the list + */ list_for_each_entry(mce, &mcg->qp_list, qp_list) { qp = mce->qp; @@ -266,39 +269,47 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) if (err) continue; - /* for all but the last qp create a new clone of the - * skb and pass to the qp. If an error occurs in the - * checks for the last qp in the list we need to - * free the skb since it hasn't been passed on to - * rxe_rcv_pkt() which would free it later. + /* for all but the last QP create a new clone of the + * skb and pass to the QP. Pass the original skb to + * the last QP in the list. */ if (mce->qp_list.next != &mcg->qp_list) { - per_qp_skb = skb_clone(skb, GFP_ATOMIC); - if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) { - kfree_skb(per_qp_skb); + struct sk_buff *cskb; + struct rxe_pkt_info *cpkt; + + cskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!cskb)) continue; + + if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) { + kfree_skb(cskb); + break; } + + cpkt = SKB_TO_PKT(cskb); + cpkt->qp = qp; + rxe_add_ref(qp); + rxe_rcv_pkt(cpkt, cskb); } else { - per_qp_skb = skb; - /* show we have consumed the skb */ - skb = NULL; + pkt->qp = qp; + rxe_add_ref(qp); + rxe_rcv_pkt(pkt, skb); + skb = NULL; /* mark consumed */ } - - if (unlikely(!per_qp_skb)) - continue; - - per_qp_pkt = SKB_TO_PKT(per_qp_skb); - per_qp_pkt->qp = qp; - rxe_add_ref(qp); - rxe_rcv_pkt(per_qp_pkt, per_qp_skb); } spin_unlock_bh(&mcg->mcg_lock); rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ -err1: - /* free skb if not consumed */ + if (likely(!skb)) + return; + + /* This only occurs if one of the checks fails on the last + * QP in the list above + */ + +drop: kfree_skb(skb); ib_device_put(&rxe->ib_dev); } -- cgit v1.2.3-59-g8ed1b From 545c4ab463c2224557e56b2609f88ed5be265405 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 4 Mar 2021 13:20:49 -0600 Subject: RDMA/rxe: Fix errant WARN_ONCE in rxe_completer() In rxe_comp.c in rxe_completer() the function free_pkt() did not clear skb which triggered a warning at 'done:' and could possibly at 'exit:'. The WARN_ONCE() calls are not actually needed. The call to free_pkt() is moved to the end to clearly show that all skbs are freed. Fixes: 899aba891cab ("RDMA/rxe: Fix FIXME in rxe_udp_encap_recv()") Link: https://lore.kernel.org/r/20210304192048.2958-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_comp.c | 55 +++++++++++++++--------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index a8ac791a1bb9..17a361b8dbb1 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -547,6 +547,7 @@ int rxe_completer(void *arg) struct sk_buff *skb = NULL; struct rxe_pkt_info *pkt = NULL; enum comp_state state; + int ret = 0; rxe_add_ref(qp); @@ -554,7 +555,8 @@ int rxe_completer(void *arg) qp->req.state == QP_STATE_RESET) { rxe_drain_resp_pkts(qp, qp->valid && qp->req.state == QP_STATE_ERROR); - goto exit; + ret = -EAGAIN; + goto done; } if (qp->comp.timeout) { @@ -564,8 +566,10 @@ int rxe_completer(void *arg) qp->comp.timeout_retry = 0; } - if (qp->req.need_retry) - goto exit; + if (qp->req.need_retry) { + ret = -EAGAIN; + goto done; + } state = COMPST_GET_ACK; @@ -636,8 +640,6 @@ int rxe_completer(void *arg) break; case COMPST_DONE: - if (pkt) - free_pkt(pkt); goto done; case COMPST_EXIT: @@ -660,7 +662,8 @@ int rxe_completer(void *arg) qp->qp_timeout_jiffies) mod_timer(&qp->retrans_timer, jiffies + qp->qp_timeout_jiffies); - goto exit; + ret = -EAGAIN; + goto done; case COMPST_ERROR_RETRY: /* we come here if the retry timer fired and we did @@ -672,18 +675,18 @@ int rxe_completer(void *arg) */ /* there is nothing to retry in this case */ - if (!wqe || (wqe->state == wqe_state_posted)) - goto exit; + if (!wqe || (wqe->state == wqe_state_posted)) { + pr_warn("Retry attempted without a valid wqe\n"); + ret = -EAGAIN; + goto done; + } /* if we've started a retry, don't start another * retry sequence, unless this is a timeout. */ if (qp->comp.started_retry && - !qp->comp.timeout_retry) { - if (pkt) - free_pkt(pkt); + !qp->comp.timeout_retry) goto done; - } if (qp->comp.retry_cnt > 0) { if (qp->comp.retry_cnt != 7) @@ -704,8 +707,6 @@ int rxe_completer(void *arg) qp->comp.started_retry = 1; rxe_run_task(&qp->req.task, 0); } - if (pkt) - free_pkt(pkt); goto done; } else { @@ -726,8 +727,8 @@ int rxe_completer(void *arg) mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); - free_pkt(pkt); - goto exit; + ret = -EAGAIN; + goto done; } else { rxe_counter_inc(rxe, RXE_CNT_RNR_RETRY_EXCEEDED); @@ -740,25 +741,15 @@ int rxe_completer(void *arg) WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); - if (pkt) - free_pkt(pkt); - goto exit; + ret = -EAGAIN; + goto done; } } -exit: - /* we come here if we are done with processing and want the task to - * exit from the loop calling us - */ - WARN_ON_ONCE(skb); - rxe_drop_ref(qp); - return -EAGAIN; - done: - /* we come here if we have processed a packet we want the task to call - * us again to see if there is anything else to do - */ - WARN_ON_ONCE(skb); + if (pkt) + free_pkt(pkt); rxe_drop_ref(qp); - return 0; + + return ret; } -- cgit v1.2.3-59-g8ed1b From a38fd8748464831584a19438cbb3082b5a2dab15 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 5 Mar 2021 17:33:41 -0800 Subject: Linux 5.12-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f9b54da2fca0..31dcdb3d61fa 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Frozen Wasteland # *DOCUMENTATION* -- cgit v1.2.3-59-g8ed1b From 0ae0337f929a970ee8d83e0e95e6b8d05562ce3b Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:01 +0100 Subject: uapi: virtio_ids: add a sound device type ID from OASIS spec The OASIS virtio spec defines a sound device type ID that is not present in the header yet. Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-2-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- include/uapi/linux/virtio_ids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index bc1c0621f5ed..029a2e07a7f9 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -51,6 +51,7 @@ #define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ -- cgit v1.2.3-59-g8ed1b From de3a9980d8c34b2479173e809afa820473db676a Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:02 +0100 Subject: ALSA: virtio: add virtio sound driver Introduce skeleton of the virtio sound driver. The driver implements the virtio sound device specification, which has become part of the virtio standard. Initial initialization of the device, virtqueues and creation of an empty ALSA sound device. Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-3-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- MAINTAINERS | 9 ++ include/uapi/linux/virtio_snd.h | 334 ++++++++++++++++++++++++++++++++++++++++ sound/Kconfig | 2 + sound/Makefile | 3 +- sound/virtio/Kconfig | 10 ++ sound/virtio/Makefile | 7 + sound/virtio/virtio_card.c | 324 ++++++++++++++++++++++++++++++++++++++ sound/virtio/virtio_card.h | 65 ++++++++ 8 files changed, 753 insertions(+), 1 deletion(-) create mode 100644 include/uapi/linux/virtio_snd.h create mode 100644 sound/virtio/Kconfig create mode 100644 sound/virtio/Makefile create mode 100644 sound/virtio/virtio_card.c create mode 100644 sound/virtio/virtio_card.h diff --git a/MAINTAINERS b/MAINTAINERS index d92f85ca831d..a68c543c28f1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19086,6 +19086,15 @@ W: https://virtio-mem.gitlab.io/ F: drivers/virtio/virtio_mem.c F: include/uapi/linux/virtio_mem.h +VIRTIO SOUND DRIVER +M: Anton Yakovlev +M: "Michael S. Tsirkin" +L: virtualization@lists.linux-foundation.org +L: alsa-devel@alsa-project.org (moderated for non-subscribers) +S: Maintained +F: include/uapi/linux/virtio_snd.h +F: sound/virtio/* + VIRTUAL BOX GUEST DEVICE DRIVER M: Hans de Goede M: Arnd Bergmann diff --git a/include/uapi/linux/virtio_snd.h b/include/uapi/linux/virtio_snd.h new file mode 100644 index 000000000000..dfe49547a7b0 --- /dev/null +++ b/include/uapi/linux/virtio_snd.h @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* + * Copyright (C) 2021 OpenSynergy GmbH + */ +#ifndef VIRTIO_SND_IF_H +#define VIRTIO_SND_IF_H + +#include + +/******************************************************************************* + * CONFIGURATION SPACE + */ +struct virtio_snd_config { + /* # of available physical jacks */ + __le32 jacks; + /* # of available PCM streams */ + __le32 streams; + /* # of available channel maps */ + __le32 chmaps; +}; + +enum { + /* device virtqueue indexes */ + VIRTIO_SND_VQ_CONTROL = 0, + VIRTIO_SND_VQ_EVENT, + VIRTIO_SND_VQ_TX, + VIRTIO_SND_VQ_RX, + /* # of device virtqueues */ + VIRTIO_SND_VQ_MAX +}; + +/******************************************************************************* + * COMMON DEFINITIONS + */ + +/* supported dataflow directions */ +enum { + VIRTIO_SND_D_OUTPUT = 0, + VIRTIO_SND_D_INPUT +}; + +enum { + /* jack control request types */ + VIRTIO_SND_R_JACK_INFO = 1, + VIRTIO_SND_R_JACK_REMAP, + + /* PCM control request types */ + VIRTIO_SND_R_PCM_INFO = 0x0100, + VIRTIO_SND_R_PCM_SET_PARAMS, + VIRTIO_SND_R_PCM_PREPARE, + VIRTIO_SND_R_PCM_RELEASE, + VIRTIO_SND_R_PCM_START, + VIRTIO_SND_R_PCM_STOP, + + /* channel map control request types */ + VIRTIO_SND_R_CHMAP_INFO = 0x0200, + + /* jack event types */ + VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, + VIRTIO_SND_EVT_JACK_DISCONNECTED, + + /* PCM event types */ + VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, + VIRTIO_SND_EVT_PCM_XRUN, + + /* common status codes */ + VIRTIO_SND_S_OK = 0x8000, + VIRTIO_SND_S_BAD_MSG, + VIRTIO_SND_S_NOT_SUPP, + VIRTIO_SND_S_IO_ERR +}; + +/* common header */ +struct virtio_snd_hdr { + __le32 code; +}; + +/* event notification */ +struct virtio_snd_event { + /* VIRTIO_SND_EVT_XXX */ + struct virtio_snd_hdr hdr; + /* optional event data */ + __le32 data; +}; + +/* common control request to query an item information */ +struct virtio_snd_query_info { + /* VIRTIO_SND_R_XXX_INFO */ + struct virtio_snd_hdr hdr; + /* item start identifier */ + __le32 start_id; + /* item count to query */ + __le32 count; + /* item information size in bytes */ + __le32 size; +}; + +/* common item information header */ +struct virtio_snd_info { + /* function group node id (High Definition Audio Specification 7.1.2) */ + __le32 hda_fn_nid; +}; + +/******************************************************************************* + * JACK CONTROL MESSAGES + */ +struct virtio_snd_jack_hdr { + /* VIRTIO_SND_R_JACK_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::jacks - 1 */ + __le32 jack_id; +}; + +/* supported jack features */ +enum { + VIRTIO_SND_JACK_F_REMAP = 0 +}; + +struct virtio_snd_jack_info { + /* common header */ + struct virtio_snd_info hdr; + /* supported feature bit map (1 << VIRTIO_SND_JACK_F_XXX) */ + __le32 features; + /* pin configuration (High Definition Audio Specification 7.3.3.31) */ + __le32 hda_reg_defconf; + /* pin capabilities (High Definition Audio Specification 7.3.4.9) */ + __le32 hda_reg_caps; + /* current jack connection status (0: disconnected, 1: connected) */ + __u8 connected; + + __u8 padding[7]; +}; + +/* jack remapping control request */ +struct virtio_snd_jack_remap { + /* .code = VIRTIO_SND_R_JACK_REMAP */ + struct virtio_snd_jack_hdr hdr; + /* selected association number */ + __le32 association; + /* selected sequence number */ + __le32 sequence; +}; + +/******************************************************************************* + * PCM CONTROL MESSAGES + */ +struct virtio_snd_pcm_hdr { + /* VIRTIO_SND_R_PCM_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::streams - 1 */ + __le32 stream_id; +}; + +/* supported PCM stream features */ +enum { + VIRTIO_SND_PCM_F_SHMEM_HOST = 0, + VIRTIO_SND_PCM_F_SHMEM_GUEST, + VIRTIO_SND_PCM_F_MSG_POLLING, + VIRTIO_SND_PCM_F_EVT_SHMEM_PERIODS, + VIRTIO_SND_PCM_F_EVT_XRUNS +}; + +/* supported PCM sample formats */ +enum { + /* analog formats (width / physical width) */ + VIRTIO_SND_PCM_FMT_IMA_ADPCM = 0, /* 4 / 4 bits */ + VIRTIO_SND_PCM_FMT_MU_LAW, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_A_LAW, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_S8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_U8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_S16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_U16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_S18_3, /* 18 / 24 bits */ + VIRTIO_SND_PCM_FMT_U18_3, /* 18 / 24 bits */ + VIRTIO_SND_PCM_FMT_S20_3, /* 20 / 24 bits */ + VIRTIO_SND_PCM_FMT_U20_3, /* 20 / 24 bits */ + VIRTIO_SND_PCM_FMT_S24_3, /* 24 / 24 bits */ + VIRTIO_SND_PCM_FMT_U24_3, /* 24 / 24 bits */ + VIRTIO_SND_PCM_FMT_S20, /* 20 / 32 bits */ + VIRTIO_SND_PCM_FMT_U20, /* 20 / 32 bits */ + VIRTIO_SND_PCM_FMT_S24, /* 24 / 32 bits */ + VIRTIO_SND_PCM_FMT_U24, /* 24 / 32 bits */ + VIRTIO_SND_PCM_FMT_S32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_U32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_FLOAT, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_FLOAT64, /* 64 / 64 bits */ + /* digital formats (width / physical width) */ + VIRTIO_SND_PCM_FMT_DSD_U8, /* 8 / 8 bits */ + VIRTIO_SND_PCM_FMT_DSD_U16, /* 16 / 16 bits */ + VIRTIO_SND_PCM_FMT_DSD_U32, /* 32 / 32 bits */ + VIRTIO_SND_PCM_FMT_IEC958_SUBFRAME /* 32 / 32 bits */ +}; + +/* supported PCM frame rates */ +enum { + VIRTIO_SND_PCM_RATE_5512 = 0, + VIRTIO_SND_PCM_RATE_8000, + VIRTIO_SND_PCM_RATE_11025, + VIRTIO_SND_PCM_RATE_16000, + VIRTIO_SND_PCM_RATE_22050, + VIRTIO_SND_PCM_RATE_32000, + VIRTIO_SND_PCM_RATE_44100, + VIRTIO_SND_PCM_RATE_48000, + VIRTIO_SND_PCM_RATE_64000, + VIRTIO_SND_PCM_RATE_88200, + VIRTIO_SND_PCM_RATE_96000, + VIRTIO_SND_PCM_RATE_176400, + VIRTIO_SND_PCM_RATE_192000, + VIRTIO_SND_PCM_RATE_384000 +}; + +struct virtio_snd_pcm_info { + /* common header */ + struct virtio_snd_info hdr; + /* supported feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ + __le32 features; + /* supported sample format bit map (1 << VIRTIO_SND_PCM_FMT_XXX) */ + __le64 formats; + /* supported frame rate bit map (1 << VIRTIO_SND_PCM_RATE_XXX) */ + __le64 rates; + /* dataflow direction (VIRTIO_SND_D_XXX) */ + __u8 direction; + /* minimum # of supported channels */ + __u8 channels_min; + /* maximum # of supported channels */ + __u8 channels_max; + + __u8 padding[5]; +}; + +/* set PCM stream format */ +struct virtio_snd_pcm_set_params { + /* .code = VIRTIO_SND_R_PCM_SET_PARAMS */ + struct virtio_snd_pcm_hdr hdr; + /* size of the hardware buffer */ + __le32 buffer_bytes; + /* size of the hardware period */ + __le32 period_bytes; + /* selected feature bit map (1 << VIRTIO_SND_PCM_F_XXX) */ + __le32 features; + /* selected # of channels */ + __u8 channels; + /* selected sample format (VIRTIO_SND_PCM_FMT_XXX) */ + __u8 format; + /* selected frame rate (VIRTIO_SND_PCM_RATE_XXX) */ + __u8 rate; + + __u8 padding; +}; + +/******************************************************************************* + * PCM I/O MESSAGES + */ + +/* I/O request header */ +struct virtio_snd_pcm_xfer { + /* 0 ... virtio_snd_config::streams - 1 */ + __le32 stream_id; +}; + +/* I/O request status */ +struct virtio_snd_pcm_status { + /* VIRTIO_SND_S_XXX */ + __le32 status; + /* current device latency */ + __le32 latency_bytes; +}; + +/******************************************************************************* + * CHANNEL MAP CONTROL MESSAGES + */ +struct virtio_snd_chmap_hdr { + /* VIRTIO_SND_R_CHMAP_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::chmaps - 1 */ + __le32 chmap_id; +}; + +/* standard channel position definition */ +enum { + VIRTIO_SND_CHMAP_NONE = 0, /* undefined */ + VIRTIO_SND_CHMAP_NA, /* silent */ + VIRTIO_SND_CHMAP_MONO, /* mono stream */ + VIRTIO_SND_CHMAP_FL, /* front left */ + VIRTIO_SND_CHMAP_FR, /* front right */ + VIRTIO_SND_CHMAP_RL, /* rear left */ + VIRTIO_SND_CHMAP_RR, /* rear right */ + VIRTIO_SND_CHMAP_FC, /* front center */ + VIRTIO_SND_CHMAP_LFE, /* low frequency (LFE) */ + VIRTIO_SND_CHMAP_SL, /* side left */ + VIRTIO_SND_CHMAP_SR, /* side right */ + VIRTIO_SND_CHMAP_RC, /* rear center */ + VIRTIO_SND_CHMAP_FLC, /* front left center */ + VIRTIO_SND_CHMAP_FRC, /* front right center */ + VIRTIO_SND_CHMAP_RLC, /* rear left center */ + VIRTIO_SND_CHMAP_RRC, /* rear right center */ + VIRTIO_SND_CHMAP_FLW, /* front left wide */ + VIRTIO_SND_CHMAP_FRW, /* front right wide */ + VIRTIO_SND_CHMAP_FLH, /* front left high */ + VIRTIO_SND_CHMAP_FCH, /* front center high */ + VIRTIO_SND_CHMAP_FRH, /* front right high */ + VIRTIO_SND_CHMAP_TC, /* top center */ + VIRTIO_SND_CHMAP_TFL, /* top front left */ + VIRTIO_SND_CHMAP_TFR, /* top front right */ + VIRTIO_SND_CHMAP_TFC, /* top front center */ + VIRTIO_SND_CHMAP_TRL, /* top rear left */ + VIRTIO_SND_CHMAP_TRR, /* top rear right */ + VIRTIO_SND_CHMAP_TRC, /* top rear center */ + VIRTIO_SND_CHMAP_TFLC, /* top front left center */ + VIRTIO_SND_CHMAP_TFRC, /* top front right center */ + VIRTIO_SND_CHMAP_TSL, /* top side left */ + VIRTIO_SND_CHMAP_TSR, /* top side right */ + VIRTIO_SND_CHMAP_LLFE, /* left LFE */ + VIRTIO_SND_CHMAP_RLFE, /* right LFE */ + VIRTIO_SND_CHMAP_BC, /* bottom center */ + VIRTIO_SND_CHMAP_BLC, /* bottom left center */ + VIRTIO_SND_CHMAP_BRC /* bottom right center */ +}; + +/* maximum possible number of channels */ +#define VIRTIO_SND_CHMAP_MAX_SIZE 18 + +struct virtio_snd_chmap_info { + /* common header */ + struct virtio_snd_info hdr; + /* dataflow direction (VIRTIO_SND_D_XXX) */ + __u8 direction; + /* # of valid channel position values */ + __u8 channels; + /* channel position values (VIRTIO_SND_CHMAP_XXX) */ + __u8 positions[VIRTIO_SND_CHMAP_MAX_SIZE]; +}; + +#endif /* VIRTIO_SND_IF_H */ diff --git a/sound/Kconfig b/sound/Kconfig index 36785410fbe1..e56d96d2b11c 100644 --- a/sound/Kconfig +++ b/sound/Kconfig @@ -99,6 +99,8 @@ source "sound/synth/Kconfig" source "sound/xen/Kconfig" +source "sound/virtio/Kconfig" + endif # SND endif # !UML diff --git a/sound/Makefile b/sound/Makefile index 797ecdcd35e2..04ef04b1168f 100644 --- a/sound/Makefile +++ b/sound/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_SOUND) += soundcore.o obj-$(CONFIG_DMASOUND) += oss/dmasound/ obj-$(CONFIG_SND) += core/ i2c/ drivers/ isa/ pci/ ppc/ arm/ sh/ synth/ usb/ \ - firewire/ sparc/ spi/ parisc/ pcmcia/ mips/ soc/ atmel/ hda/ x86/ xen/ + firewire/ sparc/ spi/ parisc/ pcmcia/ mips/ soc/ atmel/ hda/ x86/ xen/ \ + virtio/ obj-$(CONFIG_SND_AOA) += aoa/ # This one must be compilable even if sound is configured out diff --git a/sound/virtio/Kconfig b/sound/virtio/Kconfig new file mode 100644 index 000000000000..094cba24ee5b --- /dev/null +++ b/sound/virtio/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0+ +# Sound card driver for virtio + +config SND_VIRTIO + tristate "Virtio sound driver" + depends on VIRTIO + select SND_PCM + select SND_JACK + help + This is the virtual sound driver for virtio. Say Y or M. diff --git a/sound/virtio/Makefile b/sound/virtio/Makefile new file mode 100644 index 000000000000..8c87ebb9982b --- /dev/null +++ b/sound/virtio/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0+ + +obj-$(CONFIG_SND_VIRTIO) += virtio_snd.o + +virtio_snd-objs := \ + virtio_card.o + diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c new file mode 100644 index 000000000000..5a37056858e9 --- /dev/null +++ b/sound/virtio/virtio_card.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#include +#include +#include +#include +#include + +#include "virtio_card.h" + +static void virtsnd_remove(struct virtio_device *vdev); + +/** + * virtsnd_event_send() - Add an event to the event queue. + * @vqueue: Underlying event virtqueue. + * @event: Event. + * @notify: Indicates whether or not to send a notification to the device. + * @gfp: Kernel flags for memory allocation. + * + * Context: Any context. + */ +static void virtsnd_event_send(struct virtqueue *vqueue, + struct virtio_snd_event *event, bool notify, + gfp_t gfp) +{ + struct scatterlist sg; + struct scatterlist *psgs[1] = { &sg }; + + /* reset event content */ + memset(event, 0, sizeof(*event)); + + sg_init_one(&sg, event, sizeof(*event)); + + if (virtqueue_add_sgs(vqueue, psgs, 0, 1, event, gfp) || !notify) + return; + + if (virtqueue_kick_prepare(vqueue)) + virtqueue_notify(vqueue); +} + +/** + * virtsnd_event_dispatch() - Dispatch an event from the device side. + * @snd: VirtIO sound device. + * @event: VirtIO sound event. + * + * Context: Any context. + */ +static void virtsnd_event_dispatch(struct virtio_snd *snd, + struct virtio_snd_event *event) +{ +} + +/** + * virtsnd_event_notify_cb() - Dispatch all reported events from the event queue. + * @vqueue: Underlying event virtqueue. + * + * This callback function is called upon a vring interrupt request from the + * device. + * + * Context: Interrupt context. + */ +static void virtsnd_event_notify_cb(struct virtqueue *vqueue) +{ + struct virtio_snd *snd = vqueue->vdev->priv; + struct virtio_snd_queue *queue = virtsnd_event_queue(snd); + struct virtio_snd_event *event; + u32 length; + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + do { + virtqueue_disable_cb(vqueue); + while ((event = virtqueue_get_buf(vqueue, &length))) { + virtsnd_event_dispatch(snd, event); + virtsnd_event_send(vqueue, event, true, GFP_ATOMIC); + } + if (unlikely(virtqueue_is_broken(vqueue))) + break; + } while (!virtqueue_enable_cb(vqueue)); + spin_unlock_irqrestore(&queue->lock, flags); +} + +/** + * virtsnd_find_vqs() - Enumerate and initialize all virtqueues. + * @snd: VirtIO sound device. + * + * After calling this function, the event queue is disabled. + * + * Context: Any context. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_find_vqs(struct virtio_snd *snd) +{ + struct virtio_device *vdev = snd->vdev; + static vq_callback_t *callbacks[VIRTIO_SND_VQ_MAX] = { + [VIRTIO_SND_VQ_EVENT] = virtsnd_event_notify_cb + }; + static const char *names[VIRTIO_SND_VQ_MAX] = { + [VIRTIO_SND_VQ_EVENT] = "virtsnd-event" + }; + struct virtqueue *vqs[VIRTIO_SND_VQ_MAX] = { 0 }; + unsigned int i; + unsigned int n; + int rc; + + rc = virtio_find_vqs(vdev, VIRTIO_SND_VQ_MAX, vqs, callbacks, names, + NULL); + if (rc) { + dev_err(&vdev->dev, "failed to initialize virtqueues\n"); + return rc; + } + + for (i = 0; i < VIRTIO_SND_VQ_MAX; ++i) + snd->queues[i].vqueue = vqs[i]; + + /* Allocate events and populate the event queue */ + virtqueue_disable_cb(vqs[VIRTIO_SND_VQ_EVENT]); + + n = virtqueue_get_vring_size(vqs[VIRTIO_SND_VQ_EVENT]); + + snd->event_msgs = kmalloc_array(n, sizeof(*snd->event_msgs), + GFP_KERNEL); + if (!snd->event_msgs) + return -ENOMEM; + + for (i = 0; i < n; ++i) + virtsnd_event_send(vqs[VIRTIO_SND_VQ_EVENT], + &snd->event_msgs[i], false, GFP_KERNEL); + + return 0; +} + +/** + * virtsnd_enable_event_vq() - Enable the event virtqueue. + * @snd: VirtIO sound device. + * + * Context: Any context. + */ +static void virtsnd_enable_event_vq(struct virtio_snd *snd) +{ + struct virtio_snd_queue *queue = virtsnd_event_queue(snd); + + if (!virtqueue_enable_cb(queue->vqueue)) + virtsnd_event_notify_cb(queue->vqueue); +} + +/** + * virtsnd_disable_event_vq() - Disable the event virtqueue. + * @snd: VirtIO sound device. + * + * Context: Any context. + */ +static void virtsnd_disable_event_vq(struct virtio_snd *snd) +{ + struct virtio_snd_queue *queue = virtsnd_event_queue(snd); + struct virtio_snd_event *event; + u32 length; + unsigned long flags; + + if (queue->vqueue) { + spin_lock_irqsave(&queue->lock, flags); + virtqueue_disable_cb(queue->vqueue); + while ((event = virtqueue_get_buf(queue->vqueue, &length))) + virtsnd_event_dispatch(snd, event); + spin_unlock_irqrestore(&queue->lock, flags); + } +} + +/** + * virtsnd_build_devs() - Read configuration and build ALSA devices. + * @snd: VirtIO sound device. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_build_devs(struct virtio_snd *snd) +{ + struct virtio_device *vdev = snd->vdev; + struct device *dev = &vdev->dev; + int rc; + + rc = snd_card_new(dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, + THIS_MODULE, 0, &snd->card); + if (rc < 0) + return rc; + + snd->card->private_data = snd; + + strscpy(snd->card->driver, VIRTIO_SND_CARD_DRIVER, + sizeof(snd->card->driver)); + strscpy(snd->card->shortname, VIRTIO_SND_CARD_NAME, + sizeof(snd->card->shortname)); + if (dev->parent->bus) + snprintf(snd->card->longname, sizeof(snd->card->longname), + VIRTIO_SND_CARD_NAME " at %s/%s/%s", + dev->parent->bus->name, dev_name(dev->parent), + dev_name(dev)); + else + snprintf(snd->card->longname, sizeof(snd->card->longname), + VIRTIO_SND_CARD_NAME " at %s/%s", + dev_name(dev->parent), dev_name(dev)); + + return snd_card_register(snd->card); +} + +/** + * virtsnd_validate() - Validate if the device can be started. + * @vdev: VirtIO parent device. + * + * Context: Any context. + * Return: 0 on success, -EINVAL on failure. + */ +static int virtsnd_validate(struct virtio_device *vdev) +{ + if (!vdev->config->get) { + dev_err(&vdev->dev, "configuration access disabled\n"); + return -EINVAL; + } + + if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) { + dev_err(&vdev->dev, + "device does not comply with spec version 1.x\n"); + return -EINVAL; + } + + return 0; +} + +/** + * virtsnd_probe() - Create and initialize the device. + * @vdev: VirtIO parent device. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_probe(struct virtio_device *vdev) +{ + struct virtio_snd *snd; + unsigned int i; + int rc; + + snd = devm_kzalloc(&vdev->dev, sizeof(*snd), GFP_KERNEL); + if (!snd) + return -ENOMEM; + + snd->vdev = vdev; + + vdev->priv = snd; + + for (i = 0; i < VIRTIO_SND_VQ_MAX; ++i) + spin_lock_init(&snd->queues[i].lock); + + rc = virtsnd_find_vqs(snd); + if (rc) + goto on_exit; + + virtio_device_ready(vdev); + + rc = virtsnd_build_devs(snd); + if (rc) + goto on_exit; + + virtsnd_enable_event_vq(snd); + +on_exit: + if (rc) + virtsnd_remove(vdev); + + return rc; +} + +/** + * virtsnd_remove() - Remove VirtIO and ALSA devices. + * @vdev: VirtIO parent device. + * + * Context: Any context that permits to sleep. + */ +static void virtsnd_remove(struct virtio_device *vdev) +{ + struct virtio_snd *snd = vdev->priv; + + virtsnd_disable_event_vq(snd); + + if (snd->card) + snd_card_free(snd->card); + + vdev->config->del_vqs(vdev); + vdev->config->reset(vdev); + + kfree(snd->event_msgs); +} + +static const struct virtio_device_id id_table[] = { + { VIRTIO_ID_SOUND, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static struct virtio_driver virtsnd_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .validate = virtsnd_validate, + .probe = virtsnd_probe, + .remove = virtsnd_remove, +}; + +static int __init init(void) +{ + return register_virtio_driver(&virtsnd_driver); +} +module_init(init); + +static void __exit fini(void) +{ + unregister_virtio_driver(&virtsnd_driver); +} +module_exit(fini); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("Virtio sound card driver"); +MODULE_LICENSE("GPL"); diff --git a/sound/virtio/virtio_card.h b/sound/virtio/virtio_card.h new file mode 100644 index 000000000000..b903b1b12e90 --- /dev/null +++ b/sound/virtio/virtio_card.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#ifndef VIRTIO_SND_CARD_H +#define VIRTIO_SND_CARD_H + +#include +#include +#include +#include + +#define VIRTIO_SND_CARD_DRIVER "virtio-snd" +#define VIRTIO_SND_CARD_NAME "VirtIO SoundCard" + +/** + * struct virtio_snd_queue - Virtqueue wrapper structure. + * @lock: Used to synchronize access to a virtqueue. + * @vqueue: Underlying virtqueue. + */ +struct virtio_snd_queue { + spinlock_t lock; + struct virtqueue *vqueue; +}; + +/** + * struct virtio_snd - VirtIO sound card device. + * @vdev: Underlying virtio device. + * @queues: Virtqueue wrappers. + * @card: ALSA sound card. + * @event_msgs: Device events. + */ +struct virtio_snd { + struct virtio_device *vdev; + struct virtio_snd_queue queues[VIRTIO_SND_VQ_MAX]; + struct snd_card *card; + struct virtio_snd_event *event_msgs; +}; + +static inline struct virtio_snd_queue * +virtsnd_control_queue(struct virtio_snd *snd) +{ + return &snd->queues[VIRTIO_SND_VQ_CONTROL]; +} + +static inline struct virtio_snd_queue * +virtsnd_event_queue(struct virtio_snd *snd) +{ + return &snd->queues[VIRTIO_SND_VQ_EVENT]; +} + +static inline struct virtio_snd_queue * +virtsnd_tx_queue(struct virtio_snd *snd) +{ + return &snd->queues[VIRTIO_SND_VQ_TX]; +} + +static inline struct virtio_snd_queue * +virtsnd_rx_queue(struct virtio_snd *snd) +{ + return &snd->queues[VIRTIO_SND_VQ_RX]; +} + +#endif /* VIRTIO_SND_CARD_H */ -- cgit v1.2.3-59-g8ed1b From 9d45e514da88ff74fc24ffb34e7d6eb92576440b Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:03 +0100 Subject: ALSA: virtio: handling control messages The control queue can be used by different parts of the driver to send commands to the device. Control messages can be either synchronous or asynchronous. The lifetime of a message is controlled by a reference count. Introduce a module parameter to set the message completion timeout: msg_timeout_ms [=1000] Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-4-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- sound/virtio/Makefile | 3 +- sound/virtio/virtio_card.c | 13 ++ sound/virtio/virtio_card.h | 7 + sound/virtio/virtio_ctl_msg.c | 310 ++++++++++++++++++++++++++++++++++++++++++ sound/virtio/virtio_ctl_msg.h | 78 +++++++++++ 5 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 sound/virtio/virtio_ctl_msg.c create mode 100644 sound/virtio/virtio_ctl_msg.h diff --git a/sound/virtio/Makefile b/sound/virtio/Makefile index 8c87ebb9982b..dc551e637441 100644 --- a/sound/virtio/Makefile +++ b/sound/virtio/Makefile @@ -3,5 +3,6 @@ obj-$(CONFIG_SND_VIRTIO) += virtio_snd.o virtio_snd-objs := \ - virtio_card.o + virtio_card.o \ + virtio_ctl_msg.o diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index 5a37056858e9..b757b2444078 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -11,6 +11,10 @@ #include "virtio_card.h" +u32 virtsnd_msg_timeout_ms = MSEC_PER_SEC; +module_param_named(msg_timeout_ms, virtsnd_msg_timeout_ms, uint, 0644); +MODULE_PARM_DESC(msg_timeout_ms, "Message completion timeout in milliseconds"); + static void virtsnd_remove(struct virtio_device *vdev); /** @@ -96,9 +100,11 @@ static int virtsnd_find_vqs(struct virtio_snd *snd) { struct virtio_device *vdev = snd->vdev; static vq_callback_t *callbacks[VIRTIO_SND_VQ_MAX] = { + [VIRTIO_SND_VQ_CONTROL] = virtsnd_ctl_notify_cb, [VIRTIO_SND_VQ_EVENT] = virtsnd_event_notify_cb }; static const char *names[VIRTIO_SND_VQ_MAX] = { + [VIRTIO_SND_VQ_CONTROL] = "virtsnd-ctl", [VIRTIO_SND_VQ_EVENT] = "virtsnd-event" }; struct virtqueue *vqs[VIRTIO_SND_VQ_MAX] = { 0 }; @@ -226,6 +232,11 @@ static int virtsnd_validate(struct virtio_device *vdev) return -EINVAL; } + if (!virtsnd_msg_timeout_ms) { + dev_err(&vdev->dev, "msg_timeout_ms value cannot be zero\n"); + return -EINVAL; + } + return 0; } @@ -247,6 +258,7 @@ static int virtsnd_probe(struct virtio_device *vdev) return -ENOMEM; snd->vdev = vdev; + INIT_LIST_HEAD(&snd->ctl_msgs); vdev->priv = snd; @@ -283,6 +295,7 @@ static void virtsnd_remove(struct virtio_device *vdev) struct virtio_snd *snd = vdev->priv; virtsnd_disable_event_vq(snd); + virtsnd_ctl_msg_cancel_all(snd); if (snd->card) snd_card_free(snd->card); diff --git a/sound/virtio/virtio_card.h b/sound/virtio/virtio_card.h index b903b1b12e90..1e76eeff160f 100644 --- a/sound/virtio/virtio_card.h +++ b/sound/virtio/virtio_card.h @@ -11,6 +11,8 @@ #include #include +#include "virtio_ctl_msg.h" + #define VIRTIO_SND_CARD_DRIVER "virtio-snd" #define VIRTIO_SND_CARD_NAME "VirtIO SoundCard" @@ -29,15 +31,20 @@ struct virtio_snd_queue { * @vdev: Underlying virtio device. * @queues: Virtqueue wrappers. * @card: ALSA sound card. + * @ctl_msgs: Pending control request list. * @event_msgs: Device events. */ struct virtio_snd { struct virtio_device *vdev; struct virtio_snd_queue queues[VIRTIO_SND_VQ_MAX]; struct snd_card *card; + struct list_head ctl_msgs; struct virtio_snd_event *event_msgs; }; +/* Message completion timeout in milliseconds (module parameter). */ +extern u32 virtsnd_msg_timeout_ms; + static inline struct virtio_snd_queue * virtsnd_control_queue(struct virtio_snd *snd) { diff --git a/sound/virtio/virtio_ctl_msg.c b/sound/virtio/virtio_ctl_msg.c new file mode 100644 index 000000000000..26ff7e7cc041 --- /dev/null +++ b/sound/virtio/virtio_ctl_msg.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#include +#include + +#include "virtio_card.h" + +/** + * struct virtio_snd_msg - Control message. + * @sg_request: Scattergather list containing a device request (header). + * @sg_response: Scattergather list containing a device response (status). + * @list: Pending message list entry. + * @notify: Request completed notification. + * @ref_count: Reference count used to manage a message lifetime. + */ +struct virtio_snd_msg { + struct scatterlist sg_request; + struct scatterlist sg_response; + struct list_head list; + struct completion notify; + refcount_t ref_count; +}; + +/** + * virtsnd_ctl_msg_ref() - Increment reference counter for the message. + * @msg: Control message. + * + * Context: Any context. + */ +void virtsnd_ctl_msg_ref(struct virtio_snd_msg *msg) +{ + refcount_inc(&msg->ref_count); +} + +/** + * virtsnd_ctl_msg_unref() - Decrement reference counter for the message. + * @msg: Control message. + * + * The message will be freed when the ref_count value is 0. + * + * Context: Any context. + */ +void virtsnd_ctl_msg_unref(struct virtio_snd_msg *msg) +{ + if (refcount_dec_and_test(&msg->ref_count)) + kfree(msg); +} + +/** + * virtsnd_ctl_msg_request() - Get a pointer to the request header. + * @msg: Control message. + * + * Context: Any context. + */ +void *virtsnd_ctl_msg_request(struct virtio_snd_msg *msg) +{ + return sg_virt(&msg->sg_request); +} + +/** + * virtsnd_ctl_msg_request() - Get a pointer to the response header. + * @msg: Control message. + * + * Context: Any context. + */ +void *virtsnd_ctl_msg_response(struct virtio_snd_msg *msg) +{ + return sg_virt(&msg->sg_response); +} + +/** + * virtsnd_ctl_msg_alloc() - Allocate and initialize a control message. + * @request_size: Size of request header. + * @response_size: Size of response header. + * @gfp: Kernel flags for memory allocation. + * + * The message will be automatically freed when the ref_count value is 0. + * + * Context: Any context. May sleep if @gfp flags permit. + * Return: Allocated message on success, NULL on failure. + */ +struct virtio_snd_msg *virtsnd_ctl_msg_alloc(size_t request_size, + size_t response_size, gfp_t gfp) +{ + struct virtio_snd_msg *msg; + + if (!request_size || !response_size) + return NULL; + + msg = kzalloc(sizeof(*msg) + request_size + response_size, gfp); + if (!msg) + return NULL; + + sg_init_one(&msg->sg_request, (u8 *)msg + sizeof(*msg), request_size); + sg_init_one(&msg->sg_response, (u8 *)msg + sizeof(*msg) + request_size, + response_size); + + INIT_LIST_HEAD(&msg->list); + init_completion(&msg->notify); + /* This reference is dropped in virtsnd_ctl_msg_complete(). */ + refcount_set(&msg->ref_count, 1); + + return msg; +} + +/** + * virtsnd_ctl_msg_send() - Send a control message. + * @snd: VirtIO sound device. + * @msg: Control message. + * @out_sgs: Additional sg-list to attach to the request header (may be NULL). + * @in_sgs: Additional sg-list to attach to the response header (may be NULL). + * @nowait: Flag indicating whether to wait for completion. + * + * Context: Any context. Takes and releases the control queue spinlock. + * May sleep if @nowait is false. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_ctl_msg_send(struct virtio_snd *snd, struct virtio_snd_msg *msg, + struct scatterlist *out_sgs, + struct scatterlist *in_sgs, bool nowait) +{ + struct virtio_device *vdev = snd->vdev; + struct virtio_snd_queue *queue = virtsnd_control_queue(snd); + unsigned int js = msecs_to_jiffies(virtsnd_msg_timeout_ms); + struct virtio_snd_hdr *request = virtsnd_ctl_msg_request(msg); + struct virtio_snd_hdr *response = virtsnd_ctl_msg_response(msg); + unsigned int nouts = 0; + unsigned int nins = 0; + struct scatterlist *psgs[4]; + bool notify = false; + unsigned long flags; + int rc; + + virtsnd_ctl_msg_ref(msg); + + /* Set the default status in case the message was canceled. */ + response->code = cpu_to_le32(VIRTIO_SND_S_IO_ERR); + + psgs[nouts++] = &msg->sg_request; + if (out_sgs) + psgs[nouts++] = out_sgs; + + psgs[nouts + nins++] = &msg->sg_response; + if (in_sgs) + psgs[nouts + nins++] = in_sgs; + + spin_lock_irqsave(&queue->lock, flags); + rc = virtqueue_add_sgs(queue->vqueue, psgs, nouts, nins, msg, + GFP_ATOMIC); + if (!rc) { + notify = virtqueue_kick_prepare(queue->vqueue); + + list_add_tail(&msg->list, &snd->ctl_msgs); + } + spin_unlock_irqrestore(&queue->lock, flags); + + if (rc) { + dev_err(&vdev->dev, "failed to send control message (0x%08x)\n", + le32_to_cpu(request->code)); + + /* + * Since in this case virtsnd_ctl_msg_complete() will not be + * called, it is necessary to decrement the reference count. + */ + virtsnd_ctl_msg_unref(msg); + + goto on_exit; + } + + if (notify) + virtqueue_notify(queue->vqueue); + + if (nowait) + goto on_exit; + + rc = wait_for_completion_interruptible_timeout(&msg->notify, js); + if (rc <= 0) { + if (!rc) { + dev_err(&vdev->dev, + "control message (0x%08x) timeout\n", + le32_to_cpu(request->code)); + rc = -ETIMEDOUT; + } + + goto on_exit; + } + + switch (le32_to_cpu(response->code)) { + case VIRTIO_SND_S_OK: + rc = 0; + break; + case VIRTIO_SND_S_NOT_SUPP: + rc = -EOPNOTSUPP; + break; + case VIRTIO_SND_S_IO_ERR: + rc = -EIO; + break; + default: + rc = -EINVAL; + break; + } + +on_exit: + virtsnd_ctl_msg_unref(msg); + + return rc; +} + +/** + * virtsnd_ctl_msg_complete() - Complete a control message. + * @msg: Control message. + * + * Context: Any context. Expects the control queue spinlock to be held by + * caller. + */ +void virtsnd_ctl_msg_complete(struct virtio_snd_msg *msg) +{ + list_del(&msg->list); + complete(&msg->notify); + + virtsnd_ctl_msg_unref(msg); +} + +/** + * virtsnd_ctl_msg_cancel_all() - Cancel all pending control messages. + * @snd: VirtIO sound device. + * + * Context: Any context. + */ +void virtsnd_ctl_msg_cancel_all(struct virtio_snd *snd) +{ + struct virtio_snd_queue *queue = virtsnd_control_queue(snd); + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + while (!list_empty(&snd->ctl_msgs)) { + struct virtio_snd_msg *msg = + list_first_entry(&snd->ctl_msgs, struct virtio_snd_msg, + list); + + virtsnd_ctl_msg_complete(msg); + } + spin_unlock_irqrestore(&queue->lock, flags); +} + +/** + * virtsnd_ctl_query_info() - Query the item configuration from the device. + * @snd: VirtIO sound device. + * @command: Control request code (VIRTIO_SND_R_XXX_INFO). + * @start_id: Item start identifier. + * @count: Item count to query. + * @size: Item information size in bytes. + * @info: Buffer for storing item information. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_ctl_query_info(struct virtio_snd *snd, int command, int start_id, + int count, size_t size, void *info) +{ + struct virtio_snd_msg *msg; + struct virtio_snd_query_info *query; + struct scatterlist sg; + + msg = virtsnd_ctl_msg_alloc(sizeof(*query), + sizeof(struct virtio_snd_hdr), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + query = virtsnd_ctl_msg_request(msg); + query->hdr.code = cpu_to_le32(command); + query->start_id = cpu_to_le32(start_id); + query->count = cpu_to_le32(count); + query->size = cpu_to_le32(size); + + sg_init_one(&sg, info, count * size); + + return virtsnd_ctl_msg_send(snd, msg, NULL, &sg, false); +} + +/** + * virtsnd_ctl_notify_cb() - Process all completed control messages. + * @vqueue: Underlying control virtqueue. + * + * This callback function is called upon a vring interrupt request from the + * device. + * + * Context: Interrupt context. Takes and releases the control queue spinlock. + */ +void virtsnd_ctl_notify_cb(struct virtqueue *vqueue) +{ + struct virtio_snd *snd = vqueue->vdev->priv; + struct virtio_snd_queue *queue = virtsnd_control_queue(snd); + struct virtio_snd_msg *msg; + u32 length; + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + do { + virtqueue_disable_cb(vqueue); + while ((msg = virtqueue_get_buf(vqueue, &length))) + virtsnd_ctl_msg_complete(msg); + if (unlikely(virtqueue_is_broken(vqueue))) + break; + } while (!virtqueue_enable_cb(vqueue)); + spin_unlock_irqrestore(&queue->lock, flags); +} diff --git a/sound/virtio/virtio_ctl_msg.h b/sound/virtio/virtio_ctl_msg.h new file mode 100644 index 000000000000..7f4db044f28e --- /dev/null +++ b/sound/virtio/virtio_ctl_msg.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#ifndef VIRTIO_SND_MSG_H +#define VIRTIO_SND_MSG_H + +#include +#include + +struct virtio_snd; +struct virtio_snd_msg; + +void virtsnd_ctl_msg_ref(struct virtio_snd_msg *msg); + +void virtsnd_ctl_msg_unref(struct virtio_snd_msg *msg); + +void *virtsnd_ctl_msg_request(struct virtio_snd_msg *msg); + +void *virtsnd_ctl_msg_response(struct virtio_snd_msg *msg); + +struct virtio_snd_msg *virtsnd_ctl_msg_alloc(size_t request_size, + size_t response_size, gfp_t gfp); + +int virtsnd_ctl_msg_send(struct virtio_snd *snd, struct virtio_snd_msg *msg, + struct scatterlist *out_sgs, + struct scatterlist *in_sgs, bool nowait); + +/** + * virtsnd_ctl_msg_send_sync() - Simplified sending of synchronous message. + * @snd: VirtIO sound device. + * @msg: Control message. + * + * After returning from this function, the message will be deleted. If message + * content is still needed, the caller must additionally to + * virtsnd_ctl_msg_ref/unref() it. + * + * The msg_timeout_ms module parameter defines the message completion timeout. + * If the message is not completed within this time, the function will return an + * error. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + * + * The return value is a message status code (VIRTIO_SND_S_XXX) converted to an + * appropriate -errno value. + */ +static inline int virtsnd_ctl_msg_send_sync(struct virtio_snd *snd, + struct virtio_snd_msg *msg) +{ + return virtsnd_ctl_msg_send(snd, msg, NULL, NULL, false); +} + +/** + * virtsnd_ctl_msg_send_async() - Simplified sending of asynchronous message. + * @snd: VirtIO sound device. + * @msg: Control message. + * + * Context: Any context. + * Return: 0 on success, -errno on failure. + */ +static inline int virtsnd_ctl_msg_send_async(struct virtio_snd *snd, + struct virtio_snd_msg *msg) +{ + return virtsnd_ctl_msg_send(snd, msg, NULL, NULL, true); +} + +void virtsnd_ctl_msg_cancel_all(struct virtio_snd *snd); + +void virtsnd_ctl_msg_complete(struct virtio_snd_msg *msg); + +int virtsnd_ctl_query_info(struct virtio_snd *snd, int command, int start_id, + int count, size_t size, void *info); + +void virtsnd_ctl_notify_cb(struct virtqueue *vqueue); + +#endif /* VIRTIO_SND_MSG_H */ -- cgit v1.2.3-59-g8ed1b From 29b96bf50ba958eb5f097cdc3fbd4c1acf9547a2 Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:04 +0100 Subject: ALSA: virtio: build PCM devices and substream hardware descriptors Like the HDA specification, the virtio sound device specification links PCM substreams, jacks and PCM channel maps into functional groups. For each discovered group, a PCM device is created, the number of which coincides with the group number. Introduce the module parameters for setting the hardware buffer parameters: pcm_buffer_ms [=160] pcm_periods_min [=2] pcm_periods_max [=16] pcm_period_ms_min [=10] pcm_period_ms_max [=80] Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-5-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- sound/virtio/Makefile | 3 +- sound/virtio/virtio_card.c | 18 ++ sound/virtio/virtio_card.h | 10 + sound/virtio/virtio_pcm.c | 479 +++++++++++++++++++++++++++++++++++++++++++++ sound/virtio/virtio_pcm.h | 72 +++++++ 5 files changed, 581 insertions(+), 1 deletion(-) create mode 100644 sound/virtio/virtio_pcm.c create mode 100644 sound/virtio/virtio_pcm.h diff --git a/sound/virtio/Makefile b/sound/virtio/Makefile index dc551e637441..69162a545a41 100644 --- a/sound/virtio/Makefile +++ b/sound/virtio/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_SND_VIRTIO) += virtio_snd.o virtio_snd-objs := \ virtio_card.o \ - virtio_ctl_msg.o + virtio_ctl_msg.o \ + virtio_pcm.o diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index b757b2444078..11c76ee311b7 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -209,6 +209,16 @@ static int virtsnd_build_devs(struct virtio_snd *snd) VIRTIO_SND_CARD_NAME " at %s/%s", dev_name(dev->parent), dev_name(dev)); + rc = virtsnd_pcm_parse_cfg(snd); + if (rc) + return rc; + + if (snd->nsubstreams) { + rc = virtsnd_pcm_build_devs(snd); + if (rc) + return rc; + } + return snd_card_register(snd->card); } @@ -237,6 +247,9 @@ static int virtsnd_validate(struct virtio_device *vdev) return -EINVAL; } + if (virtsnd_pcm_validate(vdev)) + return -EINVAL; + return 0; } @@ -259,6 +272,7 @@ static int virtsnd_probe(struct virtio_device *vdev) snd->vdev = vdev; INIT_LIST_HEAD(&snd->ctl_msgs); + INIT_LIST_HEAD(&snd->pcm_list); vdev->priv = snd; @@ -293,6 +307,7 @@ on_exit: static void virtsnd_remove(struct virtio_device *vdev) { struct virtio_snd *snd = vdev->priv; + unsigned int i; virtsnd_disable_event_vq(snd); virtsnd_ctl_msg_cancel_all(snd); @@ -303,6 +318,9 @@ static void virtsnd_remove(struct virtio_device *vdev) vdev->config->del_vqs(vdev); vdev->config->reset(vdev); + for (i = 0; snd->substreams && i < snd->nsubstreams; ++i) + cancel_work_sync(&snd->substreams[i].elapsed_period); + kfree(snd->event_msgs); } diff --git a/sound/virtio/virtio_card.h b/sound/virtio/virtio_card.h index 1e76eeff160f..77a1b7255370 100644 --- a/sound/virtio/virtio_card.h +++ b/sound/virtio/virtio_card.h @@ -12,9 +12,13 @@ #include #include "virtio_ctl_msg.h" +#include "virtio_pcm.h" #define VIRTIO_SND_CARD_DRIVER "virtio-snd" #define VIRTIO_SND_CARD_NAME "VirtIO SoundCard" +#define VIRTIO_SND_PCM_NAME "VirtIO PCM" + +struct virtio_pcm_substream; /** * struct virtio_snd_queue - Virtqueue wrapper structure. @@ -33,6 +37,9 @@ struct virtio_snd_queue { * @card: ALSA sound card. * @ctl_msgs: Pending control request list. * @event_msgs: Device events. + * @pcm_list: VirtIO PCM device list. + * @substreams: VirtIO PCM substreams. + * @nsubstreams: Number of PCM substreams. */ struct virtio_snd { struct virtio_device *vdev; @@ -40,6 +47,9 @@ struct virtio_snd { struct snd_card *card; struct list_head ctl_msgs; struct virtio_snd_event *event_msgs; + struct list_head pcm_list; + struct virtio_pcm_substream *substreams; + u32 nsubstreams; }; /* Message completion timeout in milliseconds (module parameter). */ diff --git a/sound/virtio/virtio_pcm.c b/sound/virtio/virtio_pcm.c new file mode 100644 index 000000000000..e16567e2e214 --- /dev/null +++ b/sound/virtio/virtio_pcm.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#include +#include + +#include "virtio_card.h" + +static u32 pcm_buffer_ms = 160; +module_param(pcm_buffer_ms, uint, 0644); +MODULE_PARM_DESC(pcm_buffer_ms, "PCM substream buffer time in milliseconds"); + +static u32 pcm_periods_min = 2; +module_param(pcm_periods_min, uint, 0644); +MODULE_PARM_DESC(pcm_periods_min, "Minimum number of PCM periods"); + +static u32 pcm_periods_max = 16; +module_param(pcm_periods_max, uint, 0644); +MODULE_PARM_DESC(pcm_periods_max, "Maximum number of PCM periods"); + +static u32 pcm_period_ms_min = 10; +module_param(pcm_period_ms_min, uint, 0644); +MODULE_PARM_DESC(pcm_period_ms_min, "Minimum PCM period time in milliseconds"); + +static u32 pcm_period_ms_max = 80; +module_param(pcm_period_ms_max, uint, 0644); +MODULE_PARM_DESC(pcm_period_ms_max, "Maximum PCM period time in milliseconds"); + +/* Map for converting VirtIO format to ALSA format. */ +static const snd_pcm_format_t g_v2a_format_map[] = { + [VIRTIO_SND_PCM_FMT_IMA_ADPCM] = SNDRV_PCM_FORMAT_IMA_ADPCM, + [VIRTIO_SND_PCM_FMT_MU_LAW] = SNDRV_PCM_FORMAT_MU_LAW, + [VIRTIO_SND_PCM_FMT_A_LAW] = SNDRV_PCM_FORMAT_A_LAW, + [VIRTIO_SND_PCM_FMT_S8] = SNDRV_PCM_FORMAT_S8, + [VIRTIO_SND_PCM_FMT_U8] = SNDRV_PCM_FORMAT_U8, + [VIRTIO_SND_PCM_FMT_S16] = SNDRV_PCM_FORMAT_S16_LE, + [VIRTIO_SND_PCM_FMT_U16] = SNDRV_PCM_FORMAT_U16_LE, + [VIRTIO_SND_PCM_FMT_S18_3] = SNDRV_PCM_FORMAT_S18_3LE, + [VIRTIO_SND_PCM_FMT_U18_3] = SNDRV_PCM_FORMAT_U18_3LE, + [VIRTIO_SND_PCM_FMT_S20_3] = SNDRV_PCM_FORMAT_S20_3LE, + [VIRTIO_SND_PCM_FMT_U20_3] = SNDRV_PCM_FORMAT_U20_3LE, + [VIRTIO_SND_PCM_FMT_S24_3] = SNDRV_PCM_FORMAT_S24_3LE, + [VIRTIO_SND_PCM_FMT_U24_3] = SNDRV_PCM_FORMAT_U24_3LE, + [VIRTIO_SND_PCM_FMT_S20] = SNDRV_PCM_FORMAT_S20_LE, + [VIRTIO_SND_PCM_FMT_U20] = SNDRV_PCM_FORMAT_U20_LE, + [VIRTIO_SND_PCM_FMT_S24] = SNDRV_PCM_FORMAT_S24_LE, + [VIRTIO_SND_PCM_FMT_U24] = SNDRV_PCM_FORMAT_U24_LE, + [VIRTIO_SND_PCM_FMT_S32] = SNDRV_PCM_FORMAT_S32_LE, + [VIRTIO_SND_PCM_FMT_U32] = SNDRV_PCM_FORMAT_U32_LE, + [VIRTIO_SND_PCM_FMT_FLOAT] = SNDRV_PCM_FORMAT_FLOAT_LE, + [VIRTIO_SND_PCM_FMT_FLOAT64] = SNDRV_PCM_FORMAT_FLOAT64_LE, + [VIRTIO_SND_PCM_FMT_DSD_U8] = SNDRV_PCM_FORMAT_DSD_U8, + [VIRTIO_SND_PCM_FMT_DSD_U16] = SNDRV_PCM_FORMAT_DSD_U16_LE, + [VIRTIO_SND_PCM_FMT_DSD_U32] = SNDRV_PCM_FORMAT_DSD_U32_LE, + [VIRTIO_SND_PCM_FMT_IEC958_SUBFRAME] = + SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE +}; + +/* Map for converting VirtIO frame rate to ALSA frame rate. */ +struct virtsnd_v2a_rate { + unsigned int alsa_bit; + unsigned int rate; +}; + +static const struct virtsnd_v2a_rate g_v2a_rate_map[] = { + [VIRTIO_SND_PCM_RATE_5512] = { SNDRV_PCM_RATE_5512, 5512 }, + [VIRTIO_SND_PCM_RATE_8000] = { SNDRV_PCM_RATE_8000, 8000 }, + [VIRTIO_SND_PCM_RATE_11025] = { SNDRV_PCM_RATE_11025, 11025 }, + [VIRTIO_SND_PCM_RATE_16000] = { SNDRV_PCM_RATE_16000, 16000 }, + [VIRTIO_SND_PCM_RATE_22050] = { SNDRV_PCM_RATE_22050, 22050 }, + [VIRTIO_SND_PCM_RATE_32000] = { SNDRV_PCM_RATE_32000, 32000 }, + [VIRTIO_SND_PCM_RATE_44100] = { SNDRV_PCM_RATE_44100, 44100 }, + [VIRTIO_SND_PCM_RATE_48000] = { SNDRV_PCM_RATE_48000, 48000 }, + [VIRTIO_SND_PCM_RATE_64000] = { SNDRV_PCM_RATE_64000, 64000 }, + [VIRTIO_SND_PCM_RATE_88200] = { SNDRV_PCM_RATE_88200, 88200 }, + [VIRTIO_SND_PCM_RATE_96000] = { SNDRV_PCM_RATE_96000, 96000 }, + [VIRTIO_SND_PCM_RATE_176400] = { SNDRV_PCM_RATE_176400, 176400 }, + [VIRTIO_SND_PCM_RATE_192000] = { SNDRV_PCM_RATE_192000, 192000 } +}; + +/** + * virtsnd_pcm_build_hw() - Parse substream config and build HW descriptor. + * @vss: VirtIO substream. + * @info: VirtIO substream information entry. + * + * Context: Any context. + * Return: 0 on success, -EINVAL if configuration is invalid. + */ +static int virtsnd_pcm_build_hw(struct virtio_pcm_substream *vss, + struct virtio_snd_pcm_info *info) +{ + struct virtio_device *vdev = vss->snd->vdev; + unsigned int i; + u64 values; + size_t sample_max = 0; + size_t sample_min = 0; + + vss->features = le32_to_cpu(info->features); + + /* + * TODO: set SNDRV_PCM_INFO_{BATCH,BLOCK_TRANSFER} if device supports + * only message-based transport. + */ + vss->hw.info = + SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_BATCH | + SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_PAUSE; + + if (!info->channels_min || info->channels_min > info->channels_max) { + dev_err(&vdev->dev, + "SID %u: invalid channel range [%u %u]\n", + vss->sid, info->channels_min, info->channels_max); + return -EINVAL; + } + + vss->hw.channels_min = info->channels_min; + vss->hw.channels_max = info->channels_max; + + values = le64_to_cpu(info->formats); + + vss->hw.formats = 0; + + for (i = 0; i < ARRAY_SIZE(g_v2a_format_map); ++i) + if (values & (1ULL << i)) { + snd_pcm_format_t alsa_fmt = g_v2a_format_map[i]; + int bytes = snd_pcm_format_physical_width(alsa_fmt) / 8; + + if (!sample_min || sample_min > bytes) + sample_min = bytes; + + if (sample_max < bytes) + sample_max = bytes; + + vss->hw.formats |= pcm_format_to_bits(alsa_fmt); + } + + if (!vss->hw.formats) { + dev_err(&vdev->dev, + "SID %u: no supported PCM sample formats found\n", + vss->sid); + return -EINVAL; + } + + values = le64_to_cpu(info->rates); + + vss->hw.rates = 0; + + for (i = 0; i < ARRAY_SIZE(g_v2a_rate_map); ++i) + if (values & (1ULL << i)) { + if (!vss->hw.rate_min || + vss->hw.rate_min > g_v2a_rate_map[i].rate) + vss->hw.rate_min = g_v2a_rate_map[i].rate; + + if (vss->hw.rate_max < g_v2a_rate_map[i].rate) + vss->hw.rate_max = g_v2a_rate_map[i].rate; + + vss->hw.rates |= g_v2a_rate_map[i].alsa_bit; + } + + if (!vss->hw.rates) { + dev_err(&vdev->dev, + "SID %u: no supported PCM frame rates found\n", + vss->sid); + return -EINVAL; + } + + vss->hw.periods_min = pcm_periods_min; + vss->hw.periods_max = pcm_periods_max; + + /* + * We must ensure that there is enough space in the buffer to store + * pcm_buffer_ms ms for the combination (Cmax, Smax, Rmax), where: + * Cmax = maximum supported number of channels, + * Smax = maximum supported sample size in bytes, + * Rmax = maximum supported frame rate. + */ + vss->hw.buffer_bytes_max = + PAGE_ALIGN(sample_max * vss->hw.channels_max * pcm_buffer_ms * + (vss->hw.rate_max / MSEC_PER_SEC)); + + /* + * We must ensure that the minimum period size is enough to store + * pcm_period_ms_min ms for the combination (Cmin, Smin, Rmin), where: + * Cmin = minimum supported number of channels, + * Smin = minimum supported sample size in bytes, + * Rmin = minimum supported frame rate. + */ + vss->hw.period_bytes_min = + sample_min * vss->hw.channels_min * pcm_period_ms_min * + (vss->hw.rate_min / MSEC_PER_SEC); + + /* + * We must ensure that the maximum period size is enough to store + * pcm_period_ms_max ms for the combination (Cmax, Smax, Rmax). + */ + vss->hw.period_bytes_max = + sample_max * vss->hw.channels_max * pcm_period_ms_max * + (vss->hw.rate_max / MSEC_PER_SEC); + + return 0; +} + +/** + * virtsnd_pcm_find() - Find the PCM device for the specified node ID. + * @snd: VirtIO sound device. + * @nid: Function node ID. + * + * Context: Any context. + * Return: a pointer to the PCM device or ERR_PTR(-ENOENT). + */ +struct virtio_pcm *virtsnd_pcm_find(struct virtio_snd *snd, u32 nid) +{ + struct virtio_pcm *vpcm; + + list_for_each_entry(vpcm, &snd->pcm_list, list) + if (vpcm->nid == nid) + return vpcm; + + return ERR_PTR(-ENOENT); +} + +/** + * virtsnd_pcm_find_or_create() - Find or create the PCM device for the + * specified node ID. + * @snd: VirtIO sound device. + * @nid: Function node ID. + * + * Context: Any context that permits to sleep. + * Return: a pointer to the PCM device or ERR_PTR(-errno). + */ +struct virtio_pcm *virtsnd_pcm_find_or_create(struct virtio_snd *snd, u32 nid) +{ + struct virtio_device *vdev = snd->vdev; + struct virtio_pcm *vpcm; + + vpcm = virtsnd_pcm_find(snd, nid); + if (!IS_ERR(vpcm)) + return vpcm; + + vpcm = devm_kzalloc(&vdev->dev, sizeof(*vpcm), GFP_KERNEL); + if (!vpcm) + return ERR_PTR(-ENOMEM); + + vpcm->nid = nid; + list_add_tail(&vpcm->list, &snd->pcm_list); + + return vpcm; +} + +/** + * virtsnd_pcm_validate() - Validate if the device can be started. + * @vdev: VirtIO parent device. + * + * Context: Any context. + * Return: 0 on success, -EINVAL on failure. + */ +int virtsnd_pcm_validate(struct virtio_device *vdev) +{ + if (pcm_periods_min < 2 || pcm_periods_min > pcm_periods_max) { + dev_err(&vdev->dev, + "invalid range [%u %u] of the number of PCM periods\n", + pcm_periods_min, pcm_periods_max); + return -EINVAL; + } + + if (!pcm_period_ms_min || pcm_period_ms_min > pcm_period_ms_max) { + dev_err(&vdev->dev, + "invalid range [%u %u] of the size of the PCM period\n", + pcm_period_ms_min, pcm_period_ms_max); + return -EINVAL; + } + + if (pcm_buffer_ms < pcm_periods_min * pcm_period_ms_min) { + dev_err(&vdev->dev, + "pcm_buffer_ms(=%u) value cannot be < %u ms\n", + pcm_buffer_ms, pcm_periods_min * pcm_period_ms_min); + return -EINVAL; + } + + if (pcm_period_ms_max > pcm_buffer_ms / 2) { + dev_err(&vdev->dev, + "pcm_period_ms_max(=%u) value cannot be > %u ms\n", + pcm_period_ms_max, pcm_buffer_ms / 2); + return -EINVAL; + } + + return 0; +} + +/** + * virtsnd_pcm_period_elapsed() - Kernel work function to handle the elapsed + * period state. + * @work: Elapsed period work. + * + * The main purpose of this function is to call snd_pcm_period_elapsed() in + * a process context, not in an interrupt context. This is necessary because PCM + * devices operate in non-atomic mode. + * + * Context: Process context. + */ +static void virtsnd_pcm_period_elapsed(struct work_struct *work) +{ + struct virtio_pcm_substream *vss = + container_of(work, struct virtio_pcm_substream, elapsed_period); + + snd_pcm_period_elapsed(vss->substream); +} + +/** + * virtsnd_pcm_parse_cfg() - Parse the stream configuration. + * @snd: VirtIO sound device. + * + * This function is called during initial device initialization. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_pcm_parse_cfg(struct virtio_snd *snd) +{ + struct virtio_device *vdev = snd->vdev; + struct virtio_snd_pcm_info *info; + u32 i; + int rc; + + virtio_cread_le(vdev, struct virtio_snd_config, streams, + &snd->nsubstreams); + if (!snd->nsubstreams) + return 0; + + snd->substreams = devm_kcalloc(&vdev->dev, snd->nsubstreams, + sizeof(*snd->substreams), GFP_KERNEL); + if (!snd->substreams) + return -ENOMEM; + + info = kcalloc(snd->nsubstreams, sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + rc = virtsnd_ctl_query_info(snd, VIRTIO_SND_R_PCM_INFO, 0, + snd->nsubstreams, sizeof(*info), info); + if (rc) + goto on_exit; + + for (i = 0; i < snd->nsubstreams; ++i) { + struct virtio_pcm_substream *vss = &snd->substreams[i]; + struct virtio_pcm *vpcm; + + vss->snd = snd; + vss->sid = i; + INIT_WORK(&vss->elapsed_period, virtsnd_pcm_period_elapsed); + + rc = virtsnd_pcm_build_hw(vss, &info[i]); + if (rc) + goto on_exit; + + vss->nid = le32_to_cpu(info[i].hdr.hda_fn_nid); + + vpcm = virtsnd_pcm_find_or_create(snd, vss->nid); + if (IS_ERR(vpcm)) { + rc = PTR_ERR(vpcm); + goto on_exit; + } + + switch (info[i].direction) { + case VIRTIO_SND_D_OUTPUT: + vss->direction = SNDRV_PCM_STREAM_PLAYBACK; + break; + case VIRTIO_SND_D_INPUT: + vss->direction = SNDRV_PCM_STREAM_CAPTURE; + break; + default: + dev_err(&vdev->dev, "SID %u: unknown direction (%u)\n", + vss->sid, info[i].direction); + rc = -EINVAL; + goto on_exit; + } + + vpcm->streams[vss->direction].nsubstreams++; + } + +on_exit: + kfree(info); + + return rc; +} + +/** + * virtsnd_pcm_build_devs() - Build ALSA PCM devices. + * @snd: VirtIO sound device. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_pcm_build_devs(struct virtio_snd *snd) +{ + struct virtio_device *vdev = snd->vdev; + struct virtio_pcm *vpcm; + u32 i; + int rc; + + list_for_each_entry(vpcm, &snd->pcm_list, list) { + unsigned int npbs = + vpcm->streams[SNDRV_PCM_STREAM_PLAYBACK].nsubstreams; + unsigned int ncps = + vpcm->streams[SNDRV_PCM_STREAM_CAPTURE].nsubstreams; + + if (!npbs && !ncps) + continue; + + rc = snd_pcm_new(snd->card, VIRTIO_SND_CARD_DRIVER, vpcm->nid, + npbs, ncps, &vpcm->pcm); + if (rc) { + dev_err(&vdev->dev, "snd_pcm_new[%u] failed: %d\n", + vpcm->nid, rc); + return rc; + } + + vpcm->pcm->info_flags = 0; + vpcm->pcm->dev_class = SNDRV_PCM_CLASS_GENERIC; + vpcm->pcm->dev_subclass = SNDRV_PCM_SUBCLASS_GENERIC_MIX; + snprintf(vpcm->pcm->name, sizeof(vpcm->pcm->name), + VIRTIO_SND_PCM_NAME " %u", vpcm->pcm->device); + vpcm->pcm->private_data = vpcm; + vpcm->pcm->nonatomic = true; + + for (i = 0; i < ARRAY_SIZE(vpcm->streams); ++i) { + struct virtio_pcm_stream *stream = &vpcm->streams[i]; + + if (!stream->nsubstreams) + continue; + + stream->substreams = + devm_kcalloc(&vdev->dev, stream->nsubstreams, + sizeof(*stream->substreams), + GFP_KERNEL); + if (!stream->substreams) + return -ENOMEM; + + stream->nsubstreams = 0; + } + } + + for (i = 0; i < snd->nsubstreams; ++i) { + struct virtio_pcm_stream *vs; + struct virtio_pcm_substream *vss = &snd->substreams[i]; + + vpcm = virtsnd_pcm_find(snd, vss->nid); + if (IS_ERR(vpcm)) + return PTR_ERR(vpcm); + + vs = &vpcm->streams[vss->direction]; + vs->substreams[vs->nsubstreams++] = vss; + } + + list_for_each_entry(vpcm, &snd->pcm_list, list) { + for (i = 0; i < ARRAY_SIZE(vpcm->streams); ++i) { + struct virtio_pcm_stream *vs = &vpcm->streams[i]; + struct snd_pcm_str *ks = &vpcm->pcm->streams[i]; + struct snd_pcm_substream *kss; + + if (!vs->nsubstreams) + continue; + + for (kss = ks->substream; kss; kss = kss->next) + vs->substreams[kss->number]->substream = kss; + } + + snd_pcm_set_managed_buffer_all(vpcm->pcm, + SNDRV_DMA_TYPE_VMALLOC, NULL, + 0, 0); + } + + return 0; +} diff --git a/sound/virtio/virtio_pcm.h b/sound/virtio/virtio_pcm.h new file mode 100644 index 000000000000..84f2f3f14f48 --- /dev/null +++ b/sound/virtio/virtio_pcm.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#ifndef VIRTIO_SND_PCM_H +#define VIRTIO_SND_PCM_H + +#include +#include +#include + +struct virtio_pcm; +struct virtio_pcm_msg; + +/** + * struct virtio_pcm_substream - VirtIO PCM substream. + * @snd: VirtIO sound device. + * @nid: Function group node identifier. + * @sid: Stream identifier. + * @direction: Stream data flow direction (SNDRV_PCM_STREAM_XXX). + * @features: Stream VirtIO feature bit map (1 << VIRTIO_SND_PCM_F_XXX). + * @substream: Kernel ALSA substream. + * @hw: Kernel ALSA substream hardware descriptor. + * @elapsed_period: Kernel work to handle the elapsed period state. + */ +struct virtio_pcm_substream { + struct virtio_snd *snd; + u32 nid; + u32 sid; + u32 direction; + u32 features; + struct snd_pcm_substream *substream; + struct snd_pcm_hardware hw; + struct work_struct elapsed_period; +}; + +/** + * struct virtio_pcm_stream - VirtIO PCM stream. + * @substreams: VirtIO substreams belonging to the stream. + * @nsubstreams: Number of substreams. + */ +struct virtio_pcm_stream { + struct virtio_pcm_substream **substreams; + u32 nsubstreams; +}; + +/** + * struct virtio_pcm - VirtIO PCM device. + * @list: VirtIO PCM list entry. + * @nid: Function group node identifier. + * @pcm: Kernel PCM device. + * @streams: VirtIO PCM streams (playback and capture). + */ +struct virtio_pcm { + struct list_head list; + u32 nid; + struct snd_pcm *pcm; + struct virtio_pcm_stream streams[SNDRV_PCM_STREAM_LAST + 1]; +}; + +int virtsnd_pcm_validate(struct virtio_device *vdev); + +int virtsnd_pcm_parse_cfg(struct virtio_snd *snd); + +int virtsnd_pcm_build_devs(struct virtio_snd *snd); + +struct virtio_pcm *virtsnd_pcm_find(struct virtio_snd *snd, u32 nid); + +struct virtio_pcm *virtsnd_pcm_find_or_create(struct virtio_snd *snd, u32 nid); + +#endif /* VIRTIO_SND_PCM_H */ -- cgit v1.2.3-59-g8ed1b From f40a28679e0b7cb3a9cc6627a8dbb40961990f0a Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:05 +0100 Subject: ALSA: virtio: handling control and I/O messages for the PCM device The driver implements a message-based transport for I/O substream operations. Before the start of the substream, the hardware buffer is sliced into I/O messages, the number of which is equal to the current number of periods. The size of each message is equal to the current size of one period. I/O messages are organized in an ordered queue. The completion of the I/O message indicates an elapsed period (the only exception is the end of the stream for the capture substream). Upon completion, the message is automatically re-added to the end of the queue. Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-6-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- sound/virtio/Makefile | 3 +- sound/virtio/virtio_card.c | 22 ++- sound/virtio/virtio_card.h | 9 + sound/virtio/virtio_pcm.c | 32 ++++ sound/virtio/virtio_pcm.h | 40 ++++ sound/virtio/virtio_pcm_msg.c | 414 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 515 insertions(+), 5 deletions(-) create mode 100644 sound/virtio/virtio_pcm_msg.c diff --git a/sound/virtio/Makefile b/sound/virtio/Makefile index 69162a545a41..626af3cc3ed7 100644 --- a/sound/virtio/Makefile +++ b/sound/virtio/Makefile @@ -5,5 +5,6 @@ obj-$(CONFIG_SND_VIRTIO) += virtio_snd.o virtio_snd-objs := \ virtio_card.o \ virtio_ctl_msg.o \ - virtio_pcm.o + virtio_pcm.o \ + virtio_pcm_msg.o diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index 11c76ee311b7..57b9b7f3a9c0 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -55,6 +55,12 @@ static void virtsnd_event_send(struct virtqueue *vqueue, static void virtsnd_event_dispatch(struct virtio_snd *snd, struct virtio_snd_event *event) { + switch (le32_to_cpu(event->hdr.code)) { + case VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED: + case VIRTIO_SND_EVT_PCM_XRUN: + virtsnd_pcm_event(snd, event); + break; + } } /** @@ -101,11 +107,15 @@ static int virtsnd_find_vqs(struct virtio_snd *snd) struct virtio_device *vdev = snd->vdev; static vq_callback_t *callbacks[VIRTIO_SND_VQ_MAX] = { [VIRTIO_SND_VQ_CONTROL] = virtsnd_ctl_notify_cb, - [VIRTIO_SND_VQ_EVENT] = virtsnd_event_notify_cb + [VIRTIO_SND_VQ_EVENT] = virtsnd_event_notify_cb, + [VIRTIO_SND_VQ_TX] = virtsnd_pcm_tx_notify_cb, + [VIRTIO_SND_VQ_RX] = virtsnd_pcm_rx_notify_cb }; static const char *names[VIRTIO_SND_VQ_MAX] = { [VIRTIO_SND_VQ_CONTROL] = "virtsnd-ctl", - [VIRTIO_SND_VQ_EVENT] = "virtsnd-event" + [VIRTIO_SND_VQ_EVENT] = "virtsnd-event", + [VIRTIO_SND_VQ_TX] = "virtsnd-tx", + [VIRTIO_SND_VQ_RX] = "virtsnd-rx" }; struct virtqueue *vqs[VIRTIO_SND_VQ_MAX] = { 0 }; unsigned int i; @@ -318,8 +328,12 @@ static void virtsnd_remove(struct virtio_device *vdev) vdev->config->del_vqs(vdev); vdev->config->reset(vdev); - for (i = 0; snd->substreams && i < snd->nsubstreams; ++i) - cancel_work_sync(&snd->substreams[i].elapsed_period); + for (i = 0; snd->substreams && i < snd->nsubstreams; ++i) { + struct virtio_pcm_substream *vss = &snd->substreams[i]; + + cancel_work_sync(&vss->elapsed_period); + virtsnd_pcm_msg_free(vss); + } kfree(snd->event_msgs); } diff --git a/sound/virtio/virtio_card.h b/sound/virtio/virtio_card.h index 77a1b7255370..c43f9744d362 100644 --- a/sound/virtio/virtio_card.h +++ b/sound/virtio/virtio_card.h @@ -79,4 +79,13 @@ virtsnd_rx_queue(struct virtio_snd *snd) return &snd->queues[VIRTIO_SND_VQ_RX]; } +static inline struct virtio_snd_queue * +virtsnd_pcm_queue(struct virtio_pcm_substream *vss) +{ + if (vss->direction == SNDRV_PCM_STREAM_PLAYBACK) + return virtsnd_tx_queue(vss->snd); + else + return virtsnd_rx_queue(vss->snd); +} + #endif /* VIRTIO_SND_CARD_H */ diff --git a/sound/virtio/virtio_pcm.c b/sound/virtio/virtio_pcm.c index e16567e2e214..2dcd763efa29 100644 --- a/sound/virtio/virtio_pcm.c +++ b/sound/virtio/virtio_pcm.c @@ -353,6 +353,8 @@ int virtsnd_pcm_parse_cfg(struct virtio_snd *snd) vss->snd = snd; vss->sid = i; INIT_WORK(&vss->elapsed_period, virtsnd_pcm_period_elapsed); + init_waitqueue_head(&vss->msg_empty); + spin_lock_init(&vss->lock); rc = virtsnd_pcm_build_hw(vss, &info[i]); if (rc) @@ -477,3 +479,33 @@ int virtsnd_pcm_build_devs(struct virtio_snd *snd) return 0; } + +/** + * virtsnd_pcm_event() - Handle the PCM device event notification. + * @snd: VirtIO sound device. + * @event: VirtIO sound event. + * + * Context: Interrupt context. + */ +void virtsnd_pcm_event(struct virtio_snd *snd, struct virtio_snd_event *event) +{ + struct virtio_pcm_substream *vss; + u32 sid = le32_to_cpu(event->data); + + if (sid >= snd->nsubstreams) + return; + + vss = &snd->substreams[sid]; + + switch (le32_to_cpu(event->hdr.code)) { + case VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED: + /* TODO: deal with shmem elapsed period */ + break; + case VIRTIO_SND_EVT_PCM_XRUN: + spin_lock(&vss->lock); + if (vss->xfer_enabled) + vss->xfer_xrun = true; + spin_unlock(&vss->lock); + break; + } +} diff --git a/sound/virtio/virtio_pcm.h b/sound/virtio/virtio_pcm.h index 84f2f3f14f48..6722f1139666 100644 --- a/sound/virtio/virtio_pcm.h +++ b/sound/virtio/virtio_pcm.h @@ -23,6 +23,17 @@ struct virtio_pcm_msg; * @substream: Kernel ALSA substream. * @hw: Kernel ALSA substream hardware descriptor. * @elapsed_period: Kernel work to handle the elapsed period state. + * @lock: Spinlock that protects fields shared by interrupt handlers and + * substream operators. + * @buffer_bytes: Current buffer size in bytes. + * @hw_ptr: Substream hardware pointer value in bytes [0 ... buffer_bytes). + * @xfer_enabled: Data transfer state (0 - off, 1 - on). + * @xfer_xrun: Data underflow/overflow state (0 - no xrun, 1 - xrun). + * @msgs: Allocated I/O messages. + * @nmsgs: Number of allocated I/O messages. + * @msg_last_enqueued: Index of the last I/O message added to the virtqueue. + * @msg_count: Number of pending I/O messages in the virtqueue. + * @msg_empty: Notify when msg_count is zero. */ struct virtio_pcm_substream { struct virtio_snd *snd; @@ -33,6 +44,16 @@ struct virtio_pcm_substream { struct snd_pcm_substream *substream; struct snd_pcm_hardware hw; struct work_struct elapsed_period; + spinlock_t lock; + size_t buffer_bytes; + size_t hw_ptr; + bool xfer_enabled; + bool xfer_xrun; + struct virtio_pcm_msg **msgs; + unsigned int nmsgs; + int msg_last_enqueued; + unsigned int msg_count; + wait_queue_head_t msg_empty; }; /** @@ -65,8 +86,27 @@ int virtsnd_pcm_parse_cfg(struct virtio_snd *snd); int virtsnd_pcm_build_devs(struct virtio_snd *snd); +void virtsnd_pcm_event(struct virtio_snd *snd, struct virtio_snd_event *event); + +void virtsnd_pcm_tx_notify_cb(struct virtqueue *vqueue); + +void virtsnd_pcm_rx_notify_cb(struct virtqueue *vqueue); + struct virtio_pcm *virtsnd_pcm_find(struct virtio_snd *snd, u32 nid); struct virtio_pcm *virtsnd_pcm_find_or_create(struct virtio_snd *snd, u32 nid); +struct virtio_snd_msg * +virtsnd_pcm_ctl_msg_alloc(struct virtio_pcm_substream *vss, + unsigned int command, gfp_t gfp); + +int virtsnd_pcm_msg_alloc(struct virtio_pcm_substream *vss, + unsigned int periods, unsigned int period_bytes); + +void virtsnd_pcm_msg_free(struct virtio_pcm_substream *vss); + +int virtsnd_pcm_msg_send(struct virtio_pcm_substream *vss); + +unsigned int virtsnd_pcm_msg_pending_num(struct virtio_pcm_substream *vss); + #endif /* VIRTIO_SND_PCM_H */ diff --git a/sound/virtio/virtio_pcm_msg.c b/sound/virtio/virtio_pcm_msg.c new file mode 100644 index 000000000000..f88c8f29cbd8 --- /dev/null +++ b/sound/virtio/virtio_pcm_msg.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#include + +#include "virtio_card.h" + +/** + * struct virtio_pcm_msg - VirtIO I/O message. + * @substream: VirtIO PCM substream. + * @xfer: Request header payload. + * @status: Response header payload. + * @length: Data length in bytes. + * @sgs: Payload scatter-gather table. + */ +struct virtio_pcm_msg { + struct virtio_pcm_substream *substream; + struct virtio_snd_pcm_xfer xfer; + struct virtio_snd_pcm_status status; + size_t length; + struct scatterlist sgs[0]; +}; + +/** + * enum pcm_msg_sg_index - Index values for the virtio_pcm_msg->sgs field in + * an I/O message. + * @PCM_MSG_SG_XFER: Element containing a virtio_snd_pcm_xfer structure. + * @PCM_MSG_SG_STATUS: Element containing a virtio_snd_pcm_status structure. + * @PCM_MSG_SG_DATA: The first element containing a data buffer. + */ +enum pcm_msg_sg_index { + PCM_MSG_SG_XFER = 0, + PCM_MSG_SG_STATUS, + PCM_MSG_SG_DATA +}; + +/** + * virtsnd_pcm_sg_num() - Count the number of sg-elements required to represent + * vmalloc'ed buffer. + * @data: Pointer to vmalloc'ed buffer. + * @length: Buffer size. + * + * Context: Any context. + * Return: Number of physically contiguous parts in the @data. + */ +static int virtsnd_pcm_sg_num(u8 *data, unsigned int length) +{ + phys_addr_t sg_address; + unsigned int sg_length; + int num = 0; + + while (length) { + struct page *pg = vmalloc_to_page(data); + phys_addr_t pg_address = page_to_phys(pg); + size_t pg_length; + + pg_length = PAGE_SIZE - offset_in_page(data); + if (pg_length > length) + pg_length = length; + + if (!num || sg_address + sg_length != pg_address) { + sg_address = pg_address; + sg_length = pg_length; + num++; + } else { + sg_length += pg_length; + } + + data += pg_length; + length -= pg_length; + } + + return num; +} + +/** + * virtsnd_pcm_sg_from() - Build sg-list from vmalloc'ed buffer. + * @sgs: Preallocated sg-list to populate. + * @nsgs: The maximum number of elements in the @sgs. + * @data: Pointer to vmalloc'ed buffer. + * @length: Buffer size. + * + * Splits the buffer into physically contiguous parts and makes an sg-list of + * such parts. + * + * Context: Any context. + */ +static void virtsnd_pcm_sg_from(struct scatterlist *sgs, int nsgs, u8 *data, + unsigned int length) +{ + int idx = -1; + + while (length) { + struct page *pg = vmalloc_to_page(data); + size_t pg_length; + + pg_length = PAGE_SIZE - offset_in_page(data); + if (pg_length > length) + pg_length = length; + + if (idx == -1 || + sg_phys(&sgs[idx]) + sgs[idx].length != page_to_phys(pg)) { + if (idx + 1 == nsgs) + break; + sg_set_page(&sgs[++idx], pg, pg_length, + offset_in_page(data)); + } else { + sgs[idx].length += pg_length; + } + + data += pg_length; + length -= pg_length; + } + + sg_mark_end(&sgs[idx]); +} + +/** + * virtsnd_pcm_msg_alloc() - Allocate I/O messages. + * @vss: VirtIO PCM substream. + * @periods: Current number of periods. + * @period_bytes: Current period size in bytes. + * + * The function slices the buffer into @periods parts (each with the size of + * @period_bytes), and creates @periods corresponding I/O messages. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -ENOMEM on failure. + */ +int virtsnd_pcm_msg_alloc(struct virtio_pcm_substream *vss, + unsigned int periods, unsigned int period_bytes) +{ + struct snd_pcm_runtime *runtime = vss->substream->runtime; + unsigned int i; + + vss->msgs = kcalloc(periods, sizeof(*vss->msgs), GFP_KERNEL); + if (!vss->msgs) + return -ENOMEM; + + vss->nmsgs = periods; + + for (i = 0; i < periods; ++i) { + u8 *data = runtime->dma_area + period_bytes * i; + int sg_num = virtsnd_pcm_sg_num(data, period_bytes); + struct virtio_pcm_msg *msg; + + msg = kzalloc(sizeof(*msg) + sizeof(*msg->sgs) * (sg_num + 2), + GFP_KERNEL); + if (!msg) + return -ENOMEM; + + msg->substream = vss; + sg_init_one(&msg->sgs[PCM_MSG_SG_XFER], &msg->xfer, + sizeof(msg->xfer)); + sg_init_one(&msg->sgs[PCM_MSG_SG_STATUS], &msg->status, + sizeof(msg->status)); + msg->length = period_bytes; + virtsnd_pcm_sg_from(&msg->sgs[PCM_MSG_SG_DATA], sg_num, data, + period_bytes); + + vss->msgs[i] = msg; + } + + return 0; +} + +/** + * virtsnd_pcm_msg_free() - Free all allocated I/O messages. + * @vss: VirtIO PCM substream. + * + * Context: Any context. + */ +void virtsnd_pcm_msg_free(struct virtio_pcm_substream *vss) +{ + unsigned int i; + + for (i = 0; vss->msgs && i < vss->nmsgs; ++i) + kfree(vss->msgs[i]); + kfree(vss->msgs); + + vss->msgs = NULL; + vss->nmsgs = 0; +} + +/** + * virtsnd_pcm_msg_send() - Send asynchronous I/O messages. + * @vss: VirtIO PCM substream. + * + * All messages are organized in an ordered circular list. Each time the + * function is called, all currently non-enqueued messages are added to the + * virtqueue. For this, the function keeps track of two values: + * + * msg_last_enqueued = index of the last enqueued message, + * msg_count = # of pending messages in the virtqueue. + * + * Context: Any context. Expects the tx/rx queue and the VirtIO substream + * spinlocks to be held by caller. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_pcm_msg_send(struct virtio_pcm_substream *vss) +{ + struct snd_pcm_runtime *runtime = vss->substream->runtime; + struct virtio_snd *snd = vss->snd; + struct virtio_device *vdev = snd->vdev; + struct virtqueue *vqueue = virtsnd_pcm_queue(vss)->vqueue; + int i; + int n; + bool notify = false; + + i = (vss->msg_last_enqueued + 1) % runtime->periods; + n = runtime->periods - vss->msg_count; + + for (; n; --n, i = (i + 1) % runtime->periods) { + struct virtio_pcm_msg *msg = vss->msgs[i]; + struct scatterlist *psgs[] = { + &msg->sgs[PCM_MSG_SG_XFER], + &msg->sgs[PCM_MSG_SG_DATA], + &msg->sgs[PCM_MSG_SG_STATUS] + }; + int rc; + + msg->xfer.stream_id = cpu_to_le32(vss->sid); + memset(&msg->status, 0, sizeof(msg->status)); + + if (vss->direction == SNDRV_PCM_STREAM_PLAYBACK) + rc = virtqueue_add_sgs(vqueue, psgs, 2, 1, msg, + GFP_ATOMIC); + else + rc = virtqueue_add_sgs(vqueue, psgs, 1, 2, msg, + GFP_ATOMIC); + + if (rc) { + dev_err(&vdev->dev, + "SID %u: failed to send I/O message\n", + vss->sid); + return rc; + } + + vss->msg_last_enqueued = i; + vss->msg_count++; + } + + if (!(vss->features & (1U << VIRTIO_SND_PCM_F_MSG_POLLING))) + notify = virtqueue_kick_prepare(vqueue); + + if (notify) + virtqueue_notify(vqueue); + + return 0; +} + +/** + * virtsnd_pcm_msg_pending_num() - Returns the number of pending I/O messages. + * @vss: VirtIO substream. + * + * Context: Any context. + * Return: Number of messages. + */ +unsigned int virtsnd_pcm_msg_pending_num(struct virtio_pcm_substream *vss) +{ + unsigned int num; + unsigned long flags; + + spin_lock_irqsave(&vss->lock, flags); + num = vss->msg_count; + spin_unlock_irqrestore(&vss->lock, flags); + + return num; +} + +/** + * virtsnd_pcm_msg_complete() - Complete an I/O message. + * @msg: I/O message. + * @written_bytes: Number of bytes written to the message. + * + * Completion of the message means the elapsed period. If transmission is + * allowed, then each completed message is immediately placed back at the end + * of the queue. + * + * For the playback substream, @written_bytes is equal to sizeof(msg->status). + * + * For the capture substream, @written_bytes is equal to sizeof(msg->status) + * plus the number of captured bytes. + * + * Context: Interrupt context. Takes and releases the VirtIO substream spinlock. + */ +static void virtsnd_pcm_msg_complete(struct virtio_pcm_msg *msg, + size_t written_bytes) +{ + struct virtio_pcm_substream *vss = msg->substream; + + /* + * hw_ptr always indicates the buffer position of the first I/O message + * in the virtqueue. Therefore, on each completion of an I/O message, + * the hw_ptr value is unconditionally advanced. + */ + spin_lock(&vss->lock); + /* + * If the capture substream returned an incorrect status, then just + * increase the hw_ptr by the message size. + */ + if (vss->direction == SNDRV_PCM_STREAM_PLAYBACK || + written_bytes <= sizeof(msg->status)) + vss->hw_ptr += msg->length; + else + vss->hw_ptr += written_bytes - sizeof(msg->status); + + if (vss->hw_ptr >= vss->buffer_bytes) + vss->hw_ptr -= vss->buffer_bytes; + + vss->xfer_xrun = false; + vss->msg_count--; + + if (vss->xfer_enabled) { + struct snd_pcm_runtime *runtime = vss->substream->runtime; + + runtime->delay = + bytes_to_frames(runtime, + le32_to_cpu(msg->status.latency_bytes)); + + schedule_work(&vss->elapsed_period); + + virtsnd_pcm_msg_send(vss); + } else if (!vss->msg_count) { + wake_up_all(&vss->msg_empty); + } + spin_unlock(&vss->lock); +} + +/** + * virtsnd_pcm_notify_cb() - Process all completed I/O messages. + * @queue: Underlying tx/rx virtqueue. + * + * Context: Interrupt context. Takes and releases the tx/rx queue spinlock. + */ +static inline void virtsnd_pcm_notify_cb(struct virtio_snd_queue *queue) +{ + struct virtio_pcm_msg *msg; + u32 written_bytes; + unsigned long flags; + + spin_lock_irqsave(&queue->lock, flags); + do { + virtqueue_disable_cb(queue->vqueue); + while ((msg = virtqueue_get_buf(queue->vqueue, &written_bytes))) + virtsnd_pcm_msg_complete(msg, written_bytes); + if (unlikely(virtqueue_is_broken(queue->vqueue))) + break; + } while (!virtqueue_enable_cb(queue->vqueue)); + spin_unlock_irqrestore(&queue->lock, flags); +} + +/** + * virtsnd_pcm_tx_notify_cb() - Process all completed TX messages. + * @vqueue: Underlying tx virtqueue. + * + * Context: Interrupt context. + */ +void virtsnd_pcm_tx_notify_cb(struct virtqueue *vqueue) +{ + struct virtio_snd *snd = vqueue->vdev->priv; + + virtsnd_pcm_notify_cb(virtsnd_tx_queue(snd)); +} + +/** + * virtsnd_pcm_rx_notify_cb() - Process all completed RX messages. + * @vqueue: Underlying rx virtqueue. + * + * Context: Interrupt context. + */ +void virtsnd_pcm_rx_notify_cb(struct virtqueue *vqueue) +{ + struct virtio_snd *snd = vqueue->vdev->priv; + + virtsnd_pcm_notify_cb(virtsnd_rx_queue(snd)); +} + +/** + * virtsnd_pcm_ctl_msg_alloc() - Allocate and initialize the PCM device control + * message for the specified substream. + * @vss: VirtIO PCM substream. + * @command: Control request code (VIRTIO_SND_R_PCM_XXX). + * @gfp: Kernel flags for memory allocation. + * + * Context: Any context. May sleep if @gfp flags permit. + * Return: Allocated message on success, NULL on failure. + */ +struct virtio_snd_msg * +virtsnd_pcm_ctl_msg_alloc(struct virtio_pcm_substream *vss, + unsigned int command, gfp_t gfp) +{ + size_t request_size = sizeof(struct virtio_snd_pcm_hdr); + size_t response_size = sizeof(struct virtio_snd_hdr); + struct virtio_snd_msg *msg; + + switch (command) { + case VIRTIO_SND_R_PCM_SET_PARAMS: + request_size = sizeof(struct virtio_snd_pcm_set_params); + break; + } + + msg = virtsnd_ctl_msg_alloc(request_size, response_size, gfp); + if (msg) { + struct virtio_snd_pcm_hdr *hdr = virtsnd_ctl_msg_request(msg); + + hdr->hdr.code = cpu_to_le32(command); + hdr->stream_id = cpu_to_le32(vss->sid); + } + + return msg; +} -- cgit v1.2.3-59-g8ed1b From da76e9f3e43a7195c69d370ee514cccae6517c76 Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:06 +0100 Subject: ALSA: virtio: PCM substream operators Introduce the operators required for the operation of substreams. Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-7-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- sound/virtio/Makefile | 3 +- sound/virtio/virtio_pcm.c | 2 + sound/virtio/virtio_pcm.h | 5 + sound/virtio/virtio_pcm_ops.c | 445 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 454 insertions(+), 1 deletion(-) create mode 100644 sound/virtio/virtio_pcm_ops.c diff --git a/sound/virtio/Makefile b/sound/virtio/Makefile index 626af3cc3ed7..34493226793f 100644 --- a/sound/virtio/Makefile +++ b/sound/virtio/Makefile @@ -6,5 +6,6 @@ virtio_snd-objs := \ virtio_card.o \ virtio_ctl_msg.o \ virtio_pcm.o \ - virtio_pcm_msg.o + virtio_pcm_msg.o \ + virtio_pcm_ops.o diff --git a/sound/virtio/virtio_pcm.c b/sound/virtio/virtio_pcm.c index 2dcd763efa29..c10d91fff2fb 100644 --- a/sound/virtio/virtio_pcm.c +++ b/sound/virtio/virtio_pcm.c @@ -470,6 +470,8 @@ int virtsnd_pcm_build_devs(struct virtio_snd *snd) for (kss = ks->substream; kss; kss = kss->next) vs->substreams[kss->number]->substream = kss; + + snd_pcm_set_ops(vpcm->pcm, i, &virtsnd_pcm_ops); } snd_pcm_set_managed_buffer_all(vpcm->pcm, diff --git a/sound/virtio/virtio_pcm.h b/sound/virtio/virtio_pcm.h index 6722f1139666..efd0228746cf 100644 --- a/sound/virtio/virtio_pcm.h +++ b/sound/virtio/virtio_pcm.h @@ -29,6 +29,8 @@ struct virtio_pcm_msg; * @hw_ptr: Substream hardware pointer value in bytes [0 ... buffer_bytes). * @xfer_enabled: Data transfer state (0 - off, 1 - on). * @xfer_xrun: Data underflow/overflow state (0 - no xrun, 1 - xrun). + * @stopped: True if the substream is stopped and must be released on the device + * side. * @msgs: Allocated I/O messages. * @nmsgs: Number of allocated I/O messages. * @msg_last_enqueued: Index of the last I/O message added to the virtqueue. @@ -49,6 +51,7 @@ struct virtio_pcm_substream { size_t hw_ptr; bool xfer_enabled; bool xfer_xrun; + bool stopped; struct virtio_pcm_msg **msgs; unsigned int nmsgs; int msg_last_enqueued; @@ -80,6 +83,8 @@ struct virtio_pcm { struct virtio_pcm_stream streams[SNDRV_PCM_STREAM_LAST + 1]; }; +extern const struct snd_pcm_ops virtsnd_pcm_ops; + int virtsnd_pcm_validate(struct virtio_device *vdev); int virtsnd_pcm_parse_cfg(struct virtio_snd *snd); diff --git a/sound/virtio/virtio_pcm_ops.c b/sound/virtio/virtio_pcm_ops.c new file mode 100644 index 000000000000..0682a2df6c8c --- /dev/null +++ b/sound/virtio/virtio_pcm_ops.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#include + +#include "virtio_card.h" + +/* + * I/O messages lifetime + * --------------------- + * + * Allocation: + * Messages are initially allocated in the ops->hw_params() after the size and + * number of periods have been successfully negotiated. + * + * Freeing: + * Messages can be safely freed after the queue has been successfully flushed + * (RELEASE command in the ops->sync_stop()) and the ops->hw_free() has been + * called. + * + * When the substream stops, the ops->sync_stop() waits until the device has + * completed all pending messages. This wait can be interrupted either by a + * signal or due to a timeout. In this case, the device can still access + * messages even after calling ops->hw_free(). It can also issue an interrupt, + * and the interrupt handler will also try to access message structures. + * + * Therefore, freeing of already allocated messages occurs: + * + * - in ops->hw_params(), if this operator was called several times in a row, + * or if ops->hw_free() failed to free messages previously; + * + * - in ops->hw_free(), if the queue has been successfully flushed; + * + * - in dev->release(). + */ + +/* Map for converting ALSA format to VirtIO format. */ +struct virtsnd_a2v_format { + snd_pcm_format_t alsa_bit; + unsigned int vio_bit; +}; + +static const struct virtsnd_a2v_format g_a2v_format_map[] = { + { SNDRV_PCM_FORMAT_IMA_ADPCM, VIRTIO_SND_PCM_FMT_IMA_ADPCM }, + { SNDRV_PCM_FORMAT_MU_LAW, VIRTIO_SND_PCM_FMT_MU_LAW }, + { SNDRV_PCM_FORMAT_A_LAW, VIRTIO_SND_PCM_FMT_A_LAW }, + { SNDRV_PCM_FORMAT_S8, VIRTIO_SND_PCM_FMT_S8 }, + { SNDRV_PCM_FORMAT_U8, VIRTIO_SND_PCM_FMT_U8 }, + { SNDRV_PCM_FORMAT_S16_LE, VIRTIO_SND_PCM_FMT_S16 }, + { SNDRV_PCM_FORMAT_U16_LE, VIRTIO_SND_PCM_FMT_U16 }, + { SNDRV_PCM_FORMAT_S18_3LE, VIRTIO_SND_PCM_FMT_S18_3 }, + { SNDRV_PCM_FORMAT_U18_3LE, VIRTIO_SND_PCM_FMT_U18_3 }, + { SNDRV_PCM_FORMAT_S20_3LE, VIRTIO_SND_PCM_FMT_S20_3 }, + { SNDRV_PCM_FORMAT_U20_3LE, VIRTIO_SND_PCM_FMT_U20_3 }, + { SNDRV_PCM_FORMAT_S24_3LE, VIRTIO_SND_PCM_FMT_S24_3 }, + { SNDRV_PCM_FORMAT_U24_3LE, VIRTIO_SND_PCM_FMT_U24_3 }, + { SNDRV_PCM_FORMAT_S20_LE, VIRTIO_SND_PCM_FMT_S20 }, + { SNDRV_PCM_FORMAT_U20_LE, VIRTIO_SND_PCM_FMT_U20 }, + { SNDRV_PCM_FORMAT_S24_LE, VIRTIO_SND_PCM_FMT_S24 }, + { SNDRV_PCM_FORMAT_U24_LE, VIRTIO_SND_PCM_FMT_U24 }, + { SNDRV_PCM_FORMAT_S32_LE, VIRTIO_SND_PCM_FMT_S32 }, + { SNDRV_PCM_FORMAT_U32_LE, VIRTIO_SND_PCM_FMT_U32 }, + { SNDRV_PCM_FORMAT_FLOAT_LE, VIRTIO_SND_PCM_FMT_FLOAT }, + { SNDRV_PCM_FORMAT_FLOAT64_LE, VIRTIO_SND_PCM_FMT_FLOAT64 }, + { SNDRV_PCM_FORMAT_DSD_U8, VIRTIO_SND_PCM_FMT_DSD_U8 }, + { SNDRV_PCM_FORMAT_DSD_U16_LE, VIRTIO_SND_PCM_FMT_DSD_U16 }, + { SNDRV_PCM_FORMAT_DSD_U32_LE, VIRTIO_SND_PCM_FMT_DSD_U32 }, + { SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE, + VIRTIO_SND_PCM_FMT_IEC958_SUBFRAME } +}; + +/* Map for converting ALSA frame rate to VirtIO frame rate. */ +struct virtsnd_a2v_rate { + unsigned int rate; + unsigned int vio_bit; +}; + +static const struct virtsnd_a2v_rate g_a2v_rate_map[] = { + { 5512, VIRTIO_SND_PCM_RATE_5512 }, + { 8000, VIRTIO_SND_PCM_RATE_8000 }, + { 11025, VIRTIO_SND_PCM_RATE_11025 }, + { 16000, VIRTIO_SND_PCM_RATE_16000 }, + { 22050, VIRTIO_SND_PCM_RATE_22050 }, + { 32000, VIRTIO_SND_PCM_RATE_32000 }, + { 44100, VIRTIO_SND_PCM_RATE_44100 }, + { 48000, VIRTIO_SND_PCM_RATE_48000 }, + { 64000, VIRTIO_SND_PCM_RATE_64000 }, + { 88200, VIRTIO_SND_PCM_RATE_88200 }, + { 96000, VIRTIO_SND_PCM_RATE_96000 }, + { 176400, VIRTIO_SND_PCM_RATE_176400 }, + { 192000, VIRTIO_SND_PCM_RATE_192000 } +}; + +static int virtsnd_pcm_sync_stop(struct snd_pcm_substream *substream); + +/** + * virtsnd_pcm_open() - Open the PCM substream. + * @substream: Kernel ALSA substream. + * + * Context: Process context. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_pcm_open(struct snd_pcm_substream *substream) +{ + struct virtio_pcm *vpcm = snd_pcm_substream_chip(substream); + struct virtio_pcm_stream *vs = &vpcm->streams[substream->stream]; + struct virtio_pcm_substream *vss = vs->substreams[substream->number]; + + substream->runtime->hw = vss->hw; + substream->private_data = vss; + + snd_pcm_hw_constraint_integer(substream->runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + + vss->stopped = !!virtsnd_pcm_msg_pending_num(vss); + + /* + * If the substream has already been used, then the I/O queue may be in + * an invalid state. Just in case, we do a check and try to return the + * queue to its original state, if necessary. + */ + return virtsnd_pcm_sync_stop(substream); +} + +/** + * virtsnd_pcm_close() - Close the PCM substream. + * @substream: Kernel ALSA substream. + * + * Context: Process context. + * Return: 0. + */ +static int virtsnd_pcm_close(struct snd_pcm_substream *substream) +{ + return 0; +} + +/** + * virtsnd_pcm_dev_set_params() - Set the parameters of the PCM substream on + * the device side. + * @vss: VirtIO PCM substream. + * @buffer_bytes: Size of the hardware buffer. + * @period_bytes: Size of the hardware period. + * @channels: Selected number of channels. + * @format: Selected sample format (SNDRV_PCM_FORMAT_XXX). + * @rate: Selected frame rate. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_pcm_dev_set_params(struct virtio_pcm_substream *vss, + unsigned int buffer_bytes, + unsigned int period_bytes, + unsigned int channels, + snd_pcm_format_t format, + unsigned int rate) +{ + struct virtio_snd_msg *msg; + struct virtio_snd_pcm_set_params *request; + unsigned int i; + int vformat = -1; + int vrate = -1; + + for (i = 0; i < ARRAY_SIZE(g_a2v_format_map); ++i) + if (g_a2v_format_map[i].alsa_bit == format) { + vformat = g_a2v_format_map[i].vio_bit; + + break; + } + + for (i = 0; i < ARRAY_SIZE(g_a2v_rate_map); ++i) + if (g_a2v_rate_map[i].rate == rate) { + vrate = g_a2v_rate_map[i].vio_bit; + + break; + } + + if (vformat == -1 || vrate == -1) + return -EINVAL; + + msg = virtsnd_pcm_ctl_msg_alloc(vss, VIRTIO_SND_R_PCM_SET_PARAMS, + GFP_KERNEL); + if (!msg) + return -ENOMEM; + + request = virtsnd_ctl_msg_request(msg); + request->buffer_bytes = cpu_to_le32(buffer_bytes); + request->period_bytes = cpu_to_le32(period_bytes); + request->channels = channels; + request->format = vformat; + request->rate = vrate; + + if (vss->features & (1U << VIRTIO_SND_PCM_F_MSG_POLLING)) + request->features |= + cpu_to_le32(1U << VIRTIO_SND_PCM_F_MSG_POLLING); + + if (vss->features & (1U << VIRTIO_SND_PCM_F_EVT_XRUNS)) + request->features |= + cpu_to_le32(1U << VIRTIO_SND_PCM_F_EVT_XRUNS); + + return virtsnd_ctl_msg_send_sync(vss->snd, msg); +} + +/** + * virtsnd_pcm_hw_params() - Set the parameters of the PCM substream. + * @substream: Kernel ALSA substream. + * @hw_params: Hardware parameters. + * + * Context: Process context. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + struct virtio_pcm_substream *vss = snd_pcm_substream_chip(substream); + struct virtio_device *vdev = vss->snd->vdev; + int rc; + + if (virtsnd_pcm_msg_pending_num(vss)) { + dev_err(&vdev->dev, "SID %u: invalid I/O queue state\n", + vss->sid); + return -EBADFD; + } + + rc = virtsnd_pcm_dev_set_params(vss, params_buffer_bytes(hw_params), + params_period_bytes(hw_params), + params_channels(hw_params), + params_format(hw_params), + params_rate(hw_params)); + if (rc) + return rc; + + /* + * Free previously allocated messages if ops->hw_params() is called + * several times in a row, or if ops->hw_free() failed to free messages. + */ + virtsnd_pcm_msg_free(vss); + + return virtsnd_pcm_msg_alloc(vss, params_periods(hw_params), + params_period_bytes(hw_params)); +} + +/** + * virtsnd_pcm_hw_free() - Reset the parameters of the PCM substream. + * @substream: Kernel ALSA substream. + * + * Context: Process context. + * Return: 0 + */ +static int virtsnd_pcm_hw_free(struct snd_pcm_substream *substream) +{ + struct virtio_pcm_substream *vss = snd_pcm_substream_chip(substream); + + /* If the queue is flushed, we can safely free the messages here. */ + if (!virtsnd_pcm_msg_pending_num(vss)) + virtsnd_pcm_msg_free(vss); + + return 0; +} + +/** + * virtsnd_pcm_prepare() - Prepare the PCM substream. + * @substream: Kernel ALSA substream. + * + * Context: Process context. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_pcm_prepare(struct snd_pcm_substream *substream) +{ + struct virtio_pcm_substream *vss = snd_pcm_substream_chip(substream); + struct virtio_device *vdev = vss->snd->vdev; + struct virtio_snd_msg *msg; + + if (virtsnd_pcm_msg_pending_num(vss)) { + dev_err(&vdev->dev, "SID %u: invalid I/O queue state\n", + vss->sid); + return -EBADFD; + } + + vss->buffer_bytes = snd_pcm_lib_buffer_bytes(substream); + vss->hw_ptr = 0; + vss->xfer_xrun = false; + vss->msg_last_enqueued = -1; + vss->msg_count = 0; + + msg = virtsnd_pcm_ctl_msg_alloc(vss, VIRTIO_SND_R_PCM_PREPARE, + GFP_KERNEL); + if (!msg) + return -ENOMEM; + + return virtsnd_ctl_msg_send_sync(vss->snd, msg); +} + +/** + * virtsnd_pcm_trigger() - Process command for the PCM substream. + * @substream: Kernel ALSA substream. + * @command: Substream command (SNDRV_PCM_TRIGGER_XXX). + * + * Context: Any context. Takes and releases the VirtIO substream spinlock. + * May take and release the tx/rx queue spinlock. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_pcm_trigger(struct snd_pcm_substream *substream, int command) +{ + struct virtio_pcm_substream *vss = snd_pcm_substream_chip(substream); + struct virtio_snd *snd = vss->snd; + struct virtio_snd_queue *queue; + struct virtio_snd_msg *msg; + unsigned long flags; + int rc; + + switch (command) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + queue = virtsnd_pcm_queue(vss); + + spin_lock_irqsave(&queue->lock, flags); + spin_lock(&vss->lock); + rc = virtsnd_pcm_msg_send(vss); + if (!rc) + vss->xfer_enabled = true; + spin_unlock(&vss->lock); + spin_unlock_irqrestore(&queue->lock, flags); + if (rc) + return rc; + + msg = virtsnd_pcm_ctl_msg_alloc(vss, VIRTIO_SND_R_PCM_START, + GFP_KERNEL); + if (!msg) { + spin_lock_irqsave(&vss->lock, flags); + vss->xfer_enabled = false; + spin_unlock_irqrestore(&vss->lock, flags); + + return -ENOMEM; + } + + return virtsnd_ctl_msg_send_sync(snd, msg); + case SNDRV_PCM_TRIGGER_STOP: + vss->stopped = true; + fallthrough; + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + spin_lock_irqsave(&vss->lock, flags); + vss->xfer_enabled = false; + spin_unlock_irqrestore(&vss->lock, flags); + + msg = virtsnd_pcm_ctl_msg_alloc(vss, VIRTIO_SND_R_PCM_STOP, + GFP_KERNEL); + if (!msg) + return -ENOMEM; + + return virtsnd_ctl_msg_send_sync(snd, msg); + default: + return -EINVAL; + } +} + +/** + * virtsnd_pcm_sync_stop() - Synchronous PCM substream stop. + * @substream: Kernel ALSA substream. + * + * The function can be called both from the upper level or from the driver + * itself. + * + * Context: Process context. Takes and releases the VirtIO substream spinlock. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct virtio_pcm_substream *vss = snd_pcm_substream_chip(substream); + struct virtio_snd *snd = vss->snd; + struct virtio_snd_msg *msg; + unsigned int js = msecs_to_jiffies(virtsnd_msg_timeout_ms); + int rc; + + cancel_work_sync(&vss->elapsed_period); + + if (!vss->stopped) + return 0; + + msg = virtsnd_pcm_ctl_msg_alloc(vss, VIRTIO_SND_R_PCM_RELEASE, + GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rc = virtsnd_ctl_msg_send_sync(snd, msg); + if (rc) + return rc; + + /* + * The spec states that upon receipt of the RELEASE command "the device + * MUST complete all pending I/O messages for the specified stream ID". + * Thus, we consider the absence of I/O messages in the queue as an + * indication that the substream has been released. + */ + rc = wait_event_interruptible_timeout(vss->msg_empty, + !virtsnd_pcm_msg_pending_num(vss), + js); + if (rc <= 0) { + dev_warn(&snd->vdev->dev, "SID %u: failed to flush I/O queue\n", + vss->sid); + + return !rc ? -ETIMEDOUT : rc; + } + + vss->stopped = false; + + return 0; +} + +/** + * virtsnd_pcm_pointer() - Get the current hardware position for the PCM + * substream. + * @substream: Kernel ALSA substream. + * + * Context: Any context. Takes and releases the VirtIO substream spinlock. + * Return: Hardware position in frames inside [0 ... buffer_size) range. + */ +static snd_pcm_uframes_t +virtsnd_pcm_pointer(struct snd_pcm_substream *substream) +{ + struct virtio_pcm_substream *vss = snd_pcm_substream_chip(substream); + snd_pcm_uframes_t hw_ptr = SNDRV_PCM_POS_XRUN; + unsigned long flags; + + spin_lock_irqsave(&vss->lock, flags); + if (!vss->xfer_xrun) + hw_ptr = bytes_to_frames(substream->runtime, vss->hw_ptr); + spin_unlock_irqrestore(&vss->lock, flags); + + return hw_ptr; +} + +/* PCM substream operators map. */ +const struct snd_pcm_ops virtsnd_pcm_ops = { + .open = virtsnd_pcm_open, + .close = virtsnd_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = virtsnd_pcm_hw_params, + .hw_free = virtsnd_pcm_hw_free, + .prepare = virtsnd_pcm_prepare, + .trigger = virtsnd_pcm_trigger, + .sync_stop = virtsnd_pcm_sync_stop, + .pointer = virtsnd_pcm_pointer, +}; -- cgit v1.2.3-59-g8ed1b From ca61a41f389c80db091db9d4ad5a651e2b4c9f70 Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:07 +0100 Subject: ALSA: virtio: introduce jack support Enumerate all available jacks and create ALSA controls. At the moment jacks have a simple implementation and can only be used to receive notifications about a plugged in/out device. Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-8-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- sound/virtio/Makefile | 1 + sound/virtio/virtio_card.c | 14 +++ sound/virtio/virtio_card.h | 12 +++ sound/virtio/virtio_jack.c | 233 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 260 insertions(+) create mode 100644 sound/virtio/virtio_jack.c diff --git a/sound/virtio/Makefile b/sound/virtio/Makefile index 34493226793f..09f485291285 100644 --- a/sound/virtio/Makefile +++ b/sound/virtio/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SND_VIRTIO) += virtio_snd.o virtio_snd-objs := \ virtio_card.o \ virtio_ctl_msg.o \ + virtio_jack.o \ virtio_pcm.o \ virtio_pcm_msg.o \ virtio_pcm_ops.o diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index 57b9b7f3a9c0..89bd66c1256e 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -56,6 +56,10 @@ static void virtsnd_event_dispatch(struct virtio_snd *snd, struct virtio_snd_event *event) { switch (le32_to_cpu(event->hdr.code)) { + case VIRTIO_SND_EVT_JACK_CONNECTED: + case VIRTIO_SND_EVT_JACK_DISCONNECTED: + virtsnd_jack_event(snd, event); + break; case VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED: case VIRTIO_SND_EVT_PCM_XRUN: virtsnd_pcm_event(snd, event); @@ -219,10 +223,20 @@ static int virtsnd_build_devs(struct virtio_snd *snd) VIRTIO_SND_CARD_NAME " at %s/%s", dev_name(dev->parent), dev_name(dev)); + rc = virtsnd_jack_parse_cfg(snd); + if (rc) + return rc; + rc = virtsnd_pcm_parse_cfg(snd); if (rc) return rc; + if (snd->njacks) { + rc = virtsnd_jack_build_devs(snd); + if (rc) + return rc; + } + if (snd->nsubstreams) { rc = virtsnd_pcm_build_devs(snd); if (rc) diff --git a/sound/virtio/virtio_card.h b/sound/virtio/virtio_card.h index c43f9744d362..f154313c79fd 100644 --- a/sound/virtio/virtio_card.h +++ b/sound/virtio/virtio_card.h @@ -18,6 +18,7 @@ #define VIRTIO_SND_CARD_NAME "VirtIO SoundCard" #define VIRTIO_SND_PCM_NAME "VirtIO PCM" +struct virtio_jack; struct virtio_pcm_substream; /** @@ -38,6 +39,8 @@ struct virtio_snd_queue { * @ctl_msgs: Pending control request list. * @event_msgs: Device events. * @pcm_list: VirtIO PCM device list. + * @jacks: VirtIO jacks. + * @njacks: Number of jacks. * @substreams: VirtIO PCM substreams. * @nsubstreams: Number of PCM substreams. */ @@ -48,6 +51,8 @@ struct virtio_snd { struct list_head ctl_msgs; struct virtio_snd_event *event_msgs; struct list_head pcm_list; + struct virtio_jack *jacks; + u32 njacks; struct virtio_pcm_substream *substreams; u32 nsubstreams; }; @@ -88,4 +93,11 @@ virtsnd_pcm_queue(struct virtio_pcm_substream *vss) return virtsnd_rx_queue(vss->snd); } +int virtsnd_jack_parse_cfg(struct virtio_snd *snd); + +int virtsnd_jack_build_devs(struct virtio_snd *snd); + +void virtsnd_jack_event(struct virtio_snd *snd, + struct virtio_snd_event *event); + #endif /* VIRTIO_SND_CARD_H */ diff --git a/sound/virtio/virtio_jack.c b/sound/virtio/virtio_jack.c new file mode 100644 index 000000000000..c69f1dcdcc84 --- /dev/null +++ b/sound/virtio/virtio_jack.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#include +#include +#include + +#include "virtio_card.h" + +/** + * DOC: Implementation Status + * + * At the moment jacks have a simple implementation and can only be used to + * receive notifications about a plugged in/out device. + * + * VIRTIO_SND_R_JACK_REMAP + * is not supported + */ + +/** + * struct virtio_jack - VirtIO jack. + * @jack: Kernel jack control. + * @nid: Functional group node identifier. + * @features: Jack virtio feature bit map (1 << VIRTIO_SND_JACK_F_XXX). + * @defconf: Pin default configuration value. + * @caps: Pin capabilities value. + * @connected: Current jack connection status. + * @type: Kernel jack type (SND_JACK_XXX). + */ +struct virtio_jack { + struct snd_jack *jack; + u32 nid; + u32 features; + u32 defconf; + u32 caps; + bool connected; + int type; +}; + +/** + * virtsnd_jack_get_label() - Get the name string for the jack. + * @vjack: VirtIO jack. + * + * Returns the jack name based on the default pin configuration value (see HDA + * specification). + * + * Context: Any context. + * Return: Name string. + */ +static const char *virtsnd_jack_get_label(struct virtio_jack *vjack) +{ + unsigned int defconf = vjack->defconf; + unsigned int device = + (defconf & AC_DEFCFG_DEVICE) >> AC_DEFCFG_DEVICE_SHIFT; + unsigned int location = + (defconf & AC_DEFCFG_LOCATION) >> AC_DEFCFG_LOCATION_SHIFT; + + switch (device) { + case AC_JACK_LINE_OUT: + return "Line Out"; + case AC_JACK_SPEAKER: + return "Speaker"; + case AC_JACK_HP_OUT: + return "Headphone"; + case AC_JACK_CD: + return "CD"; + case AC_JACK_SPDIF_OUT: + case AC_JACK_DIG_OTHER_OUT: + if (location == AC_JACK_LOC_HDMI) + return "HDMI Out"; + else + return "SPDIF Out"; + case AC_JACK_LINE_IN: + return "Line"; + case AC_JACK_AUX: + return "Aux"; + case AC_JACK_MIC_IN: + return "Mic"; + case AC_JACK_SPDIF_IN: + return "SPDIF In"; + case AC_JACK_DIG_OTHER_IN: + return "Digital In"; + default: + return "Misc"; + } +} + +/** + * virtsnd_jack_get_type() - Get the type for the jack. + * @vjack: VirtIO jack. + * + * Returns the jack type based on the default pin configuration value (see HDA + * specification). + * + * Context: Any context. + * Return: SND_JACK_XXX value. + */ +static int virtsnd_jack_get_type(struct virtio_jack *vjack) +{ + unsigned int defconf = vjack->defconf; + unsigned int device = + (defconf & AC_DEFCFG_DEVICE) >> AC_DEFCFG_DEVICE_SHIFT; + + switch (device) { + case AC_JACK_LINE_OUT: + case AC_JACK_SPEAKER: + return SND_JACK_LINEOUT; + case AC_JACK_HP_OUT: + return SND_JACK_HEADPHONE; + case AC_JACK_SPDIF_OUT: + case AC_JACK_DIG_OTHER_OUT: + return SND_JACK_AVOUT; + case AC_JACK_MIC_IN: + return SND_JACK_MICROPHONE; + default: + return SND_JACK_LINEIN; + } +} + +/** + * virtsnd_jack_parse_cfg() - Parse the jack configuration. + * @snd: VirtIO sound device. + * + * This function is called during initial device initialization. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_jack_parse_cfg(struct virtio_snd *snd) +{ + struct virtio_device *vdev = snd->vdev; + struct virtio_snd_jack_info *info; + u32 i; + int rc; + + virtio_cread_le(vdev, struct virtio_snd_config, jacks, &snd->njacks); + if (!snd->njacks) + return 0; + + snd->jacks = devm_kcalloc(&vdev->dev, snd->njacks, sizeof(*snd->jacks), + GFP_KERNEL); + if (!snd->jacks) + return -ENOMEM; + + info = kcalloc(snd->njacks, sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + rc = virtsnd_ctl_query_info(snd, VIRTIO_SND_R_JACK_INFO, 0, snd->njacks, + sizeof(*info), info); + if (rc) + goto on_exit; + + for (i = 0; i < snd->njacks; ++i) { + struct virtio_jack *vjack = &snd->jacks[i]; + + vjack->nid = le32_to_cpu(info[i].hdr.hda_fn_nid); + vjack->features = le32_to_cpu(info[i].features); + vjack->defconf = le32_to_cpu(info[i].hda_reg_defconf); + vjack->caps = le32_to_cpu(info[i].hda_reg_caps); + vjack->connected = info[i].connected; + } + +on_exit: + kfree(info); + + return rc; +} + +/** + * virtsnd_jack_build_devs() - Build ALSA controls for jacks. + * @snd: VirtIO sound device. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_jack_build_devs(struct virtio_snd *snd) +{ + u32 i; + int rc; + + for (i = 0; i < snd->njacks; ++i) { + struct virtio_jack *vjack = &snd->jacks[i]; + + vjack->type = virtsnd_jack_get_type(vjack); + + rc = snd_jack_new(snd->card, virtsnd_jack_get_label(vjack), + vjack->type, &vjack->jack, true, true); + if (rc) + return rc; + + if (vjack->jack) + vjack->jack->private_data = vjack; + + snd_jack_report(vjack->jack, + vjack->connected ? vjack->type : 0); + } + + return 0; +} + +/** + * virtsnd_jack_event() - Handle the jack event notification. + * @snd: VirtIO sound device. + * @event: VirtIO sound event. + * + * Context: Interrupt context. + */ +void virtsnd_jack_event(struct virtio_snd *snd, struct virtio_snd_event *event) +{ + u32 jack_id = le32_to_cpu(event->data); + struct virtio_jack *vjack; + + if (jack_id >= snd->njacks) + return; + + vjack = &snd->jacks[jack_id]; + + switch (le32_to_cpu(event->hdr.code)) { + case VIRTIO_SND_EVT_JACK_CONNECTED: + vjack->connected = true; + break; + case VIRTIO_SND_EVT_JACK_DISCONNECTED: + vjack->connected = false; + break; + default: + return; + } + + snd_jack_report(vjack->jack, vjack->connected ? vjack->type : 0); +} -- cgit v1.2.3-59-g8ed1b From 19325fedf245ca932c58a629d3888a9a393534ab Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:08 +0100 Subject: ALSA: virtio: introduce PCM channel map support Enumerate all available PCM channel maps and create ALSA controls. Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-9-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- sound/virtio/Makefile | 1 + sound/virtio/virtio_card.c | 10 ++ sound/virtio/virtio_card.h | 8 ++ sound/virtio/virtio_chmap.c | 219 ++++++++++++++++++++++++++++++++++++++++++++ sound/virtio/virtio_pcm.h | 4 + 5 files changed, 242 insertions(+) create mode 100644 sound/virtio/virtio_chmap.c diff --git a/sound/virtio/Makefile b/sound/virtio/Makefile index 09f485291285..2742bddb8874 100644 --- a/sound/virtio/Makefile +++ b/sound/virtio/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_SND_VIRTIO) += virtio_snd.o virtio_snd-objs := \ virtio_card.o \ + virtio_chmap.o \ virtio_ctl_msg.o \ virtio_jack.o \ virtio_pcm.o \ diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index 89bd66c1256e..1c03fcc41c3b 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -231,6 +231,10 @@ static int virtsnd_build_devs(struct virtio_snd *snd) if (rc) return rc; + rc = virtsnd_chmap_parse_cfg(snd); + if (rc) + return rc; + if (snd->njacks) { rc = virtsnd_jack_build_devs(snd); if (rc) @@ -243,6 +247,12 @@ static int virtsnd_build_devs(struct virtio_snd *snd) return rc; } + if (snd->nchmaps) { + rc = virtsnd_chmap_build_devs(snd); + if (rc) + return rc; + } + return snd_card_register(snd->card); } diff --git a/sound/virtio/virtio_card.h b/sound/virtio/virtio_card.h index f154313c79fd..86ef3941895e 100644 --- a/sound/virtio/virtio_card.h +++ b/sound/virtio/virtio_card.h @@ -43,6 +43,8 @@ struct virtio_snd_queue { * @njacks: Number of jacks. * @substreams: VirtIO PCM substreams. * @nsubstreams: Number of PCM substreams. + * @chmaps: VirtIO channel maps. + * @nchmaps: Number of channel maps. */ struct virtio_snd { struct virtio_device *vdev; @@ -55,6 +57,8 @@ struct virtio_snd { u32 njacks; struct virtio_pcm_substream *substreams; u32 nsubstreams; + struct virtio_snd_chmap_info *chmaps; + u32 nchmaps; }; /* Message completion timeout in milliseconds (module parameter). */ @@ -100,4 +104,8 @@ int virtsnd_jack_build_devs(struct virtio_snd *snd); void virtsnd_jack_event(struct virtio_snd *snd, struct virtio_snd_event *event); +int virtsnd_chmap_parse_cfg(struct virtio_snd *snd); + +int virtsnd_chmap_build_devs(struct virtio_snd *snd); + #endif /* VIRTIO_SND_CARD_H */ diff --git a/sound/virtio/virtio_chmap.c b/sound/virtio/virtio_chmap.c new file mode 100644 index 000000000000..5bc924933a59 --- /dev/null +++ b/sound/virtio/virtio_chmap.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * virtio-snd: Virtio sound device + * Copyright (C) 2021 OpenSynergy GmbH + */ +#include + +#include "virtio_card.h" + +/* VirtIO->ALSA channel position map */ +static const u8 g_v2a_position_map[] = { + [VIRTIO_SND_CHMAP_NONE] = SNDRV_CHMAP_UNKNOWN, + [VIRTIO_SND_CHMAP_NA] = SNDRV_CHMAP_NA, + [VIRTIO_SND_CHMAP_MONO] = SNDRV_CHMAP_MONO, + [VIRTIO_SND_CHMAP_FL] = SNDRV_CHMAP_FL, + [VIRTIO_SND_CHMAP_FR] = SNDRV_CHMAP_FR, + [VIRTIO_SND_CHMAP_RL] = SNDRV_CHMAP_RL, + [VIRTIO_SND_CHMAP_RR] = SNDRV_CHMAP_RR, + [VIRTIO_SND_CHMAP_FC] = SNDRV_CHMAP_FC, + [VIRTIO_SND_CHMAP_LFE] = SNDRV_CHMAP_LFE, + [VIRTIO_SND_CHMAP_SL] = SNDRV_CHMAP_SL, + [VIRTIO_SND_CHMAP_SR] = SNDRV_CHMAP_SR, + [VIRTIO_SND_CHMAP_RC] = SNDRV_CHMAP_RC, + [VIRTIO_SND_CHMAP_FLC] = SNDRV_CHMAP_FLC, + [VIRTIO_SND_CHMAP_FRC] = SNDRV_CHMAP_FRC, + [VIRTIO_SND_CHMAP_RLC] = SNDRV_CHMAP_RLC, + [VIRTIO_SND_CHMAP_RRC] = SNDRV_CHMAP_RRC, + [VIRTIO_SND_CHMAP_FLW] = SNDRV_CHMAP_FLW, + [VIRTIO_SND_CHMAP_FRW] = SNDRV_CHMAP_FRW, + [VIRTIO_SND_CHMAP_FLH] = SNDRV_CHMAP_FLH, + [VIRTIO_SND_CHMAP_FCH] = SNDRV_CHMAP_FCH, + [VIRTIO_SND_CHMAP_FRH] = SNDRV_CHMAP_FRH, + [VIRTIO_SND_CHMAP_TC] = SNDRV_CHMAP_TC, + [VIRTIO_SND_CHMAP_TFL] = SNDRV_CHMAP_TFL, + [VIRTIO_SND_CHMAP_TFR] = SNDRV_CHMAP_TFR, + [VIRTIO_SND_CHMAP_TFC] = SNDRV_CHMAP_TFC, + [VIRTIO_SND_CHMAP_TRL] = SNDRV_CHMAP_TRL, + [VIRTIO_SND_CHMAP_TRR] = SNDRV_CHMAP_TRR, + [VIRTIO_SND_CHMAP_TRC] = SNDRV_CHMAP_TRC, + [VIRTIO_SND_CHMAP_TFLC] = SNDRV_CHMAP_TFLC, + [VIRTIO_SND_CHMAP_TFRC] = SNDRV_CHMAP_TFRC, + [VIRTIO_SND_CHMAP_TSL] = SNDRV_CHMAP_TSL, + [VIRTIO_SND_CHMAP_TSR] = SNDRV_CHMAP_TSR, + [VIRTIO_SND_CHMAP_LLFE] = SNDRV_CHMAP_LLFE, + [VIRTIO_SND_CHMAP_RLFE] = SNDRV_CHMAP_RLFE, + [VIRTIO_SND_CHMAP_BC] = SNDRV_CHMAP_BC, + [VIRTIO_SND_CHMAP_BLC] = SNDRV_CHMAP_BLC, + [VIRTIO_SND_CHMAP_BRC] = SNDRV_CHMAP_BRC +}; + +/** + * virtsnd_chmap_parse_cfg() - Parse the channel map configuration. + * @snd: VirtIO sound device. + * + * This function is called during initial device initialization. + * + * Context: Any context that permits to sleep. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_chmap_parse_cfg(struct virtio_snd *snd) +{ + struct virtio_device *vdev = snd->vdev; + u32 i; + int rc; + + virtio_cread_le(vdev, struct virtio_snd_config, chmaps, &snd->nchmaps); + if (!snd->nchmaps) + return 0; + + snd->chmaps = devm_kcalloc(&vdev->dev, snd->nchmaps, + sizeof(*snd->chmaps), GFP_KERNEL); + if (!snd->chmaps) + return -ENOMEM; + + rc = virtsnd_ctl_query_info(snd, VIRTIO_SND_R_CHMAP_INFO, 0, + snd->nchmaps, sizeof(*snd->chmaps), + snd->chmaps); + if (rc) + return rc; + + /* Count the number of channel maps per each PCM device/stream. */ + for (i = 0; i < snd->nchmaps; ++i) { + struct virtio_snd_chmap_info *info = &snd->chmaps[i]; + u32 nid = le32_to_cpu(info->hdr.hda_fn_nid); + struct virtio_pcm *vpcm; + struct virtio_pcm_stream *vs; + + vpcm = virtsnd_pcm_find_or_create(snd, nid); + if (IS_ERR(vpcm)) + return PTR_ERR(vpcm); + + switch (info->direction) { + case VIRTIO_SND_D_OUTPUT: + vs = &vpcm->streams[SNDRV_PCM_STREAM_PLAYBACK]; + break; + case VIRTIO_SND_D_INPUT: + vs = &vpcm->streams[SNDRV_PCM_STREAM_CAPTURE]; + break; + default: + dev_err(&vdev->dev, + "chmap #%u: unknown direction (%u)\n", i, + info->direction); + return -EINVAL; + } + + vs->nchmaps++; + } + + return 0; +} + +/** + * virtsnd_chmap_add_ctls() - Create an ALSA control for channel maps. + * @pcm: ALSA PCM device. + * @direction: PCM stream direction (SNDRV_PCM_STREAM_XXX). + * @vs: VirtIO PCM stream. + * + * Context: Any context. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_chmap_add_ctls(struct snd_pcm *pcm, int direction, + struct virtio_pcm_stream *vs) +{ + u32 i; + int max_channels = 0; + + for (i = 0; i < vs->nchmaps; i++) + if (max_channels < vs->chmaps[i].channels) + max_channels = vs->chmaps[i].channels; + + return snd_pcm_add_chmap_ctls(pcm, direction, vs->chmaps, max_channels, + 0, NULL); +} + +/** + * virtsnd_chmap_build_devs() - Build ALSA controls for channel maps. + * @snd: VirtIO sound device. + * + * Context: Any context. + * Return: 0 on success, -errno on failure. + */ +int virtsnd_chmap_build_devs(struct virtio_snd *snd) +{ + struct virtio_device *vdev = snd->vdev; + struct virtio_pcm *vpcm; + struct virtio_pcm_stream *vs; + u32 i; + int rc; + + /* Allocate channel map elements per each PCM device/stream. */ + list_for_each_entry(vpcm, &snd->pcm_list, list) { + for (i = 0; i < ARRAY_SIZE(vpcm->streams); ++i) { + vs = &vpcm->streams[i]; + + if (!vs->nchmaps) + continue; + + vs->chmaps = devm_kcalloc(&vdev->dev, vs->nchmaps + 1, + sizeof(*vs->chmaps), + GFP_KERNEL); + if (!vs->chmaps) + return -ENOMEM; + + vs->nchmaps = 0; + } + } + + /* Initialize channel maps per each PCM device/stream. */ + for (i = 0; i < snd->nchmaps; ++i) { + struct virtio_snd_chmap_info *info = &snd->chmaps[i]; + unsigned int channels = info->channels; + unsigned int ch; + struct snd_pcm_chmap_elem *chmap; + + vpcm = virtsnd_pcm_find(snd, le32_to_cpu(info->hdr.hda_fn_nid)); + if (IS_ERR(vpcm)) + return PTR_ERR(vpcm); + + if (info->direction == VIRTIO_SND_D_OUTPUT) + vs = &vpcm->streams[SNDRV_PCM_STREAM_PLAYBACK]; + else + vs = &vpcm->streams[SNDRV_PCM_STREAM_CAPTURE]; + + chmap = &vs->chmaps[vs->nchmaps++]; + + if (channels > ARRAY_SIZE(chmap->map)) + channels = ARRAY_SIZE(chmap->map); + + chmap->channels = channels; + + for (ch = 0; ch < channels; ++ch) { + u8 position = info->positions[ch]; + + if (position >= ARRAY_SIZE(g_v2a_position_map)) + return -EINVAL; + + chmap->map[ch] = g_v2a_position_map[position]; + } + } + + /* Create an ALSA control per each PCM device/stream. */ + list_for_each_entry(vpcm, &snd->pcm_list, list) { + if (!vpcm->pcm) + continue; + + for (i = 0; i < ARRAY_SIZE(vpcm->streams); ++i) { + vs = &vpcm->streams[i]; + + if (!vs->nchmaps) + continue; + + rc = virtsnd_chmap_add_ctls(vpcm->pcm, i, vs); + if (rc) + return rc; + } + } + + return 0; +} diff --git a/sound/virtio/virtio_pcm.h b/sound/virtio/virtio_pcm.h index efd0228746cf..1353fdc9bd69 100644 --- a/sound/virtio/virtio_pcm.h +++ b/sound/virtio/virtio_pcm.h @@ -63,10 +63,14 @@ struct virtio_pcm_substream { * struct virtio_pcm_stream - VirtIO PCM stream. * @substreams: VirtIO substreams belonging to the stream. * @nsubstreams: Number of substreams. + * @chmaps: Kernel channel maps belonging to the stream. + * @nchmaps: Number of channel maps. */ struct virtio_pcm_stream { struct virtio_pcm_substream **substreams; u32 nsubstreams; + struct snd_pcm_chmap_elem *chmaps; + u32 nchmaps; }; /** -- cgit v1.2.3-59-g8ed1b From 575483e90a3292c2afceb7161732046e411d6fdd Mon Sep 17 00:00:00 2001 From: Anton Yakovlev Date: Tue, 2 Mar 2021 17:47:09 +0100 Subject: ALSA: virtio: introduce device suspend/resume support All running PCM substreams are stopped on device suspend and restarted on device resume. Signed-off-by: Anton Yakovlev Link: https://lore.kernel.org/r/20210302164709.3142702-10-anton.yakovlev@opensynergy.com Signed-off-by: Takashi Iwai --- sound/virtio/virtio_card.c | 56 +++++++++++++++++++++++++++++++++++++++++++ sound/virtio/virtio_pcm.h | 3 +++ sound/virtio/virtio_pcm_ops.c | 33 +++++++++++++++++++------ 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/sound/virtio/virtio_card.c b/sound/virtio/virtio_card.c index 1c03fcc41c3b..ae9128063917 100644 --- a/sound/virtio/virtio_card.c +++ b/sound/virtio/virtio_card.c @@ -362,6 +362,58 @@ static void virtsnd_remove(struct virtio_device *vdev) kfree(snd->event_msgs); } +#ifdef CONFIG_PM_SLEEP +/** + * virtsnd_freeze() - Suspend device. + * @vdev: VirtIO parent device. + * + * Context: Any context. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_freeze(struct virtio_device *vdev) +{ + struct virtio_snd *snd = vdev->priv; + unsigned int i; + + virtsnd_disable_event_vq(snd); + virtsnd_ctl_msg_cancel_all(snd); + + vdev->config->del_vqs(vdev); + vdev->config->reset(vdev); + + for (i = 0; i < snd->nsubstreams; ++i) + cancel_work_sync(&snd->substreams[i].elapsed_period); + + kfree(snd->event_msgs); + snd->event_msgs = NULL; + + return 0; +} + +/** + * virtsnd_restore() - Resume device. + * @vdev: VirtIO parent device. + * + * Context: Any context. + * Return: 0 on success, -errno on failure. + */ +static int virtsnd_restore(struct virtio_device *vdev) +{ + struct virtio_snd *snd = vdev->priv; + int rc; + + rc = virtsnd_find_vqs(snd); + if (rc) + return rc; + + virtio_device_ready(vdev); + + virtsnd_enable_event_vq(snd); + + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + static const struct virtio_device_id id_table[] = { { VIRTIO_ID_SOUND, VIRTIO_DEV_ANY_ID }, { 0 }, @@ -374,6 +426,10 @@ static struct virtio_driver virtsnd_driver = { .validate = virtsnd_validate, .probe = virtsnd_probe, .remove = virtsnd_remove, +#ifdef CONFIG_PM_SLEEP + .freeze = virtsnd_freeze, + .restore = virtsnd_restore, +#endif }; static int __init init(void) diff --git a/sound/virtio/virtio_pcm.h b/sound/virtio/virtio_pcm.h index 1353fdc9bd69..062eb8e8f2cf 100644 --- a/sound/virtio/virtio_pcm.h +++ b/sound/virtio/virtio_pcm.h @@ -31,6 +31,8 @@ struct virtio_pcm_msg; * @xfer_xrun: Data underflow/overflow state (0 - no xrun, 1 - xrun). * @stopped: True if the substream is stopped and must be released on the device * side. + * @suspended: True if the substream is suspended and must be reconfigured on + * the device side at resume. * @msgs: Allocated I/O messages. * @nmsgs: Number of allocated I/O messages. * @msg_last_enqueued: Index of the last I/O message added to the virtqueue. @@ -52,6 +54,7 @@ struct virtio_pcm_substream { bool xfer_enabled; bool xfer_xrun; bool stopped; + bool suspended; struct virtio_pcm_msg **msgs; unsigned int nmsgs; int msg_last_enqueued; diff --git a/sound/virtio/virtio_pcm_ops.c b/sound/virtio/virtio_pcm_ops.c index 0682a2df6c8c..f8bfb87624be 100644 --- a/sound/virtio/virtio_pcm_ops.c +++ b/sound/virtio/virtio_pcm_ops.c @@ -115,6 +115,7 @@ static int virtsnd_pcm_open(struct snd_pcm_substream *substream) SNDRV_PCM_HW_PARAM_PERIODS); vss->stopped = !!virtsnd_pcm_msg_pending_num(vss); + vss->suspended = false; /* * If the substream has already been used, then the I/O queue may be in @@ -272,16 +273,31 @@ static int virtsnd_pcm_prepare(struct snd_pcm_substream *substream) struct virtio_device *vdev = vss->snd->vdev; struct virtio_snd_msg *msg; - if (virtsnd_pcm_msg_pending_num(vss)) { - dev_err(&vdev->dev, "SID %u: invalid I/O queue state\n", - vss->sid); - return -EBADFD; + if (!vss->suspended) { + if (virtsnd_pcm_msg_pending_num(vss)) { + dev_err(&vdev->dev, "SID %u: invalid I/O queue state\n", + vss->sid); + return -EBADFD; + } + + vss->buffer_bytes = snd_pcm_lib_buffer_bytes(substream); + vss->hw_ptr = 0; + vss->msg_last_enqueued = -1; + } else { + struct snd_pcm_runtime *runtime = substream->runtime; + unsigned int buffer_bytes = snd_pcm_lib_buffer_bytes(substream); + unsigned int period_bytes = snd_pcm_lib_period_bytes(substream); + int rc; + + rc = virtsnd_pcm_dev_set_params(vss, buffer_bytes, period_bytes, + runtime->channels, + runtime->format, runtime->rate); + if (rc) + return rc; } - vss->buffer_bytes = snd_pcm_lib_buffer_bytes(substream); - vss->hw_ptr = 0; vss->xfer_xrun = false; - vss->msg_last_enqueued = -1; + vss->suspended = false; vss->msg_count = 0; msg = virtsnd_pcm_ctl_msg_alloc(vss, VIRTIO_SND_R_PCM_PREPARE, @@ -336,6 +352,9 @@ static int virtsnd_pcm_trigger(struct snd_pcm_substream *substream, int command) } return virtsnd_ctl_msg_send_sync(snd, msg); + case SNDRV_PCM_TRIGGER_SUSPEND: + vss->suspended = true; + fallthrough; case SNDRV_PCM_TRIGGER_STOP: vss->stopped = true; fallthrough; -- cgit v1.2.3-59-g8ed1b