Diffstat (limited to 'drivers/staging/erofs')
22 files changed, 0 insertions, 7559 deletions
diff --git a/drivers/staging/erofs/Documentation/filesystems/erofs.txt b/drivers/staging/erofs/Documentation/filesystems/erofs.txt
deleted file mode 100644
index 74cf84ac48a3..000000000000
--- a/drivers/staging/erofs/Documentation/filesystems/erofs.txt
+++ /dev/null
@@ -1,209 +0,0 @@
-Overview
-========
-
-EROFS file-system stands for Enhanced Read-Only File System. Different
-from other read-only file systems, it aims to be designed for flexibility,
-scalability, but be kept simple and high performance.
-
-It is designed as a better filesystem solution for the following scenarios:
- - read-only storage media or
-
- - part of a fully trusted read-only solution, which means it needs to be
-   immutable and bit-for-bit identical to the official golden image for
-   their releases due to security and other considerations and
-
- - hope to save some extra storage space with guaranteed end-to-end performance
-   by using reduced metadata and transparent file compression, especially
-   for those embedded devices with limited memory (ex, smartphone);
-
-Here is the main features of EROFS:
- - Little endian on-disk design;
-
- - Currently 4KB block size (nobh) and therefore maximum 16TB address space;
-
- - Metadata & data could be mixed by design;
-
- - 2 inode versions for different requirements:
-                          v1            v2
-   Inode metadata size:   32 bytes      64 bytes
-   Max file size:         4 GB          16 EB (also limited by max. vol size)
-   Max uids/gids:         65536         4294967296
-   File creation time:    no            yes (64 + 32-bit timestamp)
-   Max hardlinks:         65536         4294967296
-   Metadata reserved:     4 bytes       14 bytes
-
- - Support extended attributes (xattrs) as an option;
-
- - Support xattr inline and tail-end data inline for all files;
-
- - Support POSIX.1e ACLs by using xattrs;
-
- - Support transparent file compression as an option:
-   LZ4 algorithm with 4 KB fixed-output compression for high performance;
-
-The following git tree provides the file system user-space tools under
-development (ex, formatting tool mkfs.erofs):
->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
-
-Bugs and patches are welcome, please kindly help us and send to the following
-linux-erofs mailing list:
->> linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
-
-Note that EROFS is still working in progress as a Linux staging driver,
-Cc the staging mailing list as well is highly recommended:
->> Linux Driver Project Developer List <devel@driverdev.osuosl.org>
-
-Mount options
-=============
-
-fault_injection=%d     Enable fault injection in all supported types with
-                       specified injection rate. Supported injection type:
-                       Type_Name               Type_Value
-                       FAULT_KMALLOC           0x000000001
-                       FAULT_READ_IO           0x000000002
-(no)user_xattr         Setup Extended User Attributes. Note: xattr is enabled
-                       by default if CONFIG_EROFS_FS_XATTR is selected.
-(no)acl                Setup POSIX Access Control List. Note: acl is enabled
-                       by default if CONFIG_EROFS_FS_POSIX_ACL is selected.
-
-On-disk details
-===============
-
-Summary
--------
-Different from other read-only file systems, an EROFS volume is designed
-to be as simple as possible:
-
-                                |-> aligned with the block size
-   ____________________________________________________________
-  | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data |
-  |_|__|_|_____|__________|_____|______|__________|_____|______|
-  0 +1K
-
-All data areas should be aligned with the block size, but metadata areas
-may not. All metadatas can be now observed in two different spaces (views):
- 1.
Inode metadata space - Each valid inode should be aligned with an inode slot, which is a fixed - value (32 bytes) and designed to be kept in line with v1 inode size. - - Each inode can be directly found with the following formula: - inode offset = meta_blkaddr * block_size + 32 * nid - - |-> aligned with 8B - |-> followed closely - + meta_blkaddr blocks |-> another slot - _____________________________________________________________________ - | ... | inode | xattrs | extents | data inline | ... | inode ... - |________|_______|(optional)|(optional)|__(optional)_|_____|__________ - |-> aligned with the inode slot size - . . - . . - . . - . . - . . - . . - .____________________________________________________|-> aligned with 4B - | xattr_ibody_header | shared xattrs | inline xattrs | - |____________________|_______________|_______________| - |-> 12 bytes <-|->x * 4 bytes<-| . - . . . - . . . - . . . - ._______________________________.______________________. - | id | id | id | id | ... | id | ent | ... | ent| ... | - |____|____|____|____|______|____|_____|_____|____|_____| - |-> aligned with 4B - |-> aligned with 4B - - Inode could be 32 or 64 bytes, which can be distinguished from a common - field which all inode versions have -- i_advise: - - __________________ __________________ - | i_advise | | i_advise | - |__________________| |__________________| - | ... | | ... | - | | | | - |__________________| 32 bytes | | - | | - |__________________| 64 bytes - - Xattrs, extents, data inline are followed by the corresponding inode with - proper alignes, and they could be optional for different data mappings, - _currently_ there are totally 3 valid data mappings supported: - - 1) flat file data without data inline (no extent); - 2) fixed-output size data compression (must have extents); - 3) flat file data with tail-end data inline (no extent); - - The size of the optional xattrs is indicated by i_xattr_count in inode - header. Large xattrs or xattrs shared by many different files can be - stored in shared xattrs metadata rather than inlined right after inode. - - 2. Shared xattrs metadata space - Shared xattrs space is similar to the above inode space, started with - a specific block indicated by xattr_blkaddr, organized one by one with - proper align. - - Each share xattr can also be directly found by the following formula: - xattr offset = xattr_blkaddr * block_size + 4 * xattr_id - - |-> aligned by 4 bytes - + xattr_blkaddr blocks |-> aligned with 4 bytes - _________________________________________________________________________ - | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... - |________|_____________|_____________|_____|______________|_______________ - -Directories ------------ -All directories are now organized in a compact on-disk format. Note that -each directory block is divided into index and name areas in order to support -random file lookup, and all directory entries are _strictly_ recorded in -alphabetical order in order to support improved prefix binary search -algorithm (could refer to the related source code). - - ___________________________ - / | - / ______________|________________ - / / | nameoff1 | nameoffN-1 - ____________.______________._______________v________________v__________ -| dirent | dirent | ... | dirent | filename | filename | ... 
| filename | -|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| - \ ^ - \ | * could have - \ | trailing '\0' - \________________________| nameoff0 - - Directory block - -Note that apart from the offset of the first filename, nameoff0 also indicates -the total number of directory entries in this block since it is no need to -introduce another on-disk field at all. - -Compression ------------ -Currently, EROFS supports 4KB fixed-output clustersize transparent file -compression, as illustrated below: - - |---- Variant-Length Extent ----|-------- VLE --------|----- VLE ----- - clusterofs clusterofs clusterofs - | | | logical data -_________v_______________________________v_____________________v_______________ -... | . | | . | | . | ... -____|____.________|_____________|________.____|_____________|__.__________|____ - |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-| - size size size size size - . . . . - . . . . - . . . . - _______._____________._____________._____________._____________________ - ... | | | | ... physical data - _______|_____________|_____________|_____________|_____________________ - |-> cluster <-|-> cluster <-|-> cluster <-| - size size size - -Currently each on-disk physical cluster can contain 4KB (un)compressed data -at most. For each logical cluster, there is a corresponding on-disk index to -describe its cluster type, physical cluster address, etc. - -See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. - diff --git a/drivers/staging/erofs/Kconfig b/drivers/staging/erofs/Kconfig deleted file mode 100644 index d04b798a8efb..000000000000 --- a/drivers/staging/erofs/Kconfig +++ /dev/null @@ -1,151 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -config EROFS_FS - tristate "EROFS filesystem support" - depends on BLOCK - help - EROFS(Enhanced Read-Only File System) is a lightweight - read-only file system with modern designs (eg. page-sized - blocks, inline xattrs/data, etc.) for scenarios which need - high-performance read-only requirements, eg. firmwares in - mobile phone or LIVECDs. - - It also provides VLE compression support, focusing on - random read improvements, keeping relatively lower - compression ratios, which is useful for high-performance - devices with limited memory and ROM space. - - If unsure, say N. - -config EROFS_FS_DEBUG - bool "EROFS debugging feature" - depends on EROFS_FS - help - Print EROFS debugging messages and enable more BUG_ONs - which check the filesystem consistency aggressively. - - For daily use, say N. - -config EROFS_FS_XATTR - bool "EROFS extended attributes" - depends on EROFS_FS - default y - help - Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - <http://acl.bestbits.at/> for details). - - If unsure, say N. - -config EROFS_FS_POSIX_ACL - bool "EROFS Access Control Lists" - depends on EROFS_FS_XATTR - select FS_POSIX_ACL - default y - help - Posix Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the POSIX ACLs for - Linux website <http://acl.bestbits.at/>. - - If you don't know what Access Control Lists are, say N. - -config EROFS_FS_SECURITY - bool "EROFS Security Labels" - depends on EROFS_FS_XATTR - help - Security labels provide an access control facility to support Linux - Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO - Linux. 
This option enables an extended attribute handler for file - security labels in the erofs filesystem, so that it requires enabling - the extended attribute support in advance. - - If you are not using a security module, say N. - -config EROFS_FS_USE_VM_MAP_RAM - bool "EROFS VM_MAP_RAM Support" - depends on EROFS_FS - help - use vm_map_ram/vm_unmap_ram instead of vmap/vunmap. - - If you don't know what these are, say N. - -config EROFS_FAULT_INJECTION - bool "EROFS fault injection facility" - depends on EROFS_FS - help - Test EROFS to inject faults such as ENOMEM, EIO, and so on. - If unsure, say N. - -config EROFS_FS_IO_MAX_RETRIES - int "EROFS IO Maximum Retries" - depends on EROFS_FS - default "5" - help - Maximum retry count of IO Errors. - - If unsure, leave the default value (5 retries, 6 IOs at most). - -config EROFS_FS_ZIP - bool "EROFS Data Compresssion Support" - depends on EROFS_FS - select LZ4_DECOMPRESS - help - Currently we support LZ4 VLE Compression only. - Play at your own risk. - - If you don't want to use compression feature, say N. - -config EROFS_FS_CLUSTER_PAGE_LIMIT - int "EROFS Cluster Pages Hard Limit" - depends on EROFS_FS_ZIP - range 1 256 - default "1" - help - Indicates VLE compressed pages hard limit of a - compressed cluster. - - For example, if files of a image are compressed - into 8k-unit, the hard limit should not be less - than 2. Otherwise, the image cannot be mounted - correctly on this kernel. - -choice - prompt "EROFS VLE Data Decompression mode" - depends on EROFS_FS_ZIP - default EROFS_FS_ZIP_CACHE_BIPOLAR - help - EROFS supports three options for VLE decompression. - "In-place Decompression Only" consumes the minimum memory - with lowest random read. - - "Bipolar Cached Decompression" consumes the maximum memory - with highest random read. - - If unsure, select "Bipolar Cached Decompression" - -config EROFS_FS_ZIP_NO_CACHE - bool "In-place Decompression Only" - help - Read compressed data into page cache and do in-place - decompression directly. - -config EROFS_FS_ZIP_CACHE_UNIPOLAR - bool "Unipolar Cached Decompression" - help - For each request, it caches the last compressed page - for further reading. - It still decompresses in place for the rest compressed pages. - -config EROFS_FS_ZIP_CACHE_BIPOLAR - bool "Bipolar Cached Decompression" - help - For each request, it caches the both end compressed pages - for further reading. - It still decompresses in place for the rest compressed pages. - - Recommended for performance priority. - -endchoice - diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile deleted file mode 100644 index e704d9e51514..000000000000 --- a/drivers/staging/erofs/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -EROFS_VERSION = "1.0pre1" - -ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\" - -obj-$(CONFIG_EROFS_FS) += erofs.o -# staging requirement: to be self-contained in its own directory -ccflags-y += -I $(srctree)/$(src)/include -erofs-objs := super.o inode.o data.o namei.o dir.o utils.o -erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o -erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o zmap.o decompressor.o - diff --git a/drivers/staging/erofs/TODO b/drivers/staging/erofs/TODO deleted file mode 100644 index a8608b2f72bd..000000000000 --- a/drivers/staging/erofs/TODO +++ /dev/null @@ -1,46 +0,0 @@ - -EROFS is still working in progress, thus it is not suitable -for all productive uses. 
play at your own risk :) - -TODO List: - - add the missing error handling code - (mainly existed in xattr and decompression submodules); - - - finalize erofs ondisk format design (which means that - minor on-disk revisions could happen later); - - - documentation and detailed technical analysis; - - - general code review and clean up - (including confusing variable names and code snippets); - - - support larger compressed clustersizes for selection - (currently erofs only works as expected with the page-sized - compressed cluster configuration, usually 4KB); - - - support more lossless data compression algorithms - in addition to LZ4 algorithms in VLE approach; - - - data deduplication and other useful features. - -The following git tree provides the file system user-space -tools under development (ex, formatting tool mkfs.erofs): ->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git - -The open-source development of erofs-utils is at the early stage. -Contact the original author Li Guifu <bluce.liguifu@huawei.com> and -the co-maintainer Fang Wei <fangwei1@huawei.com> for the latest news -and more details. - -Code, suggestions, etc, are welcome. Please feel free to -ask and send patches, - -To: - linux-erofs mailing list <linux-erofs@lists.ozlabs.org> - Gao Xiang <gaoxiang25@huawei.com> - Chao Yu <yuchao0@huawei.com> - -Cc: (for linux-kernel upstream patches) - Greg Kroah-Hartman <gregkh@linuxfoundation.org> - linux-staging mailing list <devel@driverdev.osuosl.org> - diff --git a/drivers/staging/erofs/compress.h b/drivers/staging/erofs/compress.h deleted file mode 100644 index c43aa3374d28..000000000000 --- a/drivers/staging/erofs/compress.h +++ /dev/null @@ -1,62 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/drivers/staging/erofs/compress.h - * - * Copyright (C) 2019 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - */ -#ifndef __EROFS_FS_COMPRESS_H -#define __EROFS_FS_COMPRESS_H - -#include "internal.h" - -enum { - Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, - Z_EROFS_COMPRESSION_RUNTIME_MAX -}; - -struct z_erofs_decompress_req { - struct super_block *sb; - struct page **in, **out; - - unsigned short pageofs_out; - unsigned int inputsize, outputsize; - - /* indicate the algorithm will be used for decompression */ - unsigned int alg; - bool inplace_io, partial_decoding; -}; - -/* - * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) - - * used to mark temporary allocated pages from other - * file/cached pages and NULL mapping pages. - */ -#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D) - -/* check if a page is marked as staging */ -static inline bool z_erofs_page_is_staging(struct page *page) -{ - return page->mapping == Z_EROFS_MAPPING_STAGING; -} - -static inline bool z_erofs_put_stagingpage(struct list_head *pagepool, - struct page *page) -{ - if (!z_erofs_page_is_staging(page)) - return false; - - /* staging pages should not be used by others at the same time */ - if (page_ref_count(page) > 1) - put_page(page); - else - list_add(&page->lru, pagepool); - return true; -} - -int z_erofs_decompress(struct z_erofs_decompress_req *rq, - struct list_head *pagepool); - -#endif - diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c deleted file mode 100644 index cc31c3e5984c..000000000000 --- a/drivers/staging/erofs/data.c +++ /dev/null @@ -1,400 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/data.c - * - * Copyright (C) 2017-2018 HUAWEI, Inc. 
- * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - */ -#include "internal.h" -#include <linux/prefetch.h> - -#include <trace/events/erofs.h> - -static inline void read_endio(struct bio *bio) -{ - struct super_block *const sb = bio->bi_private; - struct bio_vec *bvec; - blk_status_t err = bio->bi_status; - struct bvec_iter_all iter_all; - - if (time_to_inject(EROFS_SB(sb), FAULT_READ_IO)) { - erofs_show_injection_info(FAULT_READ_IO); - err = BLK_STS_IOERR; - } - - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - - /* page is already locked */ - DBG_BUGON(PageUptodate(page)); - - if (unlikely(err)) - SetPageError(page); - else - SetPageUptodate(page); - - unlock_page(page); - /* page could be reclaimed now */ - } - bio_put(bio); -} - -/* prio -- true is used for dir */ -struct page *__erofs_get_meta_page(struct super_block *sb, - erofs_blk_t blkaddr, bool prio, bool nofail) -{ - struct inode *const bd_inode = sb->s_bdev->bd_inode; - struct address_space *const mapping = bd_inode->i_mapping; - /* prefer retrying in the allocator to blindly looping below */ - const gfp_t gfp = mapping_gfp_constraint(mapping, ~__GFP_FS) | - (nofail ? __GFP_NOFAIL : 0); - unsigned int io_retries = nofail ? EROFS_IO_MAX_RETRIES_NOFAIL : 0; - struct page *page; - int err; - -repeat: - page = find_or_create_page(mapping, blkaddr, gfp); - if (unlikely(!page)) { - DBG_BUGON(nofail); - return ERR_PTR(-ENOMEM); - } - DBG_BUGON(!PageLocked(page)); - - if (!PageUptodate(page)) { - struct bio *bio; - - bio = erofs_grab_bio(sb, blkaddr, 1, sb, read_endio, nofail); - if (IS_ERR(bio)) { - DBG_BUGON(nofail); - err = PTR_ERR(bio); - goto err_out; - } - - err = bio_add_page(bio, page, PAGE_SIZE, 0); - if (unlikely(err != PAGE_SIZE)) { - err = -EFAULT; - goto err_out; - } - - __submit_bio(bio, REQ_OP_READ, - REQ_META | (prio ? REQ_PRIO : 0)); - - lock_page(page); - - /* this page has been truncated by others */ - if (unlikely(page->mapping != mapping)) { -unlock_repeat: - unlock_page(page); - put_page(page); - goto repeat; - } - - /* more likely a read error */ - if (unlikely(!PageUptodate(page))) { - if (io_retries) { - --io_retries; - goto unlock_repeat; - } - err = -EIO; - goto err_out; - } - } - return page; - -err_out: - unlock_page(page); - put_page(page); - return ERR_PTR(err); -} - -static int erofs_map_blocks_flatmode(struct inode *inode, - struct erofs_map_blocks *map, - int flags) -{ - int err = 0; - erofs_blk_t nblocks, lastblk; - u64 offset = map->m_la; - struct erofs_vnode *vi = EROFS_V(inode); - - trace_erofs_map_blocks_flatmode_enter(inode, map, flags); - - nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE); - lastblk = nblocks - is_inode_flat_inline(inode); - - if (unlikely(offset >= inode->i_size)) { - /* leave out-of-bound access unmapped */ - map->m_flags = 0; - map->m_plen = 0; - goto out; - } - - /* there is no hole in flatmode */ - map->m_flags = EROFS_MAP_MAPPED; - - if (offset < blknr_to_addr(lastblk)) { - map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la; - map->m_plen = blknr_to_addr(lastblk) - offset; - } else if (is_inode_flat_inline(inode)) { - /* 2 - inode inline B: inode, [xattrs], inline last blk... 
*/ - struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); - - map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize + - vi->xattr_isize + erofs_blkoff(map->m_la); - map->m_plen = inode->i_size - offset; - - /* inline data should locate in one meta block */ - if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) { - DBG_BUGON(1); - err = -EIO; - goto err_out; - } - - map->m_flags |= EROFS_MAP_META; - } else { - errln("internal error @ nid: %llu (size %llu), m_la 0x%llx", - vi->nid, inode->i_size, map->m_la); - DBG_BUGON(1); - err = -EIO; - goto err_out; - } - -out: - map->m_llen = map->m_plen; - -err_out: - trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0); - return err; -} - -int erofs_map_blocks(struct inode *inode, - struct erofs_map_blocks *map, int flags) -{ - if (unlikely(is_inode_layout_compression(inode))) { - int err = z_erofs_map_blocks_iter(inode, map, flags); - - if (map->mpage) { - put_page(map->mpage); - map->mpage = NULL; - } - return err; - } - return erofs_map_blocks_flatmode(inode, map, flags); -} - -static inline struct bio *erofs_read_raw_page(struct bio *bio, - struct address_space *mapping, - struct page *page, - erofs_off_t *last_block, - unsigned int nblocks, - bool ra) -{ - struct inode *const inode = mapping->host; - struct super_block *const sb = inode->i_sb; - erofs_off_t current_block = (erofs_off_t)page->index; - int err; - - DBG_BUGON(!nblocks); - - if (PageUptodate(page)) { - err = 0; - goto has_updated; - } - - if (cleancache_get_page(page) == 0) { - err = 0; - SetPageUptodate(page); - goto has_updated; - } - - /* note that for readpage case, bio also equals to NULL */ - if (bio && - /* not continuous */ - *last_block + 1 != current_block) { -submit_bio_retry: - __submit_bio(bio, REQ_OP_READ, 0); - bio = NULL; - } - - if (!bio) { - struct erofs_map_blocks map = { - .m_la = blknr_to_addr(current_block), - }; - erofs_blk_t blknr; - unsigned int blkoff; - - err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); - if (unlikely(err)) - goto err_out; - - /* zero out the holed page */ - if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) { - zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); - - /* imply err = 0, see erofs_map_blocks */ - goto has_updated; - } - - /* for RAW access mode, m_plen must be equal to m_llen */ - DBG_BUGON(map.m_plen != map.m_llen); - - blknr = erofs_blknr(map.m_pa); - blkoff = erofs_blkoff(map.m_pa); - - /* deal with inline page */ - if (map.m_flags & EROFS_MAP_META) { - void *vsrc, *vto; - struct page *ipage; - - DBG_BUGON(map.m_plen > PAGE_SIZE); - - ipage = erofs_get_meta_page(inode->i_sb, blknr, 0); - - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto err_out; - } - - vsrc = kmap_atomic(ipage); - vto = kmap_atomic(page); - memcpy(vto, vsrc + blkoff, map.m_plen); - memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen); - kunmap_atomic(vto); - kunmap_atomic(vsrc); - flush_dcache_page(page); - - SetPageUptodate(page); - /* TODO: could we unlock the page earlier? 
*/ - unlock_page(ipage); - put_page(ipage); - - /* imply err = 0, see erofs_map_blocks */ - goto has_updated; - } - - /* pa must be block-aligned for raw reading */ - DBG_BUGON(erofs_blkoff(map.m_pa)); - - /* max # of continuous pages */ - if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE)) - nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE); - if (nblocks > BIO_MAX_PAGES) - nblocks = BIO_MAX_PAGES; - - bio = erofs_grab_bio(sb, blknr, nblocks, sb, - read_endio, false); - if (IS_ERR(bio)) { - err = PTR_ERR(bio); - bio = NULL; - goto err_out; - } - } - - err = bio_add_page(bio, page, PAGE_SIZE, 0); - /* out of the extent or bio is full */ - if (err < PAGE_SIZE) - goto submit_bio_retry; - - *last_block = current_block; - - /* shift in advance in case of it followed by too many gaps */ - if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) { - /* err should reassign to 0 after submitting */ - err = 0; - goto submit_bio_out; - } - - return bio; - -err_out: - /* for sync reading, set page error immediately */ - if (!ra) { - SetPageError(page); - ClearPageUptodate(page); - } -has_updated: - unlock_page(page); - - /* if updated manually, continuous pages has a gap */ - if (bio) -submit_bio_out: - __submit_bio(bio, REQ_OP_READ, 0); - - return unlikely(err) ? ERR_PTR(err) : NULL; -} - -/* - * since we dont have write or truncate flows, so no inode - * locking needs to be held at the moment. - */ -static int erofs_raw_access_readpage(struct file *file, struct page *page) -{ - erofs_off_t last_block; - struct bio *bio; - - trace_erofs_readpage(page, true); - - bio = erofs_read_raw_page(NULL, page->mapping, - page, &last_block, 1, false); - - if (IS_ERR(bio)) - return PTR_ERR(bio); - - DBG_BUGON(bio); /* since we have only one bio -- must be NULL */ - return 0; -} - -static int erofs_raw_access_readpages(struct file *filp, - struct address_space *mapping, - struct list_head *pages, - unsigned int nr_pages) -{ - erofs_off_t last_block; - struct bio *bio = NULL; - gfp_t gfp = readahead_gfp_mask(mapping); - struct page *page = list_last_entry(pages, struct page, lru); - - trace_erofs_readpages(mapping->host, page, nr_pages, true); - - for (; nr_pages; --nr_pages) { - page = list_entry(pages->prev, struct page, lru); - - prefetchw(&page->flags); - list_del(&page->lru); - - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) { - bio = erofs_read_raw_page(bio, mapping, page, - &last_block, nr_pages, true); - - /* all the page errors are ignored when readahead */ - if (IS_ERR(bio)) { - pr_err("%s, readahead error at page %lu of nid %llu\n", - __func__, page->index, - EROFS_V(mapping->host)->nid); - - bio = NULL; - } - } - - /* pages could still be locked */ - put_page(page); - } - DBG_BUGON(!list_empty(pages)); - - /* the rare case (end in gaps) */ - if (unlikely(bio)) - __submit_bio(bio, REQ_OP_READ, 0); - return 0; -} - -/* for uncompressed (aligned) files and raw access for other files */ -const struct address_space_operations erofs_raw_access_aops = { - .readpage = erofs_raw_access_readpage, - .readpages = erofs_raw_access_readpages, -}; - diff --git a/drivers/staging/erofs/decompressor.c b/drivers/staging/erofs/decompressor.c deleted file mode 100644 index 1fb0abb98dff..000000000000 --- a/drivers/staging/erofs/decompressor.c +++ /dev/null @@ -1,335 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/decompressor.c - * - * Copyright (C) 2019 HUAWEI, Inc. 
- * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - */ -#include "compress.h" -#include <linux/lz4.h> - -#ifndef LZ4_DISTANCE_MAX /* history window size */ -#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ -#endif - -#define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1) -#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN -#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) -#endif - -struct z_erofs_decompressor { - /* - * if destpages have sparsed pages, fill them with bounce pages. - * it also check whether destpages indicate continuous physical memory. - */ - int (*prepare_destpages)(struct z_erofs_decompress_req *rq, - struct list_head *pagepool); - int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out); - char *name; -}; - -static int lz4_prepare_destpages(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) -{ - const unsigned int nr = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL }; - unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES, - BITS_PER_LONG)] = { 0 }; - void *kaddr = NULL; - unsigned int i, j, top; - - top = 0; - for (i = j = 0; i < nr; ++i, ++j) { - struct page *const page = rq->out[i]; - struct page *victim; - - if (j >= LZ4_MAX_DISTANCE_PAGES) - j = 0; - - /* 'valid' bounced can only be tested after a complete round */ - if (test_bit(j, bounced)) { - DBG_BUGON(i < LZ4_MAX_DISTANCE_PAGES); - DBG_BUGON(top >= LZ4_MAX_DISTANCE_PAGES); - availables[top++] = rq->out[i - LZ4_MAX_DISTANCE_PAGES]; - } - - if (page) { - __clear_bit(j, bounced); - if (kaddr) { - if (kaddr + PAGE_SIZE == page_address(page)) - kaddr += PAGE_SIZE; - else - kaddr = NULL; - } else if (!i) { - kaddr = page_address(page); - } - continue; - } - kaddr = NULL; - __set_bit(j, bounced); - - if (top) { - victim = availables[--top]; - get_page(victim); - } else { - if (!list_empty(pagepool)) { - victim = lru_to_page(pagepool); - list_del(&victim->lru); - DBG_BUGON(page_ref_count(victim) != 1); - } else { - victim = alloc_pages(GFP_KERNEL, 0); - if (!victim) - return -ENOMEM; - } - victim->mapping = Z_EROFS_MAPPING_STAGING; - } - rq->out[i] = victim; - } - return kaddr ? 1 : 0; -} - -static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq, - u8 *src, unsigned int pageofs_in) -{ - /* - * if in-place decompression is ongoing, those decompressed - * pages should be copied in order to avoid being overlapped. 
- */ - struct page **in = rq->in; - u8 *const tmp = erofs_get_pcpubuf(0); - u8 *tmpp = tmp; - unsigned int inlen = rq->inputsize - pageofs_in; - unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in); - - while (tmpp < tmp + inlen) { - if (!src) - src = kmap_atomic(*in); - memcpy(tmpp, src + pageofs_in, count); - kunmap_atomic(src); - src = NULL; - tmpp += count; - pageofs_in = 0; - count = PAGE_SIZE; - ++in; - } - return tmp; -} - -static int lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out) -{ - unsigned int inputmargin, inlen; - u8 *src; - bool copied, support_0padding; - int ret; - - if (rq->inputsize > PAGE_SIZE) - return -ENOTSUPP; - - src = kmap_atomic(*rq->in); - inputmargin = 0; - support_0padding = false; - - /* decompression inplace is only safe when 0padding is enabled */ - if (EROFS_SB(rq->sb)->requirements & EROFS_REQUIREMENT_LZ4_0PADDING) { - support_0padding = true; - - while (!src[inputmargin & ~PAGE_MASK]) - if (!(++inputmargin & ~PAGE_MASK)) - break; - - if (inputmargin >= rq->inputsize) { - kunmap_atomic(src); - return -EIO; - } - } - - copied = false; - inlen = rq->inputsize - inputmargin; - if (rq->inplace_io) { - const uint oend = (rq->pageofs_out + - rq->outputsize) & ~PAGE_MASK; - const uint nr = PAGE_ALIGN(rq->pageofs_out + - rq->outputsize) >> PAGE_SHIFT; - - if (rq->partial_decoding || !support_0padding || - rq->out[nr - 1] != rq->in[0] || - rq->inputsize - oend < - LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) { - src = generic_copy_inplace_data(rq, src, inputmargin); - inputmargin = 0; - copied = true; - } - } - - ret = LZ4_decompress_safe_partial(src + inputmargin, out, - inlen, rq->outputsize, - rq->outputsize); - if (ret < 0) { - errln("%s, failed to decompress, in[%p, %u, %u] out[%p, %u]", - __func__, src + inputmargin, inlen, inputmargin, - out, rq->outputsize); - WARN_ON(1); - print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET, - 16, 1, src + inputmargin, inlen, true); - print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET, - 16, 1, out, rq->outputsize, true); - ret = -EIO; - } - - if (copied) - erofs_put_pcpubuf(src); - else - kunmap_atomic(src); - return ret; -} - -static struct z_erofs_decompressor decompressors[] = { - [Z_EROFS_COMPRESSION_SHIFTED] = { - .name = "shifted" - }, - [Z_EROFS_COMPRESSION_LZ4] = { - .prepare_destpages = lz4_prepare_destpages, - .decompress = lz4_decompress, - .name = "lz4" - }, -}; - -static void copy_from_pcpubuf(struct page **out, const char *dst, - unsigned short pageofs_out, - unsigned int outputsize) -{ - const char *end = dst + outputsize; - const unsigned int righthalf = PAGE_SIZE - pageofs_out; - const char *cur = dst - pageofs_out; - - while (cur < end) { - struct page *const page = *out++; - - if (page) { - char *buf = kmap_atomic(page); - - if (cur >= dst) { - memcpy(buf, cur, min_t(uint, PAGE_SIZE, - end - cur)); - } else { - memcpy(buf + pageofs_out, cur + pageofs_out, - min_t(uint, righthalf, end - cur)); - } - kunmap_atomic(buf); - } - cur += PAGE_SIZE; - } -} - -static int decompress_generic(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) -{ - const unsigned int nrpages_out = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - const struct z_erofs_decompressor *alg = decompressors + rq->alg; - unsigned int dst_maptype; - void *dst; - int ret; - - if (nrpages_out == 1 && !rq->inplace_io) { - DBG_BUGON(!*rq->out); - dst = kmap_atomic(*rq->out); - dst_maptype = 0; - goto dstmap_out; - } - - /* - * For the case of small output size (especially much less - * than 
PAGE_SIZE), memcpy the decompressed data rather than - * compressed data is preferred. - */ - if (rq->outputsize <= PAGE_SIZE * 7 / 8) { - dst = erofs_get_pcpubuf(0); - if (IS_ERR(dst)) - return PTR_ERR(dst); - - rq->inplace_io = false; - ret = alg->decompress(rq, dst); - if (!ret) - copy_from_pcpubuf(rq->out, dst, rq->pageofs_out, - rq->outputsize); - - erofs_put_pcpubuf(dst); - return ret; - } - - ret = alg->prepare_destpages(rq, pagepool); - if (ret < 0) { - return ret; - } else if (ret) { - dst = page_address(*rq->out); - dst_maptype = 1; - goto dstmap_out; - } - - dst = erofs_vmap(rq->out, nrpages_out); - if (!dst) - return -ENOMEM; - dst_maptype = 2; - -dstmap_out: - ret = alg->decompress(rq, dst + rq->pageofs_out); - - if (!dst_maptype) - kunmap_atomic(dst); - else if (dst_maptype == 2) - erofs_vunmap(dst, nrpages_out); - return ret; -} - -static int shifted_decompress(const struct z_erofs_decompress_req *rq, - struct list_head *pagepool) -{ - const unsigned int nrpages_out = - PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out; - unsigned char *src, *dst; - - if (nrpages_out > 2) { - DBG_BUGON(1); - return -EIO; - } - - if (rq->out[0] == *rq->in) { - DBG_BUGON(nrpages_out != 1); - return 0; - } - - src = kmap_atomic(*rq->in); - if (!rq->out[0]) { - dst = NULL; - } else { - dst = kmap_atomic(rq->out[0]); - memcpy(dst + rq->pageofs_out, src, righthalf); - } - - if (rq->out[1] == *rq->in) { - memmove(src, src + righthalf, rq->pageofs_out); - } else if (nrpages_out == 2) { - if (dst) - kunmap_atomic(dst); - DBG_BUGON(!rq->out[1]); - dst = kmap_atomic(rq->out[1]); - memcpy(dst, src + righthalf, rq->pageofs_out); - } - if (dst) - kunmap_atomic(dst); - kunmap_atomic(src); - return 0; -} - -int z_erofs_decompress(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) -{ - if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) - return shifted_decompress(rq, pagepool); - return decompress_generic(rq, pagepool); -} - diff --git a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c deleted file mode 100644 index dbf6a151886c..000000000000 --- a/drivers/staging/erofs/dir.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/dir.c - * - * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
- */ -#include "internal.h" - -static const unsigned char erofs_filetype_table[EROFS_FT_MAX] = { - [EROFS_FT_UNKNOWN] = DT_UNKNOWN, - [EROFS_FT_REG_FILE] = DT_REG, - [EROFS_FT_DIR] = DT_DIR, - [EROFS_FT_CHRDEV] = DT_CHR, - [EROFS_FT_BLKDEV] = DT_BLK, - [EROFS_FT_FIFO] = DT_FIFO, - [EROFS_FT_SOCK] = DT_SOCK, - [EROFS_FT_SYMLINK] = DT_LNK, -}; - -static void debug_one_dentry(unsigned char d_type, const char *de_name, - unsigned int de_namelen) -{ -#ifdef CONFIG_EROFS_FS_DEBUG - /* since the on-disk name could not have the trailing '\0' */ - unsigned char dbg_namebuf[EROFS_NAME_LEN + 1]; - - memcpy(dbg_namebuf, de_name, de_namelen); - dbg_namebuf[de_namelen] = '\0'; - - debugln("found dirent %s de_len %u d_type %d", dbg_namebuf, - de_namelen, d_type); -#endif -} - -static int erofs_fill_dentries(struct dir_context *ctx, - void *dentry_blk, unsigned int *ofs, - unsigned int nameoff, unsigned int maxsize) -{ - struct erofs_dirent *de = dentry_blk + *ofs; - const struct erofs_dirent *end = dentry_blk + nameoff; - - while (de < end) { - const char *de_name; - unsigned int de_namelen; - unsigned char d_type; - - if (de->file_type < EROFS_FT_MAX) - d_type = erofs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; - - nameoff = le16_to_cpu(de->nameoff); - de_name = (char *)dentry_blk + nameoff; - - /* the last dirent in the block? */ - if (de + 1 >= end) - de_namelen = strnlen(de_name, maxsize - nameoff); - else - de_namelen = le16_to_cpu(de[1].nameoff) - nameoff; - - /* a corrupted entry is found */ - if (unlikely(nameoff + de_namelen > maxsize || - de_namelen > EROFS_NAME_LEN)) { - DBG_BUGON(1); - return -EIO; - } - - debug_one_dentry(d_type, de_name, de_namelen); - if (!dir_emit(ctx, de_name, de_namelen, - le64_to_cpu(de->nid), d_type)) - /* stopped by some reason */ - return 1; - ++de; - *ofs += sizeof(struct erofs_dirent); - } - *ofs = maxsize; - return 0; -} - -static int erofs_readdir(struct file *f, struct dir_context *ctx) -{ - struct inode *dir = file_inode(f); - struct address_space *mapping = dir->i_mapping; - const size_t dirsize = i_size_read(dir); - unsigned int i = ctx->pos / EROFS_BLKSIZ; - unsigned int ofs = ctx->pos % EROFS_BLKSIZ; - int err = 0; - bool initial = true; - - while (ctx->pos < dirsize) { - struct page *dentry_page; - struct erofs_dirent *de; - unsigned int nameoff, maxsize; - - dentry_page = read_mapping_page(mapping, i, NULL); - if (IS_ERR(dentry_page)) - continue; - - de = (struct erofs_dirent *)kmap(dentry_page); - - nameoff = le16_to_cpu(de->nameoff); - - if (unlikely(nameoff < sizeof(struct erofs_dirent) || - nameoff >= PAGE_SIZE)) { - errln("%s, invalid de[0].nameoff %u", - __func__, nameoff); - - err = -EIO; - goto skip_this; - } - - maxsize = min_t(unsigned int, - dirsize - ctx->pos + ofs, PAGE_SIZE); - - /* search dirents at the arbitrary position */ - if (unlikely(initial)) { - initial = false; - - ofs = roundup(ofs, sizeof(struct erofs_dirent)); - if (unlikely(ofs >= nameoff)) - goto skip_this; - } - - err = erofs_fill_dentries(ctx, de, &ofs, nameoff, maxsize); -skip_this: - kunmap(dentry_page); - - put_page(dentry_page); - - ctx->pos = blknr_to_addr(i) + ofs; - - if (unlikely(err)) - break; - ++i; - ofs = 0; - } - return err < 0 ? 
err : 0; -} - -const struct file_operations erofs_dir_fops = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .iterate_shared = erofs_readdir, -}; - diff --git a/drivers/staging/erofs/erofs_fs.h b/drivers/staging/erofs/erofs_fs.h deleted file mode 100644 index 9f61abb7c1ca..000000000000 --- a/drivers/staging/erofs/erofs_fs.h +++ /dev/null @@ -1,322 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Apache-2.0 - * - * linux/drivers/staging/erofs/erofs_fs.h - * - * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is dual-licensed; you may select either the GNU General Public - * License version 2 or Apache License, Version 2.0. See the file COPYING - * in the main directory of the Linux distribution for more details. - */ -#ifndef __EROFS_FS_H -#define __EROFS_FS_H - -/* Enhanced(Extended) ROM File System */ -#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2 -#define EROFS_SUPER_OFFSET 1024 - -/* - * Any bits that aren't in EROFS_ALL_REQUIREMENTS should be - * incompatible with this kernel version. - */ -#define EROFS_REQUIREMENT_LZ4_0PADDING 0x00000001 -#define EROFS_ALL_REQUIREMENTS EROFS_REQUIREMENT_LZ4_0PADDING - -struct erofs_super_block { -/* 0 */__le32 magic; /* in the little endian */ -/* 4 */__le32 checksum; /* crc32c(super_block) */ -/* 8 */__le32 features; /* (aka. feature_compat) */ -/* 12 */__u8 blkszbits; /* support block_size == PAGE_SIZE only */ -/* 13 */__u8 reserved; - -/* 14 */__le16 root_nid; -/* 16 */__le64 inos; /* total valid ino # (== f_files - f_favail) */ - -/* 24 */__le64 build_time; /* inode v1 time derivation */ -/* 32 */__le32 build_time_nsec; -/* 36 */__le32 blocks; /* used for statfs */ -/* 40 */__le32 meta_blkaddr; -/* 44 */__le32 xattr_blkaddr; -/* 48 */__u8 uuid[16]; /* 128-bit uuid for volume */ -/* 64 */__u8 volume_name[16]; /* volume name */ -/* 80 */__le32 requirements; /* (aka. feature_incompat) */ - -/* 84 */__u8 reserved2[44]; -} __packed; /* 128 bytes */ - -/* - * erofs inode data mapping: - * 0 - inode plain without inline data A: - * inode, [xattrs], ... | ... | no-holed data - * 1 - inode VLE compression B (legacy): - * inode, [xattrs], extents ... | ... - * 2 - inode plain with inline data C: - * inode, [xattrs], last_inline_data, ... | ... | no-holed data - * 3 - inode compression D: - * inode, [xattrs], map_header, extents ... | ... 
- * 4~7 - reserved - */ -enum { - EROFS_INODE_FLAT_PLAIN, - EROFS_INODE_FLAT_COMPRESSION_LEGACY, - EROFS_INODE_FLAT_INLINE, - EROFS_INODE_FLAT_COMPRESSION, - EROFS_INODE_LAYOUT_MAX -}; - -static bool erofs_inode_is_data_compressed(unsigned int datamode) -{ - if (datamode == EROFS_INODE_FLAT_COMPRESSION) - return true; - return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY; -} - -/* bit definitions of inode i_advise */ -#define EROFS_I_VERSION_BITS 1 -#define EROFS_I_DATA_MAPPING_BITS 3 - -#define EROFS_I_VERSION_BIT 0 -#define EROFS_I_DATA_MAPPING_BIT 1 - -struct erofs_inode_v1 { -/* 0 */__le16 i_advise; - -/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ -/* 2 */__le16 i_xattr_icount; -/* 4 */__le16 i_mode; -/* 6 */__le16 i_nlink; -/* 8 */__le32 i_size; -/* 12 */__le32 i_reserved; -/* 16 */union { - /* file total compressed blocks for data mapping 1 */ - __le32 compressed_blocks; - __le32 raw_blkaddr; - - /* for device files, used to indicate old/new device # */ - __le32 rdev; - } i_u __packed; -/* 20 */__le32 i_ino; /* only used for 32-bit stat compatibility */ -/* 24 */__le16 i_uid; -/* 26 */__le16 i_gid; -/* 28 */__le32 i_checksum; -} __packed; - -/* 32 bytes on-disk inode */ -#define EROFS_INODE_LAYOUT_V1 0 -/* 64 bytes on-disk inode */ -#define EROFS_INODE_LAYOUT_V2 1 - -struct erofs_inode_v2 { - __le16 i_advise; - - /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */ - __le16 i_xattr_icount; - __le16 i_mode; - __le16 i_reserved; /* 8 bytes */ - __le64 i_size; /* 16 bytes */ - union { - /* file total compressed blocks for data mapping 1 */ - __le32 compressed_blocks; - __le32 raw_blkaddr; - - /* for device files, used to indicate old/new device # */ - __le32 rdev; - } i_u __packed; - - /* only used for 32-bit stat compatibility */ - __le32 i_ino; /* 24 bytes */ - - __le32 i_uid; - __le32 i_gid; - __le64 i_ctime; /* 32 bytes */ - __le32 i_ctime_nsec; - __le32 i_nlink; - __u8 i_reserved2[12]; - __le32 i_checksum; /* 64 bytes */ -} __packed; - -#define EROFS_MAX_SHARED_XATTRS (128) -/* h_shared_count between 129 ... 255 are special # */ -#define EROFS_SHARED_XATTR_EXTENT (255) - -/* - * inline xattrs (n == i_xattr_icount): - * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes - * 12 bytes / \ - * / \ - * /-----------------------\ - * | erofs_xattr_entries+ | - * +-----------------------+ - * inline xattrs must starts in erofs_xattr_ibody_header, - * for read-only fs, no need to introduce h_refcount - */ -struct erofs_xattr_ibody_header { - __le32 h_checksum; - __u8 h_shared_count; - __u8 h_reserved[7]; - __le32 h_shared_xattrs[0]; /* shared xattr id array */ -} __packed; - -/* Name indexes */ -#define EROFS_XATTR_INDEX_USER 1 -#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2 -#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -#define EROFS_XATTR_INDEX_TRUSTED 4 -#define EROFS_XATTR_INDEX_LUSTRE 5 -#define EROFS_XATTR_INDEX_SECURITY 6 - -/* xattr entry (for both inline & shared xattrs) */ -struct erofs_xattr_entry { - __u8 e_name_len; /* length of name */ - __u8 e_name_index; /* attribute name index */ - __le16 e_value_size; /* size of attribute value */ - /* followed by e_name and e_value */ - char e_name[0]; /* attribute name */ -} __packed; - -#define ondisk_xattr_ibody_size(count) ({\ - u32 __count = le16_to_cpu(count); \ - ((__count) == 0) ? 
0 : \ - sizeof(struct erofs_xattr_ibody_header) + \ - sizeof(__u32) * ((__count) - 1); }) - -#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry)) -#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \ - sizeof(struct erofs_xattr_entry) + \ - (entry)->e_name_len + le16_to_cpu((entry)->e_value_size)) - -/* available compression algorithm types */ -enum { - Z_EROFS_COMPRESSION_LZ4, - Z_EROFS_COMPRESSION_MAX -}; - -/* - * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) - * e.g. for 4k logical cluster size, 4B if compacted 2B is off; - * (4B) + 2B + (4B) if compacted 2B is on. - */ -#define Z_EROFS_ADVISE_COMPACTED_2B_BIT 0 - -#define Z_EROFS_ADVISE_COMPACTED_2B (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT) - -struct z_erofs_map_header { - __le32 h_reserved1; - __le16 h_advise; - /* - * bit 0-3 : algorithm type of head 1 (logical cluster type 01); - * bit 4-7 : algorithm type of head 2 (logical cluster type 11). - */ - __u8 h_algorithmtype; - /* - * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; - * bit 3-4 : (physical - logical) cluster bits of head 1: - * For example, if logical clustersize = 4096, 1 for 8192. - * bit 5-7 : (physical - logical) cluster bits of head 2. - */ - __u8 h_clusterbits; -}; - -#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8 - -/* - * Z_EROFS Variable-sized Logical Extent cluster type: - * 0 - literal (uncompressed) cluster - * 1 - compressed cluster (for the head logical cluster) - * 2 - compressed cluster (for the other logical clusters) - * - * In detail, - * 0 - literal (uncompressed) cluster, - * di_advise = 0 - * di_clusterofs = the literal data offset of the cluster - * di_blkaddr = the blkaddr of the literal cluster - * - * 1 - compressed cluster (for the head logical cluster) - * di_advise = 1 - * di_clusterofs = the decompressed data offset of the cluster - * di_blkaddr = the blkaddr of the compressed cluster - * - * 2 - compressed cluster (for the other logical clusters) - * di_advise = 2 - * di_clusterofs = - * the decompressed data offset in its own head cluster - * di_u.delta[0] = distance to its corresponding head cluster - * di_u.delta[1] = distance to its corresponding tail cluster - * (di_advise could be 0, 1 or 2) - */ -enum { - Z_EROFS_VLE_CLUSTER_TYPE_PLAIN, - Z_EROFS_VLE_CLUSTER_TYPE_HEAD, - Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD, - Z_EROFS_VLE_CLUSTER_TYPE_RESERVED, - Z_EROFS_VLE_CLUSTER_TYPE_MAX -}; - -#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 -#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 - -struct z_erofs_vle_decompressed_index { - __le16 di_advise; - /* where to decompress in the head cluster */ - __le16 di_clusterofs; - - union { - /* for the head cluster */ - __le32 blkaddr; - /* - * for the rest clusters - * eg. 
for 4k page-sized cluster, maximum 4K*64k = 256M) - * [0] - pointing to the head cluster - * [1] - pointing to the tail cluster - */ - __le16 delta[2]; - } di_u __packed; /* 8 bytes */ -} __packed; - -#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \ - (round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \ - sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING) - -/* dirent sorts in alphabet order, thus we can do binary search */ -struct erofs_dirent { - __le64 nid; /* 0, node number */ - __le16 nameoff; /* 8, start offset of file name */ - __u8 file_type; /* 10, file type */ - __u8 reserved; /* 11, reserved */ -} __packed; - -/* file types used in inode_info->flags */ -enum { - EROFS_FT_UNKNOWN, - EROFS_FT_REG_FILE, - EROFS_FT_DIR, - EROFS_FT_CHRDEV, - EROFS_FT_BLKDEV, - EROFS_FT_FIFO, - EROFS_FT_SOCK, - EROFS_FT_SYMLINK, - EROFS_FT_MAX -}; - -#define EROFS_NAME_LEN 255 - -/* check the EROFS on-disk layout strictly at compile time */ -static inline void erofs_check_ondisk_layout_definitions(void) -{ - BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128); - BUILD_BUG_ON(sizeof(struct erofs_inode_v1) != 32); - BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64); - BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); - BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4); - BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); - BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); - BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); - - BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < - Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); -} - -#endif - diff --git a/drivers/staging/erofs/include/linux/tagptr.h b/drivers/staging/erofs/include/linux/tagptr.h deleted file mode 100644 index ccd106dbd48e..000000000000 --- a/drivers/staging/erofs/include/linux/tagptr.h +++ /dev/null @@ -1,110 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Tagged pointer implementation - * - * Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com> - */ -#ifndef _LINUX_TAGPTR_H -#define _LINUX_TAGPTR_H - -#include <linux/types.h> -#include <linux/build_bug.h> - -/* - * the name of tagged pointer types are tagptr{1, 2, 3...}_t - * avoid directly using the internal structs __tagptr{1, 2, 3...} - */ -#define __MAKE_TAGPTR(n) \ -typedef struct __tagptr##n { \ - uintptr_t v; \ -} tagptr##n##_t; - -__MAKE_TAGPTR(1) -__MAKE_TAGPTR(2) -__MAKE_TAGPTR(3) -__MAKE_TAGPTR(4) - -#undef __MAKE_TAGPTR - -extern void __compiletime_error("bad tagptr tags") - __bad_tagptr_tags(void); - -extern void __compiletime_error("bad tagptr type") - __bad_tagptr_type(void); - -/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */ -#define __tagptr_mask_1(ptr, n) \ - __builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \ - (1UL << (n)) - 1 : - -#define __tagptr_mask(ptr) (\ - __tagptr_mask_1(ptr, 1) ( \ - __tagptr_mask_1(ptr, 2) ( \ - __tagptr_mask_1(ptr, 3) ( \ - __tagptr_mask_1(ptr, 4) ( \ - __bad_tagptr_type(), 0))))) - -/* generate a tagged pointer from a raw value */ -#define tagptr_init(type, val) \ - ((typeof(type)){ .v = (uintptr_t)(val) }) - -/* - * directly cast a tagged pointer to the native pointer type, which - * could be used for backward compatibility of existing code. 
- */ -#define tagptr_cast_ptr(tptr) ((void *)(tptr).v) - -/* encode tagged pointers */ -#define tagptr_fold(type, ptr, _tags) ({ \ - const typeof(_tags) tags = (_tags); \ - if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \ - __bad_tagptr_tags(); \ -tagptr_init(type, (uintptr_t)(ptr) | tags); }) - -/* decode tagged pointers */ -#define tagptr_unfold_ptr(tptr) \ - ((void *)((tptr).v & ~__tagptr_mask(tptr))) - -#define tagptr_unfold_tags(tptr) \ - ((tptr).v & __tagptr_mask(tptr)) - -/* operations for the tagger pointer */ -#define tagptr_eq(_tptr1, _tptr2) ({ \ - typeof(_tptr1) tptr1 = (_tptr1); \ - typeof(_tptr2) tptr2 = (_tptr2); \ - (void)(&tptr1 == &tptr2); \ -(tptr1).v == (tptr2).v; }) - -/* lock-free CAS operation */ -#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \ - typeof(_ptptr) ptptr = (_ptptr); \ - typeof(_o) o = (_o); \ - typeof(_n) n = (_n); \ - (void)(&o == &n); \ - (void)(&o == ptptr); \ -tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); }) - -/* wrap WRITE_ONCE if atomic update is needed */ -#define tagptr_replace_tags(_ptptr, tags) ({ \ - typeof(_ptptr) ptptr = (_ptptr); \ - *ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \ -*ptptr; }) - -#define tagptr_set_tags(_ptptr, _tags) ({ \ - typeof(_ptptr) ptptr = (_ptptr); \ - const typeof(_tags) tags = (_tags); \ - if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \ - __bad_tagptr_tags(); \ - ptptr->v |= tags; \ -*ptptr; }) - -#define tagptr_clear_tags(_ptptr, _tags) ({ \ - typeof(_ptptr) ptptr = (_ptptr); \ - const typeof(_tags) tags = (_tags); \ - if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \ - __bad_tagptr_tags(); \ - ptptr->v &= ~tags; \ -*ptptr; }) - -#endif - diff --git a/drivers/staging/erofs/include/trace/events/erofs.h b/drivers/staging/erofs/include/trace/events/erofs.h deleted file mode 100644 index 660c92fc1803..000000000000 --- a/drivers/staging/erofs/include/trace/events/erofs.h +++ /dev/null @@ -1,256 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM erofs - -#if !defined(_TRACE_EROFS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_EROFS_H - -#include <linux/tracepoint.h> - -#define show_dev(dev) MAJOR(dev), MINOR(dev) -#define show_dev_nid(entry) show_dev(entry->dev), entry->nid - -#define show_file_type(type) \ - __print_symbolic(type, \ - { 0, "FILE" }, \ - { 1, "DIR" }) - -#define show_map_flags(flags) __print_flags(flags, "|", \ - { EROFS_GET_BLOCKS_RAW, "RAW" }) - -#define show_mflags(flags) __print_flags(flags, "", \ - { EROFS_MAP_MAPPED, "M" }, \ - { EROFS_MAP_META, "I" }, \ - { EROFS_MAP_ZIPPED, "Z" }) - -TRACE_EVENT(erofs_lookup, - - TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), - - TP_ARGS(dir, dentry, flags), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(erofs_nid_t, nid ) - __field(const char *, name ) - __field(unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = dir->i_sb->s_dev; - __entry->nid = EROFS_V(dir)->nid; - __entry->name = dentry->d_name.name; - __entry->flags = flags; - ), - - TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x", - show_dev_nid(__entry), - __entry->name, - __entry->flags) -); - -TRACE_EVENT(erofs_fill_inode, - TP_PROTO(struct inode *inode, int isdir), - TP_ARGS(inode, isdir), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(erofs_nid_t, nid ) - __field(erofs_blk_t, blkaddr ) - __field(unsigned int, ofs ) - __field(int, isdir ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = 
EROFS_V(inode)->nid; - __entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid)); - __entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid)); - __entry->isdir = isdir; - ), - - TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d", - show_dev_nid(__entry), - __entry->blkaddr, __entry->ofs, - __entry->isdir) -); - -TRACE_EVENT(erofs_readpage, - - TP_PROTO(struct page *page, bool raw), - - TP_ARGS(page, raw), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(erofs_nid_t, nid ) - __field(int, dir ) - __field(pgoff_t, index ) - __field(int, uptodate) - __field(bool, raw ) - ), - - TP_fast_assign( - __entry->dev = page->mapping->host->i_sb->s_dev; - __entry->nid = EROFS_V(page->mapping->host)->nid; - __entry->dir = S_ISDIR(page->mapping->host->i_mode); - __entry->index = page->index; - __entry->uptodate = PageUptodate(page); - __entry->raw = raw; - ), - - TP_printk("dev = (%d,%d), nid = %llu, %s, index = %lu, uptodate = %d " - "raw = %d", - show_dev_nid(__entry), - show_file_type(__entry->dir), - (unsigned long)__entry->index, - __entry->uptodate, - __entry->raw) -); - -TRACE_EVENT(erofs_readpages, - - TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage, - bool raw), - - TP_ARGS(inode, page, nrpage, raw), - - TP_STRUCT__entry( - __field(dev_t, dev ) - __field(erofs_nid_t, nid ) - __field(pgoff_t, start ) - __field(unsigned int, nrpage ) - __field(bool, raw ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_V(inode)->nid; - __entry->start = page->index; - __entry->nrpage = nrpage; - __entry->raw = raw; - ), - - TP_printk("dev = (%d,%d), nid = %llu, start = %lu nrpage = %u raw = %d", - show_dev_nid(__entry), - (unsigned long)__entry->start, - __entry->nrpage, - __entry->raw) -); - -DECLARE_EVENT_CLASS(erofs__map_blocks_enter, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned int flags), - - TP_ARGS(inode, map, flags), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( erofs_nid_t, nid ) - __field( erofs_off_t, la ) - __field( u64, llen ) - __field( unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_V(inode)->nid; - __entry->la = map->m_la; - __entry->llen = map->m_llen; - __entry->flags = flags; - ), - - TP_printk("dev = (%d,%d), nid = %llu, la %llu llen %llu flags %s", - show_dev_nid(__entry), - __entry->la, __entry->llen, - __entry->flags ? 
show_map_flags(__entry->flags) : "NULL") -); - -DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned flags), - - TP_ARGS(inode, map, flags) -); - -DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned int flags), - - TP_ARGS(inode, map, flags) -); - -DECLARE_EVENT_CLASS(erofs__map_blocks_exit, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned int flags, int ret), - - TP_ARGS(inode, map, flags, ret), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( erofs_nid_t, nid ) - __field( unsigned int, flags ) - __field( erofs_off_t, la ) - __field( erofs_off_t, pa ) - __field( u64, llen ) - __field( u64, plen ) - __field( unsigned int, mflags ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_V(inode)->nid; - __entry->flags = flags; - __entry->la = map->m_la; - __entry->pa = map->m_pa; - __entry->llen = map->m_llen; - __entry->plen = map->m_plen; - __entry->mflags = map->m_flags; - __entry->ret = ret; - ), - - TP_printk("dev = (%d,%d), nid = %llu, flags %s " - "la %llu pa %llu llen %llu plen %llu mflags %s ret %d", - show_dev_nid(__entry), - __entry->flags ? show_map_flags(__entry->flags) : "NULL", - __entry->la, __entry->pa, __entry->llen, __entry->plen, - show_mflags(__entry->mflags), __entry->ret) -); - -DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned flags, int ret), - - TP_ARGS(inode, map, flags, ret) -); - -DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned int flags, int ret), - - TP_ARGS(inode, map, flags, ret) -); - -TRACE_EVENT(erofs_destroy_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( erofs_nid_t, nid ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_V(inode)->nid; - ), - - TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry)) -); - -#endif /* _TRACE_EROFS_H */ - - /* This part must be outside protection */ -#include <trace/define_trace.h> diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c deleted file mode 100644 index 4c3d8bf8d249..000000000000 --- a/drivers/staging/erofs/inode.c +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/inode.c - * - * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
- */ -#include "xattr.h" - -#include <trace/events/erofs.h> - -/* no locking */ -static int read_inode(struct inode *inode, void *data) -{ - struct erofs_vnode *vi = EROFS_V(inode); - struct erofs_inode_v1 *v1 = data; - const unsigned int advise = le16_to_cpu(v1->i_advise); - erofs_blk_t nblks = 0; - - vi->datamode = __inode_data_mapping(advise); - - if (unlikely(vi->datamode >= EROFS_INODE_LAYOUT_MAX)) { - errln("unsupported data mapping %u of nid %llu", - vi->datamode, vi->nid); - DBG_BUGON(1); - return -EIO; - } - - if (__inode_version(advise) == EROFS_INODE_LAYOUT_V2) { - struct erofs_inode_v2 *v2 = data; - - vi->inode_isize = sizeof(struct erofs_inode_v2); - vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount); - - inode->i_mode = le16_to_cpu(v2->i_mode); - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) { - vi->raw_blkaddr = le32_to_cpu(v2->i_u.raw_blkaddr); - } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - inode->i_rdev = - new_decode_dev(le32_to_cpu(v2->i_u.rdev)); - } else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - inode->i_rdev = 0; - } else { - return -EIO; - } - - i_uid_write(inode, le32_to_cpu(v2->i_uid)); - i_gid_write(inode, le32_to_cpu(v2->i_gid)); - set_nlink(inode, le32_to_cpu(v2->i_nlink)); - - /* ns timestamp */ - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = - le64_to_cpu(v2->i_ctime); - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = - le32_to_cpu(v2->i_ctime_nsec); - - inode->i_size = le64_to_cpu(v2->i_size); - - /* total blocks for compressed files */ - if (is_inode_layout_compression(inode)) - nblks = le32_to_cpu(v2->i_u.compressed_blocks); - } else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) { - struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); - - vi->inode_isize = sizeof(struct erofs_inode_v1); - vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount); - - inode->i_mode = le16_to_cpu(v1->i_mode); - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) { - vi->raw_blkaddr = le32_to_cpu(v1->i_u.raw_blkaddr); - } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - inode->i_rdev = - new_decode_dev(le32_to_cpu(v1->i_u.rdev)); - } else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - inode->i_rdev = 0; - } else { - return -EIO; - } - - i_uid_write(inode, le16_to_cpu(v1->i_uid)); - i_gid_write(inode, le16_to_cpu(v1->i_gid)); - set_nlink(inode, le16_to_cpu(v1->i_nlink)); - - /* use build time to derive all file time */ - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = - sbi->build_time; - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = - sbi->build_time_nsec; - - inode->i_size = le32_to_cpu(v1->i_size); - if (is_inode_layout_compression(inode)) - nblks = le32_to_cpu(v1->i_u.compressed_blocks); - } else { - errln("unsupported on-disk inode version %u of nid %llu", - __inode_version(advise), vi->nid); - DBG_BUGON(1); - return -EIO; - } - - if (!nblks) - /* measure inode.i_blocks as generic filesystems */ - inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; - else - inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK; - return 0; -} - -/* - * try_lock can be required since locking order is: - * file data(fs_inode) - * meta(bd_inode) - * but the majority of the callers is "iget", - * in that case we are pretty sure no deadlock since - * no data operations exist. However I tend to - * try_lock since it takes no much overhead and - * will success immediately. 
- */ -static int fill_inline_data(struct inode *inode, void *data, - unsigned int m_pofs) -{ - struct erofs_vnode *vi = EROFS_V(inode); - struct erofs_sb_info *sbi = EROFS_I_SB(inode); - - /* should be inode inline C */ - if (!is_inode_flat_inline(inode)) - return 0; - - /* fast symlink (following ext4) */ - if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) { - char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL); - - if (unlikely(!lnk)) - return -ENOMEM; - - m_pofs += vi->inode_isize + vi->xattr_isize; - - /* inline symlink data shouldn't across page boundary as well */ - if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) { - DBG_BUGON(1); - kfree(lnk); - return -EIO; - } - - /* get in-page inline data */ - memcpy(lnk, data + m_pofs, inode->i_size); - lnk[inode->i_size] = '\0'; - - inode->i_link = lnk; - set_inode_fast_symlink(inode); - } - return 0; -} - -static int fill_inode(struct inode *inode, int isdir) -{ - struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); - struct erofs_vnode *vi = EROFS_V(inode); - struct page *page; - void *data; - int err; - erofs_blk_t blkaddr; - unsigned int ofs; - - trace_erofs_fill_inode(inode, isdir); - - blkaddr = erofs_blknr(iloc(sbi, vi->nid)); - ofs = erofs_blkoff(iloc(sbi, vi->nid)); - - debugln("%s, reading inode nid %llu at %u of blkaddr %u", - __func__, vi->nid, ofs, blkaddr); - - page = erofs_get_meta_page(inode->i_sb, blkaddr, isdir); - - if (IS_ERR(page)) { - errln("failed to get inode (nid: %llu) page, err %ld", - vi->nid, PTR_ERR(page)); - return PTR_ERR(page); - } - - DBG_BUGON(!PageUptodate(page)); - data = page_address(page); - - err = read_inode(inode, data + ofs); - if (!err) { - /* setup the new inode */ - if (S_ISREG(inode->i_mode)) { - inode->i_op = &erofs_generic_iops; - inode->i_fop = &generic_ro_fops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &erofs_dir_iops; - inode->i_fop = &erofs_dir_fops; - } else if (S_ISLNK(inode->i_mode)) { - /* by default, page_get_link is used for symlink */ - inode->i_op = &erofs_symlink_iops; - inode_nohighmem(inode); - } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || - S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - inode->i_op = &erofs_generic_iops; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - goto out_unlock; - } else { - err = -EIO; - goto out_unlock; - } - - if (is_inode_layout_compression(inode)) { - err = z_erofs_fill_inode(inode); - goto out_unlock; - } - - inode->i_mapping->a_ops = &erofs_raw_access_aops; - - /* fill last page if inline data is available */ - err = fill_inline_data(inode, data, ofs); - } - -out_unlock: - unlock_page(page); - put_page(page); - return err; -} - -/* - * erofs nid is 64bits, but i_ino is 'unsigned long', therefore - * we should do more for 32-bit platform to find the right inode. 
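fill_inline_data() above only installs a fast symlink when the inline target, which sits right after the inode and its xattr body, still fits inside the metadata page. A self-contained sketch of that bounds check and copy; the buffer layout, sizes and function name below are assumptions for illustration only:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PG_SIZE 4096

/* copy a NUL-terminated link target out of an in-page inode, or fail */
static char *copy_inline_symlink(const char *page, size_t inode_off,
                                 size_t inode_isize, size_t xattr_isize,
                                 size_t link_len)
{
        size_t pofs = inode_off + inode_isize + xattr_isize;
        char *lnk;

        /* inline data must not cross the page boundary */
        if (pofs + link_len > PG_SIZE)
                return NULL;

        lnk = malloc(link_len + 1);
        if (!lnk)
                return NULL;
        memcpy(lnk, page + pofs, link_len);
        lnk[link_len] = '\0';
        return lnk;
}

int main(void)
{
        char page[PG_SIZE] = { 0 };

        memcpy(page + 96, "/usr/bin", 8);       /* pretend inline target */
        char *lnk = copy_inline_symlink(page, 0, 64, 32, 8);
        printf("%s\n", lnk ? lnk : "(out of bounds)");
        free(lnk);
        return 0;
}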
- */ -#if BITS_PER_LONG == 32 -static int erofs_ilookup_test_actor(struct inode *inode, void *opaque) -{ - const erofs_nid_t nid = *(erofs_nid_t *)opaque; - - return EROFS_V(inode)->nid == nid; -} - -static int erofs_iget_set_actor(struct inode *inode, void *opaque) -{ - const erofs_nid_t nid = *(erofs_nid_t *)opaque; - - inode->i_ino = erofs_inode_hash(nid); - return 0; -} -#endif - -static inline struct inode *erofs_iget_locked(struct super_block *sb, - erofs_nid_t nid) -{ - const unsigned long hashval = erofs_inode_hash(nid); - -#if BITS_PER_LONG >= 64 - /* it is safe to use iget_locked for >= 64-bit platform */ - return iget_locked(sb, hashval); -#else - return iget5_locked(sb, hashval, erofs_ilookup_test_actor, - erofs_iget_set_actor, &nid); -#endif -} - -struct inode *erofs_iget(struct super_block *sb, - erofs_nid_t nid, - bool isdir) -{ - struct inode *inode = erofs_iget_locked(sb, nid); - - if (unlikely(!inode)) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) { - int err; - struct erofs_vnode *vi = EROFS_V(inode); - - vi->nid = nid; - - err = fill_inode(inode, isdir); - if (likely(!err)) - unlock_new_inode(inode); - else { - iget_failed(inode); - inode = ERR_PTR(err); - } - } - return inode; -} - -int erofs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) -{ - struct inode *const inode = d_inode(path->dentry); - - if (is_inode_layout_compression(inode)) - stat->attributes |= STATX_ATTR_COMPRESSED; - - stat->attributes |= STATX_ATTR_IMMUTABLE; - stat->attributes_mask |= (STATX_ATTR_COMPRESSED | - STATX_ATTR_IMMUTABLE); - - generic_fillattr(inode, stat); - return 0; -} - -const struct inode_operations erofs_generic_iops = { - .getattr = erofs_getattr, -#ifdef CONFIG_EROFS_FS_XATTR - .listxattr = erofs_listxattr, -#endif - .get_acl = erofs_get_acl, -}; - -const struct inode_operations erofs_symlink_iops = { - .get_link = page_get_link, - .getattr = erofs_getattr, -#ifdef CONFIG_EROFS_FS_XATTR - .listxattr = erofs_listxattr, -#endif - .get_acl = erofs_get_acl, -}; - -const struct inode_operations erofs_fast_symlink_iops = { - .get_link = simple_get_link, - .getattr = erofs_getattr, -#ifdef CONFIG_EROFS_FS_XATTR - .listxattr = erofs_listxattr, -#endif - .get_acl = erofs_get_acl, -}; - diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h deleted file mode 100644 index 963cc1b8b896..000000000000 --- a/drivers/staging/erofs/internal.h +++ /dev/null @@ -1,642 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * linux/drivers/staging/erofs/internal.h - * - * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - */ -#ifndef __INTERNAL_H -#define __INTERNAL_H - -#include <linux/fs.h> -#include <linux/dcache.h> -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/bio.h> -#include <linux/buffer_head.h> -#include <linux/cleancache.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include "erofs_fs.h" - -/* redefine pr_fmt "erofs: " */ -#undef pr_fmt -#define pr_fmt(fmt) "erofs: " fmt - -#define errln(x, ...) pr_err(x "\n", ##__VA_ARGS__) -#define infoln(x, ...) pr_info(x "\n", ##__VA_ARGS__) -#ifdef CONFIG_EROFS_FS_DEBUG -#define debugln(x, ...) 
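Because nid is 64 bits wide while i_ino is an unsigned long, the lookup helpers above fold the nid into a hash and rely on a test callback for the exact match on 32-bit builds. A hedged userspace sketch of that fold-plus-compare idea; fold_nid() and same_inode() are illustrative stand-ins, not kernel interfaces:

#include <stdint.h>
#include <stdio.h>

/* fold a 64-bit object id into a (possibly 32-bit) unsigned long hash */
static unsigned long fold_nid(uint64_t nid)
{
        if (sizeof(unsigned long) == 4)
                return (unsigned long)((nid >> 32) ^ (nid & 0xffffffffULL));
        return (unsigned long)nid;
}

/* the "test actor": the hash alone is not unique, the full nid decides */
static int same_inode(uint64_t cached_nid, uint64_t wanted_nid)
{
        return cached_nid == wanted_nid;
}

int main(void)
{
        uint64_t a = 0x100000000ULL, b = 0x1ULL;

        /* on a 32-bit build these two hash identically ... */
        printf("hash(a)=%lx hash(b)=%lx\n", fold_nid(a), fold_nid(b));
        /* ... so the exact nid comparison is still required */
        printf("same inode? %d\n", same_inode(a, b));
        return 0;
}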
pr_debug(x "\n", ##__VA_ARGS__) - -#define dbg_might_sleep might_sleep -#define DBG_BUGON BUG_ON -#else -#define debugln(x, ...) ((void)0) - -#define dbg_might_sleep() ((void)0) -#define DBG_BUGON(x) ((void)(x)) -#endif - -enum { - FAULT_KMALLOC, - FAULT_READ_IO, - FAULT_MAX, -}; - -#ifdef CONFIG_EROFS_FAULT_INJECTION -extern const char *erofs_fault_name[FAULT_MAX]; -#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) - -struct erofs_fault_info { - atomic_t inject_ops; - unsigned int inject_rate; - unsigned int inject_type; -}; -#endif - -#ifdef CONFIG_EROFS_FS_ZIP_CACHE_BIPOLAR -#define EROFS_FS_ZIP_CACHE_LVL (2) -#elif defined(EROFS_FS_ZIP_CACHE_UNIPOLAR) -#define EROFS_FS_ZIP_CACHE_LVL (1) -#else -#define EROFS_FS_ZIP_CACHE_LVL (0) -#endif - -#if (!defined(EROFS_FS_HAS_MANAGED_CACHE) && (EROFS_FS_ZIP_CACHE_LVL > 0)) -#define EROFS_FS_HAS_MANAGED_CACHE -#endif - -/* EROFS_SUPER_MAGIC_V1 to represent the whole file system */ -#define EROFS_SUPER_MAGIC EROFS_SUPER_MAGIC_V1 - -typedef u64 erofs_nid_t; - -struct erofs_sb_info { - /* list for all registered superblocks, mainly for shrinker */ - struct list_head list; - struct mutex umount_mutex; - - u32 blocks; - u32 meta_blkaddr; -#ifdef CONFIG_EROFS_FS_XATTR - u32 xattr_blkaddr; -#endif - - /* inode slot unit size in bit shift */ - unsigned char islotbits; -#ifdef CONFIG_EROFS_FS_ZIP - /* cluster size in bit shift */ - unsigned char clusterbits; - - /* the dedicated workstation for compression */ - struct radix_tree_root workstn_tree; - - /* threshold for decompression synchronously */ - unsigned int max_sync_decompress_pages; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - struct inode *managed_cache; -#endif - -#endif - - u32 build_time_nsec; - u64 build_time; - - /* what we really care is nid, rather than ino.. 
*/ - erofs_nid_t root_nid; - /* used for statfs, f_files - f_favail */ - u64 inos; - - u8 uuid[16]; /* 128-bit uuid for volume */ - u8 volume_name[16]; /* volume name */ - u32 requirements; - - char *dev_name; - - unsigned int mount_opt; - unsigned int shrinker_run_no; - -#ifdef CONFIG_EROFS_FAULT_INJECTION - struct erofs_fault_info fault_info; /* For fault injection */ -#endif -}; - -#ifdef CONFIG_EROFS_FAULT_INJECTION -#define erofs_show_injection_info(type) \ - infoln("inject %s in %s of %pS", erofs_fault_name[type], \ - __func__, __builtin_return_address(0)) - -static inline bool time_to_inject(struct erofs_sb_info *sbi, int type) -{ - struct erofs_fault_info *ffi = &sbi->fault_info; - - if (!ffi->inject_rate) - return false; - - if (!IS_FAULT_SET(ffi, type)) - return false; - - atomic_inc(&ffi->inject_ops); - if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { - atomic_set(&ffi->inject_ops, 0); - return true; - } - return false; -} -#else -static inline bool time_to_inject(struct erofs_sb_info *sbi, int type) -{ - return false; -} - -static inline void erofs_show_injection_info(int type) -{ -} -#endif - -static inline void *erofs_kmalloc(struct erofs_sb_info *sbi, - size_t size, gfp_t flags) -{ - if (time_to_inject(sbi, FAULT_KMALLOC)) { - erofs_show_injection_info(FAULT_KMALLOC); - return NULL; - } - return kmalloc(size, flags); -} - -#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) -#define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info) - -/* Mount flags set via mount options or defaults */ -#define EROFS_MOUNT_XATTR_USER 0x00000010 -#define EROFS_MOUNT_POSIX_ACL 0x00000020 -#define EROFS_MOUNT_FAULT_INJECTION 0x00000040 - -#define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option) -#define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option) -#define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option) - -#ifdef CONFIG_EROFS_FS_ZIP -#define erofs_workstn_lock(sbi) xa_lock(&(sbi)->workstn_tree) -#define erofs_workstn_unlock(sbi) xa_unlock(&(sbi)->workstn_tree) - -/* basic unit of the workstation of a super_block */ -struct erofs_workgroup { - /* the workgroup index in the workstation */ - pgoff_t index; - - /* overall workgroup reference count */ - atomic_t refcount; -}; - -#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL) - -#if defined(CONFIG_SMP) -static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, - int val) -{ - preempt_disable(); - if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) { - preempt_enable(); - return false; - } - return true; -} - -static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp, - int orig_val) -{ - /* - * other observers should notice all modifications - * in the freezing period. - */ - smp_mb(); - atomic_set(&grp->refcount, orig_val); - preempt_enable(); -} - -static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) -{ - return atomic_cond_read_relaxed(&grp->refcount, - VAL != EROFS_LOCKED_MAGIC); -} -#else -static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, - int val) -{ - preempt_disable(); - /* no need to spin on UP platforms, let's just disable preemption. 
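erofs_workgroup_try_to_freeze() above parks a magic value in the reference counter with cmpxchg so that no other CPU can take or drop references while the workgroup is being torn down, and unfreeze publishes the original count again. A minimal single-threaded sketch of that state machine with C11 atomics; the magic value and names below are illustrative only:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <limits.h>

#define LOCKED_MAGIC (INT_MIN | 0x5EED)

struct group { atomic_int refcount; };

/* succeed only if the count is exactly 'expected'; park LOCKED_MAGIC there */
static bool try_freeze(struct group *g, int expected)
{
        return atomic_compare_exchange_strong(&g->refcount, &expected,
                                              LOCKED_MAGIC);
}

static void unfreeze(struct group *g, int orig)
{
        atomic_store(&g->refcount, orig);       /* publish the old count again */
}

int main(void)
{
        struct group g;

        atomic_init(&g.refcount, 1);
        printf("freeze@1: %d\n", try_freeze(&g, 1));        /* 1: now parked */
        printf("freeze@1 again: %d\n", try_freeze(&g, 1));  /* 0: already locked */
        unfreeze(&g, 1);
        printf("refcount back to %d\n", atomic_load(&g.refcount));
        return 0;
}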
*/ - if (val != atomic_read(&grp->refcount)) { - preempt_enable(); - return false; - } - return true; -} - -static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp, - int orig_val) -{ - preempt_enable(); -} - -static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) -{ - int v = atomic_read(&grp->refcount); - - /* workgroup is never freezed on uniprocessor systems */ - DBG_BUGON(v == EROFS_LOCKED_MAGIC); - return v; -} -#endif - -int erofs_workgroup_put(struct erofs_workgroup *grp); -struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, - pgoff_t index, bool *tag); -int erofs_register_workgroup(struct super_block *sb, - struct erofs_workgroup *grp, bool tag); -unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, - unsigned long nr_shrink, bool cleanup); -void erofs_workgroup_free_rcu(struct erofs_workgroup *grp); - -#ifdef EROFS_FS_HAS_MANAGED_CACHE -int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *egrp); -int erofs_try_to_free_cached_page(struct address_space *mapping, - struct page *page); - -#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) -static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, - struct page *page) -{ - return page->mapping == MNGD_MAPPING(sbi); -} -#else -#define MNGD_MAPPING(sbi) (NULL) -static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, - struct page *page) { return false; } -#endif - -#define DEFAULT_MAX_SYNC_DECOMPRESS_PAGES 3 - -static inline bool __should_decompress_synchronously(struct erofs_sb_info *sbi, - unsigned int nr) -{ - return nr <= sbi->max_sync_decompress_pages; -} - -int __init z_erofs_init_zip_subsystem(void); -void z_erofs_exit_zip_subsystem(void); -#else -/* dummy initializer/finalizer for the decompression subsystem */ -static inline int z_erofs_init_zip_subsystem(void) { return 0; } -static inline void z_erofs_exit_zip_subsystem(void) {} -#endif - -/* we strictly follow PAGE_SIZE and no buffer head yet */ -#define LOG_BLOCK_SIZE PAGE_SHIFT - -#undef LOG_SECTORS_PER_BLOCK -#define LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9) - -#undef SECTORS_PER_BLOCK -#define SECTORS_PER_BLOCK (1 << SECTORS_PER_BLOCK) - -#define EROFS_BLKSIZ (1 << LOG_BLOCK_SIZE) - -#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ) -#error erofs cannot be used in this platform -#endif - -#define ROOT_NID(sb) ((sb)->root_nid) - -#ifdef CONFIG_EROFS_FS_ZIP -/* hard limit of pages per compressed cluster */ -#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT) - -/* page count of a compressed cluster */ -#define erofs_clusterpages(sbi) ((1 << (sbi)->clusterbits) / PAGE_SIZE) - -#define EROFS_PCPUBUF_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES -#else -#define EROFS_PCPUBUF_NR_PAGES 0 -#endif - -typedef u64 erofs_off_t; - -/* data type for filesystem-wide blocks number */ -typedef u32 erofs_blk_t; - -#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ) -#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ) -#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ) - -static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) -{ - return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits); -} - -/* atomic flag definitions */ -#define EROFS_V_EA_INITED_BIT 0 -#define EROFS_V_Z_INITED_BIT 1 - -/* bitlock definitions (arranged in reverse order) */ -#define EROFS_V_BL_XATTR_BIT (BITS_PER_LONG - 1) -#define EROFS_V_BL_Z_BIT (BITS_PER_LONG - 2) - -struct erofs_vnode { - erofs_nid_t nid; - - /* atomic flags 
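iloc() just above maps an inode number to its on-disk byte position: nid indexes fixed-size inode slots (islotbits is 5 for the 32-byte v1 inode, since ffs(32) - 1 == 5) starting at meta_blkaddr, and erofs_blknr()/erofs_blkoff() split that byte address back into a block number and an in-block offset. A short worked sketch, assuming 4 KB blocks:

#include <stdint.h>
#include <stdio.h>

#define BLKSIZ 4096ULL

static uint64_t iloc(uint32_t meta_blkaddr, unsigned int islotbits, uint64_t nid)
{
        return (uint64_t)meta_blkaddr * BLKSIZ + (nid << islotbits);
}

int main(void)
{
        /* 32-byte inode slots start at block 2; look up nid 200 */
        uint64_t addr = iloc(2, 5, 200);

        printf("byte addr %llu = block %llu, offset %llu\n",
               (unsigned long long)addr,
               (unsigned long long)(addr / BLKSIZ),    /* erofs_blknr()  -> 3 */
               (unsigned long long)(addr % BLKSIZ));   /* erofs_blkoff() -> 2304 */
        return 0;
}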
(including bitlocks) */ - unsigned long flags; - - unsigned char datamode; - unsigned char inode_isize; - unsigned short xattr_isize; - - unsigned xattr_shared_count; - unsigned *xattr_shared_xattrs; - - union { - erofs_blk_t raw_blkaddr; -#ifdef CONFIG_EROFS_FS_ZIP - struct { - unsigned short z_advise; - unsigned char z_algorithmtype[2]; - unsigned char z_logical_clusterbits; - unsigned char z_physical_clusterbits[2]; - }; -#endif - }; - /* the corresponding vfs inode */ - struct inode vfs_inode; -}; - -#define EROFS_V(ptr) \ - container_of(ptr, struct erofs_vnode, vfs_inode) - -#define __inode_advise(x, bit, bits) \ - (((x) >> (bit)) & ((1 << (bits)) - 1)) - -#define __inode_version(advise) \ - __inode_advise(advise, EROFS_I_VERSION_BIT, \ - EROFS_I_VERSION_BITS) - -#define __inode_data_mapping(advise) \ - __inode_advise(advise, EROFS_I_DATA_MAPPING_BIT,\ - EROFS_I_DATA_MAPPING_BITS) - -static inline unsigned long inode_datablocks(struct inode *inode) -{ - /* since i_size cannot be changed */ - return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); -} - -static inline bool is_inode_layout_compression(struct inode *inode) -{ - return erofs_inode_is_data_compressed(EROFS_V(inode)->datamode); -} - -static inline bool is_inode_flat_inline(struct inode *inode) -{ - return EROFS_V(inode)->datamode == EROFS_INODE_FLAT_INLINE; -} - -extern const struct super_operations erofs_sops; - -extern const struct address_space_operations erofs_raw_access_aops; -#ifdef CONFIG_EROFS_FS_ZIP -extern const struct address_space_operations z_erofs_vle_normalaccess_aops; -#endif - -/* - * Logical to physical block mapping, used by erofs_map_blocks() - * - * Different with other file systems, it is used for 2 access modes: - * - * 1) RAW access mode: - * - * Users pass a valid (m_lblk, m_lofs -- usually 0) pair, - * and get the valid m_pblk, m_pofs and the longest m_len(in bytes). - * - * Note that m_lblk in the RAW access mode refers to the number of - * the compressed ondisk block rather than the uncompressed - * in-memory block for the compressed file. - * - * m_pofs equals to m_lofs except for the inline data page. - * - * 2) Normal access mode: - * - * If the inode is not compressed, it has no difference with - * the RAW access mode. However, if the inode is compressed, - * users should pass a valid (m_lblk, m_lofs) pair, and get - * the needed m_pblk, m_pofs, m_len to get the compressed data - * and the updated m_lblk, m_lofs which indicates the start - * of the corresponding uncompressed data in the file. 
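The datamode and version fields above are unpacked from the on-disk i_advise word by __inode_advise(), which is a plain shift-and-mask bitfield extraction. A tiny sketch of the same extraction; the bit positions below are made up for illustration (the real layout lives in erofs_fs.h):

#include <stdio.h>

/* pull a 'bits'-wide field starting at 'bit' out of a packed flags word */
#define field_get(x, bit, bits)  (((x) >> (bit)) & ((1u << (bits)) - 1))

/* illustrative layout only: bit 0 = version, bits 1..3 = data mapping mode */
#define VERSION_BIT   0
#define VERSION_BITS  1
#define DATAMODE_BIT  1
#define DATAMODE_BITS 3

int main(void)
{
        unsigned int advise = 0x5;      /* 0b101: version 1, data mapping 2 */

        printf("version  = %u\n", field_get(advise, VERSION_BIT, VERSION_BITS));
        printf("datamode = %u\n", field_get(advise, DATAMODE_BIT, DATAMODE_BITS));
        return 0;
}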
- */ -enum { - BH_Zipped = BH_PrivateStart, - BH_FullMapped, -}; - -/* Has a disk mapping */ -#define EROFS_MAP_MAPPED (1 << BH_Mapped) -/* Located in metadata (could be copied from bd_inode) */ -#define EROFS_MAP_META (1 << BH_Meta) -/* The extent has been compressed */ -#define EROFS_MAP_ZIPPED (1 << BH_Zipped) -/* The length of extent is full */ -#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) - -struct erofs_map_blocks { - erofs_off_t m_pa, m_la; - u64 m_plen, m_llen; - - unsigned int m_flags; - - struct page *mpage; -}; - -/* Flags used by erofs_map_blocks() */ -#define EROFS_GET_BLOCKS_RAW 0x0001 - -/* zmap.c */ -#ifdef CONFIG_EROFS_FS_ZIP -int z_erofs_fill_inode(struct inode *inode); -int z_erofs_map_blocks_iter(struct inode *inode, - struct erofs_map_blocks *map, - int flags); -#else -static inline int z_erofs_fill_inode(struct inode *inode) { return -ENOTSUPP; } -static inline int z_erofs_map_blocks_iter(struct inode *inode, - struct erofs_map_blocks *map, - int flags) -{ - return -ENOTSUPP; -} -#endif - -/* data.c */ -static inline struct bio * -erofs_grab_bio(struct super_block *sb, - erofs_blk_t blkaddr, unsigned int nr_pages, void *bi_private, - bio_end_io_t endio, bool nofail) -{ - const gfp_t gfp = GFP_NOIO; - struct bio *bio; - - do { - if (nr_pages == 1) { - bio = bio_alloc(gfp | (nofail ? __GFP_NOFAIL : 0), 1); - if (unlikely(!bio)) { - DBG_BUGON(nofail); - return ERR_PTR(-ENOMEM); - } - break; - } - bio = bio_alloc(gfp, nr_pages); - nr_pages /= 2; - } while (unlikely(!bio)); - - bio->bi_end_io = endio; - bio_set_dev(bio, sb->s_bdev); - bio->bi_iter.bi_sector = (sector_t)blkaddr << LOG_SECTORS_PER_BLOCK; - bio->bi_private = bi_private; - return bio; -} - -static inline void __submit_bio(struct bio *bio, unsigned op, unsigned op_flags) -{ - bio_set_op_attrs(bio, op, op_flags); - submit_bio(bio); -} - -#ifndef CONFIG_EROFS_FS_IO_MAX_RETRIES -#define EROFS_IO_MAX_RETRIES_NOFAIL 0 -#else -#define EROFS_IO_MAX_RETRIES_NOFAIL CONFIG_EROFS_FS_IO_MAX_RETRIES -#endif - -struct page *__erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr, - bool prio, bool nofail); - -static inline struct page *erofs_get_meta_page(struct super_block *sb, - erofs_blk_t blkaddr, bool prio) -{ - return __erofs_get_meta_page(sb, blkaddr, prio, false); -} - -static inline struct page *erofs_get_meta_page_nofail(struct super_block *sb, - erofs_blk_t blkaddr, bool prio) -{ - return __erofs_get_meta_page(sb, blkaddr, prio, true); -} - -int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int); - -static inline struct page * -erofs_get_inline_page(struct inode *inode, - erofs_blk_t blkaddr) -{ - return erofs_get_meta_page(inode->i_sb, - blkaddr, S_ISDIR(inode->i_mode)); -} - -/* inode.c */ -static inline unsigned long erofs_inode_hash(erofs_nid_t nid) -{ -#if BITS_PER_LONG == 32 - return (nid >> 32) ^ (nid & 0xffffffff); -#else - return nid; -#endif -} - -extern const struct inode_operations erofs_generic_iops; -extern const struct inode_operations erofs_symlink_iops; -extern const struct inode_operations erofs_fast_symlink_iops; - -static inline void set_inode_fast_symlink(struct inode *inode) -{ - inode->i_op = &erofs_fast_symlink_iops; -} - -static inline bool is_inode_fast_symlink(struct inode *inode) -{ - return inode->i_op == &erofs_fast_symlink_iops; -} - -struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir); -int erofs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags); - -/* namei.c */ -extern const 
struct inode_operations erofs_dir_iops; - -int erofs_namei(struct inode *dir, struct qstr *name, - erofs_nid_t *nid, unsigned int *d_type); - -/* dir.c */ -extern const struct file_operations erofs_dir_fops; - -static inline void *erofs_vmap(struct page **pages, unsigned int count) -{ -#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM - int i = 0; - - while (1) { - void *addr = vm_map_ram(pages, count, -1, PAGE_KERNEL); - /* retry two more times (totally 3 times) */ - if (addr || ++i >= 3) - return addr; - vm_unmap_aliases(); - } - return NULL; -#else - return vmap(pages, count, VM_MAP, PAGE_KERNEL); -#endif -} - -static inline void erofs_vunmap(const void *mem, unsigned int count) -{ -#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM - vm_unmap_ram(mem, count); -#else - vunmap(mem); -#endif -} - -/* utils.c */ -extern struct shrinker erofs_shrinker_info; - -struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); - -#if (EROFS_PCPUBUF_NR_PAGES > 0) -void *erofs_get_pcpubuf(unsigned int pagenr); -#define erofs_put_pcpubuf(buf) do { \ - (void)&(buf); \ - preempt_enable(); \ -} while (0) -#else -static inline void *erofs_get_pcpubuf(unsigned int pagenr) -{ - return ERR_PTR(-ENOTSUPP); -} - -#define erofs_put_pcpubuf(buf) do {} while (0) -#endif - -void erofs_register_super(struct super_block *sb); -void erofs_unregister_super(struct super_block *sb); - -#ifndef lru_to_page -#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) -#endif - -#endif - diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c deleted file mode 100644 index fd3ae78d0ba5..000000000000 --- a/drivers/staging/erofs/namei.c +++ /dev/null @@ -1,256 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/namei.c - * - * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - */ -#include "internal.h" -#include "xattr.h" - -#include <trace/events/erofs.h> - -struct erofs_qstr { - const unsigned char *name; - const unsigned char *end; -}; - -/* based on the end of qn is accurate and it must have the trailing '\0' */ -static inline int dirnamecmp(const struct erofs_qstr *qn, - const struct erofs_qstr *qd, - unsigned int *matched) -{ - unsigned int i = *matched; - - /* - * on-disk error, let's only BUG_ON in the debugging mode. - * otherwise, it will return 1 to just skip the invalid name - * and go on (in consideration of the lookup performance). - */ - DBG_BUGON(qd->name > qd->end); - - /* qd could not have trailing '\0' */ - /* However it is absolutely safe if < qd->end */ - while (qd->name + i < qd->end && qd->name[i] != '\0') { - if (qn->name[i] != qd->name[i]) { - *matched = i; - return qn->name[i] > qd->name[i] ? 1 : -1; - } - ++i; - } - *matched = i; - /* See comments in __d_alloc on the terminating NUL character */ - return qn->name[i] == '\0' ? 
0 : 1; -} - -#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1)) - -static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, - u8 *data, - unsigned int dirblksize, - const int ndirents) -{ - int head, back; - unsigned int startprfx, endprfx; - struct erofs_dirent *const de = (struct erofs_dirent *)data; - - /* since the 1st dirent has been evaluated previously */ - head = 1; - back = ndirents - 1; - startprfx = endprfx = 0; - - while (head <= back) { - const int mid = head + (back - head) / 2; - const int nameoff = nameoff_from_disk(de[mid].nameoff, - dirblksize); - unsigned int matched = min(startprfx, endprfx); - struct erofs_qstr dname = { - .name = data + nameoff, - .end = unlikely(mid >= ndirents - 1) ? - data + dirblksize : - data + nameoff_from_disk(de[mid + 1].nameoff, - dirblksize) - }; - - /* string comparison without already matched prefix */ - int ret = dirnamecmp(name, &dname, &matched); - - if (unlikely(!ret)) { - return de + mid; - } else if (ret > 0) { - head = mid + 1; - startprfx = matched; - } else { - back = mid - 1; - endprfx = matched; - } - } - - return ERR_PTR(-ENOENT); -} - -static struct page *find_target_block_classic(struct inode *dir, - struct erofs_qstr *name, - int *_ndirents) -{ - unsigned int startprfx, endprfx; - int head, back; - struct address_space *const mapping = dir->i_mapping; - struct page *candidate = ERR_PTR(-ENOENT); - - startprfx = endprfx = 0; - head = 0; - back = inode_datablocks(dir) - 1; - - while (head <= back) { - const int mid = head + (back - head) / 2; - struct page *page = read_mapping_page(mapping, mid, NULL); - - if (!IS_ERR(page)) { - struct erofs_dirent *de = kmap_atomic(page); - const int nameoff = nameoff_from_disk(de->nameoff, - EROFS_BLKSIZ); - const int ndirents = nameoff / sizeof(*de); - int diff; - unsigned int matched; - struct erofs_qstr dname; - - if (unlikely(!ndirents)) { - DBG_BUGON(1); - kunmap_atomic(de); - put_page(page); - page = ERR_PTR(-EIO); - goto out; - } - - matched = min(startprfx, endprfx); - - dname.name = (u8 *)de + nameoff; - if (ndirents == 1) - dname.end = (u8 *)de + EROFS_BLKSIZ; - else - dname.end = (u8 *)de + - nameoff_from_disk(de[1].nameoff, - EROFS_BLKSIZ); - - /* string comparison without already matched prefix */ - diff = dirnamecmp(name, &dname, &matched); - kunmap_atomic(de); - - if (unlikely(!diff)) { - *_ndirents = 0; - goto out; - } else if (diff > 0) { - head = mid + 1; - startprfx = matched; - - if (!IS_ERR(candidate)) - put_page(candidate); - candidate = page; - *_ndirents = ndirents; - } else { - put_page(page); - - back = mid - 1; - endprfx = matched; - } - continue; - } -out: /* free if the candidate is valid */ - if (!IS_ERR(candidate)) - put_page(candidate); - return page; - } - return candidate; -} - -int erofs_namei(struct inode *dir, - struct qstr *name, - erofs_nid_t *nid, unsigned int *d_type) -{ - int ndirents; - struct page *page; - void *data; - struct erofs_dirent *de; - struct erofs_qstr qn; - - if (unlikely(!dir->i_size)) - return -ENOENT; - - qn.name = name->name; - qn.end = name->name + name->len; - - ndirents = 0; - page = find_target_block_classic(dir, &qn, &ndirents); - - if (IS_ERR(page)) - return PTR_ERR(page); - - data = kmap_atomic(page); - /* the target page has been mapped */ - if (ndirents) - de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); - else - de = (struct erofs_dirent *)data; - - if (!IS_ERR(de)) { - *nid = le64_to_cpu(de->nid); - *d_type = de->file_type; - } - - kunmap_atomic(data); - put_page(page); - - 
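find_target_dirent() and find_target_block_classic() above binary-search sorted names while carrying startprfx/endprfx, the number of leading bytes already known to match at the lower and upper bounds, so each comparison may start at min(startprfx, endprfx) instead of byte zero. A compact userspace sketch of that prefix-reusing binary search over a sorted string table; the names and helpers are illustrative:

#include <stdio.h>
#include <string.h>

/* compare starting at *matched; report how far the common prefix reached */
static int prefixcmp(const char *want, const char *have, unsigned int *matched)
{
        unsigned int i = *matched;

        while (have[i] != '\0' && want[i] == have[i])
                ++i;
        *matched = i;
        if (want[i] == have[i])
                return 0;
        return (unsigned char)want[i] > (unsigned char)have[i] ? 1 : -1;
}

static int search(const char **names, int n, const char *want)
{
        int head = 0, back = n - 1;
        unsigned int startprfx = 0, endprfx = 0;

        while (head <= back) {
                int mid = head + (back - head) / 2;
                unsigned int matched = startprfx < endprfx ? startprfx : endprfx;
                int ret = prefixcmp(want, names[mid], &matched);

                if (!ret)
                        return mid;
                if (ret > 0) {
                        head = mid + 1;
                        startprfx = matched;
                } else {
                        back = mid - 1;
                        endprfx = matched;
                }
        }
        return -1;      /* -ENOENT in the kernel code */
}

int main(void)
{
        const char *names[] = { "abc", "abd", "abf", "bcd" };

        printf("%d\n", search(names, 4, "abf"));        /* 2 */
        printf("%d\n", search(names, 4, "abe"));        /* -1 */
        return 0;
}

The skip is safe because the table is sorted: any entry lying between two bounds that both share a given prefix with the target must share at least that many leading bytes as well.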
return PTR_ERR_OR_ZERO(de); -} - -/* NOTE: i_mutex is already held by vfs */ -static struct dentry *erofs_lookup(struct inode *dir, - struct dentry *dentry, - unsigned int flags) -{ - int err; - erofs_nid_t nid; - unsigned int d_type; - struct inode *inode; - - DBG_BUGON(!d_really_is_negative(dentry)); - /* dentry must be unhashed in lookup, no need to worry about */ - DBG_BUGON(!d_unhashed(dentry)); - - trace_erofs_lookup(dir, dentry, flags); - - /* file name exceeds fs limit */ - if (unlikely(dentry->d_name.len > EROFS_NAME_LEN)) - return ERR_PTR(-ENAMETOOLONG); - - /* false uninitialized warnings on gcc 4.8.x */ - err = erofs_namei(dir, &dentry->d_name, &nid, &d_type); - - if (err == -ENOENT) { - /* negative dentry */ - inode = NULL; - } else if (unlikely(err)) { - inode = ERR_PTR(err); - } else { - debugln("%s, %s (nid %llu) found, d_type %u", __func__, - dentry->d_name.name, nid, d_type); - inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR); - } - return d_splice_alias(inode, dentry); -} - -const struct inode_operations erofs_dir_iops = { - .lookup = erofs_lookup, - .getattr = erofs_getattr, -#ifdef CONFIG_EROFS_FS_XATTR - .listxattr = erofs_listxattr, -#endif - .get_acl = erofs_get_acl, -}; - diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c deleted file mode 100644 index 54494412eba4..000000000000 --- a/drivers/staging/erofs/super.c +++ /dev/null @@ -1,701 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/super.c - * - * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - */ -#include <linux/module.h> -#include <linux/buffer_head.h> -#include <linux/statfs.h> -#include <linux/parser.h> -#include <linux/seq_file.h> -#include "internal.h" -#include "xattr.h" - -#define CREATE_TRACE_POINTS -#include <trace/events/erofs.h> - -static struct kmem_cache *erofs_inode_cachep __read_mostly; - -static void init_once(void *ptr) -{ - struct erofs_vnode *vi = ptr; - - inode_init_once(&vi->vfs_inode); -} - -static int __init erofs_init_inode_cache(void) -{ - erofs_inode_cachep = kmem_cache_create("erofs_inode", - sizeof(struct erofs_vnode), 0, - SLAB_RECLAIM_ACCOUNT, - init_once); - - return erofs_inode_cachep ? 0 : -ENOMEM; -} - -static void erofs_exit_inode_cache(void) -{ - kmem_cache_destroy(erofs_inode_cachep); -} - -static struct inode *alloc_inode(struct super_block *sb) -{ - struct erofs_vnode *vi = - kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL); - - if (!vi) - return NULL; - - /* zero out everything except vfs_inode */ - memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode)); - return &vi->vfs_inode; -} - -static void free_inode(struct inode *inode) -{ - struct erofs_vnode *vi = EROFS_V(inode); - - /* be careful RCU symlink path (see ext4_inode_info->i_data)! 
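alloc_inode() above recycles a slab object and clears only the erofs-specific head of the structure with offsetof(), leaving the embedded vfs_inode untouched because init_once() already set it up when the object was first constructed. A small sketch of that partial-reset idiom; the structure below is a made-up stand-in:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct vfs_part { int initialized_once; };

struct vnode {
        unsigned long long nid;         /* fs-specific fields: reset per use */
        unsigned short xattr_isize;
        struct vfs_part vfs_inode;      /* long-lived part: keep as-is */
};

static void recycle(struct vnode *vi)
{
        /* zero everything that precedes vfs_inode, and nothing after it */
        memset(vi, 0, offsetof(struct vnode, vfs_inode));
}

int main(void)
{
        struct vnode v = { .nid = 42, .xattr_isize = 12,
                           .vfs_inode = { .initialized_once = 1 } };

        recycle(&v);
        printf("nid=%llu xattr=%u vfs=%d\n",
               v.nid, v.xattr_isize, v.vfs_inode.initialized_once); /* 0 0 1 */
        return 0;
}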
*/ - if (is_inode_fast_symlink(inode)) - kfree(inode->i_link); - - kfree(vi->xattr_shared_xattrs); - - kmem_cache_free(erofs_inode_cachep, vi); -} - -static bool check_layout_compatibility(struct super_block *sb, - struct erofs_super_block *layout) -{ - const unsigned int requirements = le32_to_cpu(layout->requirements); - - EROFS_SB(sb)->requirements = requirements; - - /* check if current kernel meets all mandatory requirements */ - if (requirements & (~EROFS_ALL_REQUIREMENTS)) { - errln("unidentified requirements %x, please upgrade kernel version", - requirements & ~EROFS_ALL_REQUIREMENTS); - return false; - } - return true; -} - -static int superblock_read(struct super_block *sb) -{ - struct erofs_sb_info *sbi; - struct buffer_head *bh; - struct erofs_super_block *layout; - unsigned int blkszbits; - int ret; - - bh = sb_bread(sb, 0); - - if (!bh) { - errln("cannot read erofs superblock"); - return -EIO; - } - - sbi = EROFS_SB(sb); - layout = (struct erofs_super_block *)((u8 *)bh->b_data - + EROFS_SUPER_OFFSET); - - ret = -EINVAL; - if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) { - errln("cannot find valid erofs superblock"); - goto out; - } - - blkszbits = layout->blkszbits; - /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ - if (unlikely(blkszbits != LOG_BLOCK_SIZE)) { - errln("blksize %u isn't supported on this platform", - 1 << blkszbits); - goto out; - } - - if (!check_layout_compatibility(sb, layout)) - goto out; - - sbi->blocks = le32_to_cpu(layout->blocks); - sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr); -#ifdef CONFIG_EROFS_FS_XATTR - sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr); -#endif - sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1; -#ifdef CONFIG_EROFS_FS_ZIP - /* TODO: clusterbits should be related to inode */ - sbi->clusterbits = blkszbits; - - if (1 << (sbi->clusterbits - PAGE_SHIFT) > Z_EROFS_CLUSTER_MAX_PAGES) - errln("clusterbits %u is not supported on this kernel", - sbi->clusterbits); -#endif - - sbi->root_nid = le16_to_cpu(layout->root_nid); - sbi->inos = le64_to_cpu(layout->inos); - - sbi->build_time = le64_to_cpu(layout->build_time); - sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec); - - memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid)); - memcpy(sbi->volume_name, layout->volume_name, - sizeof(layout->volume_name)); - - ret = 0; -out: - brelse(bh); - return ret; -} - -#ifdef CONFIG_EROFS_FAULT_INJECTION -const char *erofs_fault_name[FAULT_MAX] = { - [FAULT_KMALLOC] = "kmalloc", - [FAULT_READ_IO] = "read IO error", -}; - -static void __erofs_build_fault_attr(struct erofs_sb_info *sbi, - unsigned int rate) -{ - struct erofs_fault_info *ffi = &sbi->fault_info; - - if (rate) { - atomic_set(&ffi->inject_ops, 0); - ffi->inject_rate = rate; - ffi->inject_type = (1 << FAULT_MAX) - 1; - } else { - memset(ffi, 0, sizeof(struct erofs_fault_info)); - } - - set_opt(sbi, FAULT_INJECTION); -} - -static int erofs_build_fault_attr(struct erofs_sb_info *sbi, - substring_t *args) -{ - int rate = 0; - - if (args->from && match_int(args, &rate)) - return -EINVAL; - - __erofs_build_fault_attr(sbi, rate); - return 0; -} - -static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi) -{ - return sbi->fault_info.inject_rate; -} -#else -static void __erofs_build_fault_attr(struct erofs_sb_info *sbi, - unsigned int rate) -{ -} - -static int erofs_build_fault_attr(struct erofs_sb_info *sbi, - substring_t *args) -{ - infoln("fault_injection options not supported"); - return 0; -} - -static unsigned int 
erofs_get_fault_rate(struct erofs_sb_info *sbi) -{ - return 0; -} -#endif - -static void default_options(struct erofs_sb_info *sbi) -{ - /* set up some FS parameters */ -#ifdef CONFIG_EROFS_FS_ZIP - sbi->max_sync_decompress_pages = DEFAULT_MAX_SYNC_DECOMPRESS_PAGES; -#endif - -#ifdef CONFIG_EROFS_FS_XATTR - set_opt(sbi, XATTR_USER); -#endif - -#ifdef CONFIG_EROFS_FS_POSIX_ACL - set_opt(sbi, POSIX_ACL); -#endif -} - -enum { - Opt_user_xattr, - Opt_nouser_xattr, - Opt_acl, - Opt_noacl, - Opt_fault_injection, - Opt_err -}; - -static match_table_t erofs_tokens = { - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_fault_injection, "fault_injection=%u"}, - {Opt_err, NULL} -}; - -static int parse_options(struct super_block *sb, char *options) -{ - substring_t args[MAX_OPT_ARGS]; - char *p; - int err; - - if (!options) - return 0; - - while ((p = strsep(&options, ","))) { - int token; - - if (!*p) - continue; - - args[0].to = args[0].from = NULL; - token = match_token(p, erofs_tokens, args); - - switch (token) { -#ifdef CONFIG_EROFS_FS_XATTR - case Opt_user_xattr: - set_opt(EROFS_SB(sb), XATTR_USER); - break; - case Opt_nouser_xattr: - clear_opt(EROFS_SB(sb), XATTR_USER); - break; -#else - case Opt_user_xattr: - infoln("user_xattr options not supported"); - break; - case Opt_nouser_xattr: - infoln("nouser_xattr options not supported"); - break; -#endif -#ifdef CONFIG_EROFS_FS_POSIX_ACL - case Opt_acl: - set_opt(EROFS_SB(sb), POSIX_ACL); - break; - case Opt_noacl: - clear_opt(EROFS_SB(sb), POSIX_ACL); - break; -#else - case Opt_acl: - infoln("acl options not supported"); - break; - case Opt_noacl: - infoln("noacl options not supported"); - break; -#endif - case Opt_fault_injection: - err = erofs_build_fault_attr(EROFS_SB(sb), args); - if (err) - return err; - break; - - default: - errln("Unrecognized mount option \"%s\" " - "or missing value", p); - return -EINVAL; - } - } - return 0; -} - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - -static const struct address_space_operations managed_cache_aops; - -static int managed_cache_releasepage(struct page *page, gfp_t gfp_mask) -{ - int ret = 1; /* 0 - busy */ - struct address_space *const mapping = page->mapping; - - DBG_BUGON(!PageLocked(page)); - DBG_BUGON(mapping->a_ops != &managed_cache_aops); - - if (PagePrivate(page)) - ret = erofs_try_to_free_cached_page(mapping, page); - - return ret; -} - -static void managed_cache_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) -{ - const unsigned int stop = length + offset; - - DBG_BUGON(!PageLocked(page)); - - /* Check for potential overflow in debug mode */ - DBG_BUGON(stop > PAGE_SIZE || stop < length); - - if (offset == 0 && stop == PAGE_SIZE) - while (!managed_cache_releasepage(page, GFP_NOFS)) - cond_resched(); -} - -static const struct address_space_operations managed_cache_aops = { - .releasepage = managed_cache_releasepage, - .invalidatepage = managed_cache_invalidatepage, -}; - -static struct inode *erofs_init_managed_cache(struct super_block *sb) -{ - struct inode *inode = new_inode(sb); - - if (unlikely(!inode)) - return ERR_PTR(-ENOMEM); - - set_nlink(inode, 1); - inode->i_size = OFFSET_MAX; - - inode->i_mapping->a_ops = &managed_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, - GFP_NOFS | __GFP_HIGHMEM | - __GFP_MOVABLE | __GFP_NOFAIL); - return inode; -} - -#endif - -static int erofs_read_super(struct super_block *sb, - const char *dev_name, - void *data, int silent) -{ - struct inode *inode; - 
struct erofs_sb_info *sbi; - int err = -EINVAL; - - infoln("read_super, device -> %s", dev_name); - infoln("options -> %s", (char *)data); - - if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) { - errln("failed to set erofs blksize"); - goto err; - } - - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (unlikely(!sbi)) { - err = -ENOMEM; - goto err; - } - sb->s_fs_info = sbi; - - err = superblock_read(sb); - if (err) - goto err_sbread; - - sb->s_magic = EROFS_SUPER_MAGIC; - sb->s_flags |= SB_RDONLY | SB_NOATIME; - sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_time_gran = 1; - - sb->s_op = &erofs_sops; - -#ifdef CONFIG_EROFS_FS_XATTR - sb->s_xattr = erofs_xattr_handlers; -#endif - - /* set erofs default mount options */ - default_options(sbi); - - err = parse_options(sb, data); - if (err) - goto err_parseopt; - - if (!silent) - infoln("root inode @ nid %llu", ROOT_NID(sbi)); - - if (test_opt(sbi, POSIX_ACL)) - sb->s_flags |= SB_POSIXACL; - else - sb->s_flags &= ~SB_POSIXACL; - -#ifdef CONFIG_EROFS_FS_ZIP - INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC); -#endif - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - sbi->managed_cache = erofs_init_managed_cache(sb); - if (IS_ERR(sbi->managed_cache)) { - err = PTR_ERR(sbi->managed_cache); - goto err_init_managed_cache; - } -#endif - - /* get the root inode */ - inode = erofs_iget(sb, ROOT_NID(sbi), true); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto err_iget; - } - - if (!S_ISDIR(inode->i_mode)) { - errln("rootino(nid %llu) is not a directory(i_mode %o)", - ROOT_NID(sbi), inode->i_mode); - err = -EINVAL; - iput(inode); - goto err_iget; - } - - sb->s_root = d_make_root(inode); - if (!sb->s_root) { - err = -ENOMEM; - goto err_iget; - } - - /* save the device name to sbi */ - sbi->dev_name = __getname(); - if (!sbi->dev_name) { - err = -ENOMEM; - goto err_devname; - } - - snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name); - sbi->dev_name[PATH_MAX - 1] = '\0'; - - erofs_register_super(sb); - - if (!silent) - infoln("mounted on %s with opts: %s.", dev_name, - (char *)data); - return 0; - /* - * please add a label for each exit point and use - * the following name convention, thus new features - * can be integrated easily without renaming labels. - */ -err_devname: - dput(sb->s_root); - sb->s_root = NULL; -err_iget: -#ifdef EROFS_FS_HAS_MANAGED_CACHE - iput(sbi->managed_cache); -err_init_managed_cache: -#endif -err_parseopt: -err_sbread: - sb->s_fs_info = NULL; - kfree(sbi); -err: - return err; -} - -/* - * could be triggered after deactivate_locked_super() - * is called, thus including umount and failed to initialize. 
- */ -static void erofs_put_super(struct super_block *sb) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - - /* for cases which are failed in "read_super" */ - if (!sbi) - return; - - WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC); - - infoln("unmounted for %s", sbi->dev_name); - __putname(sbi->dev_name); - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - iput(sbi->managed_cache); -#endif - - mutex_lock(&sbi->umount_mutex); - -#ifdef CONFIG_EROFS_FS_ZIP - /* clean up the compression space of this sb */ - erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true); -#endif - - erofs_unregister_super(sb); - mutex_unlock(&sbi->umount_mutex); - - kfree(sbi); - sb->s_fs_info = NULL; -} - - -struct erofs_mount_private { - const char *dev_name; - char *options; -}; - -/* support mount_bdev() with options */ -static int erofs_fill_super(struct super_block *sb, - void *_priv, int silent) -{ - struct erofs_mount_private *priv = _priv; - - return erofs_read_super(sb, priv->dev_name, - priv->options, silent); -} - -static struct dentry *erofs_mount( - struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - struct erofs_mount_private priv = { - .dev_name = dev_name, - .options = data - }; - - return mount_bdev(fs_type, flags, dev_name, - &priv, erofs_fill_super); -} - -static void erofs_kill_sb(struct super_block *sb) -{ - kill_block_super(sb); -} - -static struct file_system_type erofs_fs_type = { - .owner = THIS_MODULE, - .name = "erofs", - .mount = erofs_mount, - .kill_sb = erofs_kill_sb, - .fs_flags = FS_REQUIRES_DEV, -}; -MODULE_ALIAS_FS("erofs"); - -static int __init erofs_module_init(void) -{ - int err; - - erofs_check_ondisk_layout_definitions(); - infoln("initializing erofs " EROFS_VERSION); - - err = erofs_init_inode_cache(); - if (err) - goto icache_err; - - err = register_shrinker(&erofs_shrinker_info); - if (err) - goto shrinker_err; - - err = z_erofs_init_zip_subsystem(); - if (err) - goto zip_err; - - err = register_filesystem(&erofs_fs_type); - if (err) - goto fs_err; - - infoln("successfully to initialize erofs"); - return 0; - -fs_err: - z_erofs_exit_zip_subsystem(); -zip_err: - unregister_shrinker(&erofs_shrinker_info); -shrinker_err: - erofs_exit_inode_cache(); -icache_err: - return err; -} - -static void __exit erofs_module_exit(void) -{ - unregister_filesystem(&erofs_fs_type); - z_erofs_exit_zip_subsystem(); - unregister_shrinker(&erofs_shrinker_info); - erofs_exit_inode_cache(); - infoln("successfully finalize erofs"); -} - -/* get filesystem statistics */ -static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) -{ - struct super_block *sb = dentry->d_sb; - struct erofs_sb_info *sbi = EROFS_SB(sb); - u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - - buf->f_type = sb->s_magic; - buf->f_bsize = EROFS_BLKSIZ; - buf->f_blocks = sbi->blocks; - buf->f_bfree = buf->f_bavail = 0; - - buf->f_files = ULLONG_MAX; - buf->f_ffree = ULLONG_MAX - sbi->inos; - - buf->f_namelen = EROFS_NAME_LEN; - - buf->f_fsid.val[0] = (u32)id; - buf->f_fsid.val[1] = (u32)(id >> 32); - return 0; -} - -static int erofs_show_options(struct seq_file *seq, struct dentry *root) -{ - struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb); - -#ifdef CONFIG_EROFS_FS_XATTR - if (test_opt(sbi, XATTR_USER)) - seq_puts(seq, ",user_xattr"); - else - seq_puts(seq, ",nouser_xattr"); -#endif -#ifdef CONFIG_EROFS_FS_POSIX_ACL - if (test_opt(sbi, POSIX_ACL)) - seq_puts(seq, ",acl"); - else - seq_puts(seq, ",noacl"); -#endif - if (test_opt(sbi, FAULT_INJECTION)) - seq_printf(seq, ",fault_injection=%u", - 
erofs_get_fault_rate(sbi)); - return 0; -} - -static int erofs_remount(struct super_block *sb, int *flags, char *data) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - unsigned int org_mnt_opt = sbi->mount_opt; - unsigned int org_inject_rate = erofs_get_fault_rate(sbi); - int err; - - DBG_BUGON(!sb_rdonly(sb)); - err = parse_options(sb, data); - if (err) - goto out; - - if (test_opt(sbi, POSIX_ACL)) - sb->s_flags |= SB_POSIXACL; - else - sb->s_flags &= ~SB_POSIXACL; - - *flags |= SB_RDONLY; - return 0; -out: - __erofs_build_fault_attr(sbi, org_inject_rate); - sbi->mount_opt = org_mnt_opt; - - return err; -} - -const struct super_operations erofs_sops = { - .put_super = erofs_put_super, - .alloc_inode = alloc_inode, - .free_inode = free_inode, - .statfs = erofs_statfs, - .show_options = erofs_show_options, - .remount_fs = erofs_remount, -}; - -module_init(erofs_module_init); -module_exit(erofs_module_exit); - -MODULE_DESCRIPTION("Enhanced ROM File System"); -MODULE_AUTHOR("Gao Xiang, Yu Chao, Miao Xie, CONSUMER BG, HUAWEI Inc."); -MODULE_LICENSE("GPL"); - diff --git a/drivers/staging/erofs/unzip_pagevec.h b/drivers/staging/erofs/unzip_pagevec.h deleted file mode 100644 index 7af0ba8d8495..000000000000 --- a/drivers/staging/erofs/unzip_pagevec.h +++ /dev/null @@ -1,169 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * linux/drivers/staging/erofs/unzip_pagevec.h - * - * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - */ -#ifndef __EROFS_UNZIP_PAGEVEC_H -#define __EROFS_UNZIP_PAGEVEC_H - -#include <linux/tagptr.h> - -/* page type in pagevec for unzip subsystem */ -enum z_erofs_page_type { - /* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */ - Z_EROFS_PAGE_TYPE_EXCLUSIVE, - - Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED, - - Z_EROFS_VLE_PAGE_TYPE_HEAD, - Z_EROFS_VLE_PAGE_TYPE_MAX -}; - -extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0") - __bad_page_type_exclusive(void); - -/* pagevec tagged pointer */ -typedef tagptr2_t erofs_vtptr_t; - -/* pagevec collector */ -struct z_erofs_pagevec_ctor { - struct page *curr, *next; - erofs_vtptr_t *pages; - - unsigned int nr, index; -}; - -static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor, - bool atomic) -{ - if (!ctor->curr) - return; - - if (atomic) - kunmap_atomic(ctor->pages); - else - kunmap(ctor->curr); -} - -static inline struct page * -z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor, - unsigned nr) -{ - unsigned index; - - /* keep away from occupied pages */ - if (ctor->next) - return ctor->next; - - for (index = 0; index < nr; ++index) { - const erofs_vtptr_t t = ctor->pages[index]; - const unsigned tags = tagptr_unfold_tags(t); - - if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE) - return tagptr_unfold_ptr(t); - } - DBG_BUGON(nr >= ctor->nr); - return NULL; -} - -static inline void -z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor, - bool atomic) -{ - struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr); - - z_erofs_pagevec_ctor_exit(ctor, atomic); - - ctor->curr = next; - ctor->next = NULL; - ctor->pages = atomic ? 
- kmap_atomic(ctor->curr) : kmap(ctor->curr); - - ctor->nr = PAGE_SIZE / sizeof(struct page *); - ctor->index = 0; -} - -static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor, - unsigned nr, - erofs_vtptr_t *pages, unsigned i) -{ - ctor->nr = nr; - ctor->curr = ctor->next = NULL; - ctor->pages = pages; - - if (i >= nr) { - i -= nr; - z_erofs_pagevec_ctor_pagedown(ctor, false); - while (i > ctor->nr) { - i -= ctor->nr; - z_erofs_pagevec_ctor_pagedown(ctor, false); - } - } - - ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i); - ctor->index = i; -} - -static inline bool -z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor, - struct page *page, - enum z_erofs_page_type type, - bool *occupied) -{ - *occupied = false; - if (unlikely(!ctor->next && type)) - if (ctor->index + 1 == ctor->nr) - return false; - - if (unlikely(ctor->index >= ctor->nr)) - z_erofs_pagevec_ctor_pagedown(ctor, false); - - /* exclusive page type must be 0 */ - if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL) - __bad_page_type_exclusive(); - - /* should remind that collector->next never equal to 1, 2 */ - if (type == (uintptr_t)ctor->next) { - ctor->next = page; - *occupied = true; - } - - ctor->pages[ctor->index++] = - tagptr_fold(erofs_vtptr_t, page, type); - return true; -} - -static inline struct page * -z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor, - enum z_erofs_page_type *type) -{ - erofs_vtptr_t t; - - if (unlikely(ctor->index >= ctor->nr)) { - DBG_BUGON(!ctor->next); - z_erofs_pagevec_ctor_pagedown(ctor, true); - } - - t = ctor->pages[ctor->index]; - - *type = tagptr_unfold_tags(t); - - /* should remind that collector->next never equal to 1, 2 */ - if (*type == (uintptr_t)ctor->next) - ctor->next = tagptr_unfold_ptr(t); - - ctor->pages[ctor->index++] = - tagptr_fold(erofs_vtptr_t, NULL, 0); - - return tagptr_unfold_ptr(t); -} - -#endif - diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c deleted file mode 100644 index f0dab81ff816..000000000000 --- a/drivers/staging/erofs/unzip_vle.c +++ /dev/null @@ -1,1591 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/unzip_vle.c - * - * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - */ -#include "unzip_vle.h" -#include "compress.h" -#include <linux/prefetch.h> - -#include <trace/events/erofs.h> - -/* - * a compressed_pages[] placeholder in order to avoid - * being filled with file pages for in-place decompression. 
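The pagevec above stores every page as an erofs_vtptr_t, a tagptr that folds the small page-type value into the low bits of the pointer; this only works because the pointed-to objects are aligned, so those bits are known to be zero. A standalone sketch of the fold/unfold idea on uintptr_t; the 2-bit width and the helper names are assumptions for illustration:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TAG_BITS 2u
#define TAG_MASK ((1u << TAG_BITS) - 1)

/* pack a small tag into the low bits of a sufficiently aligned pointer */
static uintptr_t fold(void *ptr, unsigned int tag)
{
        assert(((uintptr_t)ptr & TAG_MASK) == 0);       /* alignment guarantee */
        assert((tag & ~TAG_MASK) == 0);                 /* tag must fit */
        return (uintptr_t)ptr | tag;
}

static void *unfold_ptr(uintptr_t v)        { return (void *)(v & ~(uintptr_t)TAG_MASK); }
static unsigned int unfold_tag(uintptr_t v) { return (unsigned int)(v & TAG_MASK); }

int main(void)
{
        static int page;                /* stands in for a struct page */
        uintptr_t t = fold(&page, 3);

        printf("ptr ok: %d, tag: %u\n", unfold_ptr(t) == &page, unfold_tag(t));
        return 0;
}

tagptr_cmpxchg() in the real header builds on the same representation: since the whole tagged value is a single word, the pointer and its tag can be exchanged atomically together.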
- */ -#define PAGE_UNALLOCATED ((void *)0x5F0E4B1D) - -/* how to allocate cached pages for a workgroup */ -enum z_erofs_cache_alloctype { - DONTALLOC, /* don't allocate any cached pages */ - DELAYEDALLOC, /* delayed allocation (at the time of submitting io) */ -}; - -/* - * tagged pointer with 1-bit tag for all compressed pages - * tag 0 - the page is just found with an extra page reference - */ -typedef tagptr1_t compressed_page_t; - -#define tag_compressed_page_justfound(page) \ - tagptr_fold(compressed_page_t, page, 1) - -static struct workqueue_struct *z_erofs_workqueue __read_mostly; -static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly; - -void z_erofs_exit_zip_subsystem(void) -{ - destroy_workqueue(z_erofs_workqueue); - kmem_cache_destroy(z_erofs_workgroup_cachep); -} - -static inline int init_unzip_workqueue(void) -{ - const unsigned int onlinecpus = num_possible_cpus(); - - /* - * we don't need too many threads, limiting threads - * could improve scheduling performance. - */ - z_erofs_workqueue = - alloc_workqueue("erofs_unzipd", - WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE, - onlinecpus + onlinecpus / 4); - - return z_erofs_workqueue ? 0 : -ENOMEM; -} - -static void init_once(void *ptr) -{ - struct z_erofs_vle_workgroup *grp = ptr; - struct z_erofs_vle_work *const work = - z_erofs_vle_grab_primary_work(grp); - unsigned int i; - - mutex_init(&work->lock); - work->nr_pages = 0; - work->vcnt = 0; - for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i) - grp->compressed_pages[i] = NULL; -} - -static void init_always(struct z_erofs_vle_workgroup *grp) -{ - struct z_erofs_vle_work *const work = - z_erofs_vle_grab_primary_work(grp); - - atomic_set(&grp->obj.refcount, 1); - grp->flags = 0; - - DBG_BUGON(work->nr_pages); - DBG_BUGON(work->vcnt); -} - -int __init z_erofs_init_zip_subsystem(void) -{ - z_erofs_workgroup_cachep = - kmem_cache_create("erofs_compress", - Z_EROFS_WORKGROUP_SIZE, 0, - SLAB_RECLAIM_ACCOUNT, init_once); - - if (z_erofs_workgroup_cachep) { - if (!init_unzip_workqueue()) - return 0; - - kmem_cache_destroy(z_erofs_workgroup_cachep); - } - return -ENOMEM; -} - -enum z_erofs_vle_work_role { - Z_EROFS_VLE_WORK_SECONDARY, - Z_EROFS_VLE_WORK_PRIMARY, - /* - * The current work was the tail of an exist chain, and the previous - * processed chained works are all decided to be hooked up to it. 
- * A new chain should be created for the remaining unprocessed works, - * therefore different from Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED, - * the next work cannot reuse the whole page in the following scenario: - * ________________________________________________________________ - * | tail (partial) page | head (partial) page | - * | (belongs to the next work) | (belongs to the current work) | - * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________| - */ - Z_EROFS_VLE_WORK_PRIMARY_HOOKED, - /* - * The current work has been linked with the processed chained works, - * and could be also linked with the potential remaining works, which - * means if the processing page is the tail partial page of the work, - * the current work can safely use the whole page (since the next work - * is under control) for in-place decompression, as illustrated below: - * ________________________________________________________________ - * | tail (partial) page | head (partial) page | - * | (of the current work) | (of the previous work) | - * | PRIMARY_FOLLOWED or | | - * |_____PRIMARY_HOOKED____|____________PRIMARY_FOLLOWED____________| - * - * [ (*) the above page can be used for the current work itself. ] - */ - Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED, - Z_EROFS_VLE_WORK_MAX -}; - -struct z_erofs_vle_work_builder { - enum z_erofs_vle_work_role role; - /* - * 'hosted = false' means that the current workgroup doesn't belong to - * the owned chained workgroups. In the other words, it is none of our - * business to submit this workgroup. - */ - bool hosted; - - struct z_erofs_vle_workgroup *grp; - struct z_erofs_vle_work *work; - struct z_erofs_pagevec_ctor vector; - - /* pages used for reading the compressed data */ - struct page **compressed_pages; - unsigned int compressed_deficit; -}; - -#define VLE_WORK_BUILDER_INIT() \ - { .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED } - -#ifdef EROFS_FS_HAS_MANAGED_CACHE -static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl, - struct address_space *mc, - pgoff_t index, - unsigned int clusterpages, - enum z_erofs_cache_alloctype type, - struct list_head *pagepool, - gfp_t gfp) -{ - struct page **const pages = bl->compressed_pages; - const unsigned int remaining = bl->compressed_deficit; - bool standalone = true; - unsigned int i, j = 0; - - if (bl->role < Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED) - return; - - gfp = mapping_gfp_constraint(mc, gfp) & ~__GFP_RECLAIM; - - index += clusterpages - remaining; - - for (i = 0; i < remaining; ++i) { - struct page *page; - compressed_page_t t; - - /* the compressed page was loaded before */ - if (READ_ONCE(pages[i])) - continue; - - page = find_get_page(mc, index + i); - - if (page) { - t = tag_compressed_page_justfound(page); - } else if (type == DELAYEDALLOC) { - t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED); - } else { /* DONTALLOC */ - if (standalone) - j = i; - standalone = false; - continue; - } - - if (!cmpxchg_relaxed(&pages[i], NULL, tagptr_cast_ptr(t))) - continue; - - if (page) - put_page(page); - } - bl->compressed_pages += j; - bl->compressed_deficit = remaining - j; - - if (standalone) - bl->role = Z_EROFS_VLE_WORK_PRIMARY; -} - -/* called by erofs_shrinker to get rid of all compressed_pages */ -int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *egrp) -{ - struct z_erofs_vle_workgroup *const grp = - container_of(egrp, struct z_erofs_vle_workgroup, obj); - struct address_space *const mapping = MNGD_MAPPING(sbi); - const int clusterpages = 
erofs_clusterpages(sbi); - int i; - - /* - * refcount of workgroup is now freezed as 1, - * therefore no need to worry about available decompression users. - */ - for (i = 0; i < clusterpages; ++i) { - struct page *page = grp->compressed_pages[i]; - - if (!page || page->mapping != mapping) - continue; - - /* block other users from reclaiming or migrating the page */ - if (!trylock_page(page)) - return -EBUSY; - - /* barrier is implied in the following 'unlock_page' */ - WRITE_ONCE(grp->compressed_pages[i], NULL); - - set_page_private(page, 0); - ClearPagePrivate(page); - - unlock_page(page); - put_page(page); - } - return 0; -} - -int erofs_try_to_free_cached_page(struct address_space *mapping, - struct page *page) -{ - struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb); - const unsigned int clusterpages = erofs_clusterpages(sbi); - struct z_erofs_vle_workgroup *const grp = (void *)page_private(page); - int ret = 0; /* 0 - busy */ - - if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) { - unsigned int i; - - for (i = 0; i < clusterpages; ++i) { - if (grp->compressed_pages[i] == page) { - WRITE_ONCE(grp->compressed_pages[i], NULL); - ret = 1; - break; - } - } - erofs_workgroup_unfreeze(&grp->obj, 1); - - if (ret) { - ClearPagePrivate(page); - put_page(page); - } - } - return ret; -} -#else -static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl, - struct address_space *mc, - pgoff_t index, - unsigned int clusterpages, - enum z_erofs_cache_alloctype type, - struct list_head *pagepool, - gfp_t gfp) -{ - /* nowhere to load compressed pages from */ -} -#endif - -/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ -static inline bool try_to_reuse_as_compressed_page( - struct z_erofs_vle_work_builder *b, - struct page *page) -{ - while (b->compressed_deficit) { - --b->compressed_deficit; - if (!cmpxchg(b->compressed_pages++, NULL, page)) - return true; - } - - return false; -} - -/* callers must be with work->lock held */ -static int z_erofs_vle_work_add_page( - struct z_erofs_vle_work_builder *builder, - struct page *page, - enum z_erofs_page_type type) -{ - int ret; - bool occupied; - - /* give priority for the compressed data storage */ - if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY && - type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && - try_to_reuse_as_compressed_page(builder, page)) - return 0; - - ret = z_erofs_pagevec_ctor_enqueue(&builder->vector, - page, type, &occupied); - builder->work->vcnt += (unsigned int)ret; - - return ret ? 0 : -EAGAIN; -} - -static enum z_erofs_vle_work_role -try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp, - z_erofs_vle_owned_workgrp_t *owned_head, - bool *hosted) -{ - DBG_BUGON(*hosted); - - /* let's claim these following types of workgroup */ -retry: - if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) { - /* type 1, nil workgroup */ - if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_NIL, - *owned_head) != Z_EROFS_VLE_WORKGRP_NIL) - goto retry; - - *owned_head = &grp->next; - *hosted = true; - /* lucky, I am the followee :) */ - return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; - - } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) { - /* - * type 2, link to the end of a existing open chain, - * be careful that its submission itself is governed - * by the original owned chain. 
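- *
- * To summarize the lock-free claiming in this function (a rough sketch):
- *   grp->next == NIL  : cmpxchg(NIL -> *owned_head),  role PRIMARY_FOLLOWED;
- *   grp->next == TAIL : cmpxchg(TAIL -> *owned_head), role PRIMARY_HOOKED;
- *   anything else     : already owned by another chain, role PRIMARY.
- * A failed cmpxchg simply retries from the top, so each workgroup ends
- * up claimed by exactly one chain even under concurrent readers.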
- */ - if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL, - *owned_head) != Z_EROFS_VLE_WORKGRP_TAIL) - goto retry; - *owned_head = Z_EROFS_VLE_WORKGRP_TAIL; - return Z_EROFS_VLE_WORK_PRIMARY_HOOKED; - } - - return Z_EROFS_VLE_WORK_PRIMARY; /* :( better luck next time */ -} - -struct z_erofs_vle_work_finder { - struct super_block *sb; - pgoff_t idx; - unsigned int pageofs; - - struct z_erofs_vle_workgroup **grp_ret; - enum z_erofs_vle_work_role *role; - z_erofs_vle_owned_workgrp_t *owned_head; - bool *hosted; -}; - -static struct z_erofs_vle_work * -z_erofs_vle_work_lookup(const struct z_erofs_vle_work_finder *f) -{ - bool tag, primary; - struct erofs_workgroup *egrp; - struct z_erofs_vle_workgroup *grp; - struct z_erofs_vle_work *work; - - egrp = erofs_find_workgroup(f->sb, f->idx, &tag); - if (!egrp) { - *f->grp_ret = NULL; - return NULL; - } - - grp = container_of(egrp, struct z_erofs_vle_workgroup, obj); - *f->grp_ret = grp; - - work = z_erofs_vle_grab_work(grp, f->pageofs); - /* if multiref is disabled, `primary' is always true */ - primary = true; - - DBG_BUGON(work->pageofs != f->pageofs); - - /* - * lock must be taken first to avoid grp->next == NIL between - * claiming workgroup and adding pages: - * grp->next != NIL - * grp->next = NIL - * mutex_unlock_all - * mutex_lock(&work->lock) - * add all pages to pagevec - * - * [correct locking case 1]: - * mutex_lock(grp->work[a]) - * ... - * mutex_lock(grp->work[b]) mutex_lock(grp->work[c]) - * ... *role = SECONDARY - * add all pages to pagevec - * ... - * mutex_unlock(grp->work[c]) - * mutex_lock(grp->work[c]) - * ... - * grp->next = NIL - * mutex_unlock_all - * - * [correct locking case 2]: - * mutex_lock(grp->work[b]) - * ... - * mutex_lock(grp->work[a]) - * ... - * mutex_lock(grp->work[c]) - * ... - * grp->next = NIL - * mutex_unlock_all - * mutex_lock(grp->work[a]) - * *role = PRIMARY_OWNER - * add all pages to pagevec - * ... - */ - mutex_lock(&work->lock); - - *f->hosted = false; - if (!primary) - *f->role = Z_EROFS_VLE_WORK_SECONDARY; - else /* claim the workgroup if possible */ - *f->role = try_to_claim_workgroup(grp, f->owned_head, - f->hosted); - return work; -} - -static struct z_erofs_vle_work * -z_erofs_vle_work_register(const struct z_erofs_vle_work_finder *f, - struct erofs_map_blocks *map) -{ - bool gnew = false; - struct z_erofs_vle_workgroup *grp = *f->grp_ret; - struct z_erofs_vle_work *work; - - /* if multiref is disabled, grp should never be nullptr */ - if (unlikely(grp)) { - DBG_BUGON(1); - return ERR_PTR(-EINVAL); - } - - /* no available workgroup, let's allocate one */ - grp = kmem_cache_alloc(z_erofs_workgroup_cachep, GFP_NOFS); - if (unlikely(!grp)) - return ERR_PTR(-ENOMEM); - - init_always(grp); - grp->obj.index = f->idx; - grp->llen = map->m_llen; - - z_erofs_vle_set_workgrp_fmt(grp, (map->m_flags & EROFS_MAP_ZIPPED) ? - Z_EROFS_VLE_WORKGRP_FMT_LZ4 : - Z_EROFS_VLE_WORKGRP_FMT_PLAIN); - - if (map->m_flags & EROFS_MAP_FULL_MAPPED) - grp->flags |= Z_EROFS_VLE_WORKGRP_FULL_LENGTH; - - /* new workgrps have been claimed as type 1 */ - WRITE_ONCE(grp->next, *f->owned_head); - /* primary and followed work for all new workgrps */ - *f->role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; - /* it should be submitted by ourselves */ - *f->hosted = true; - - gnew = true; - work = z_erofs_vle_grab_primary_work(grp); - work->pageofs = f->pageofs; - - /* - * lock all primary followed works before visible to others - * and mutex_trylock *never* fails for a new workgroup. 
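- * (The trylock cannot contend here: the new workgroup is only published
- * through erofs_register_workgroup() below, so until then no other
- * thread can even reach this mutex.)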
- */ - mutex_trylock(&work->lock); - - if (gnew) { - int err = erofs_register_workgroup(f->sb, &grp->obj, 0); - - if (err) { - mutex_unlock(&work->lock); - kmem_cache_free(z_erofs_workgroup_cachep, grp); - return ERR_PTR(-EAGAIN); - } - } - - *f->owned_head = &grp->next; - *f->grp_ret = grp; - return work; -} - -#define builder_is_hooked(builder) \ - ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED) - -#define builder_is_followed(builder) \ - ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED) - -static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder, - struct super_block *sb, - struct erofs_map_blocks *map, - z_erofs_vle_owned_workgrp_t *owned_head) -{ - const unsigned int clusterpages = erofs_clusterpages(EROFS_SB(sb)); - struct z_erofs_vle_workgroup *grp; - const struct z_erofs_vle_work_finder finder = { - .sb = sb, - .idx = erofs_blknr(map->m_pa), - .pageofs = map->m_la & ~PAGE_MASK, - .grp_ret = &grp, - .role = &builder->role, - .owned_head = owned_head, - .hosted = &builder->hosted - }; - struct z_erofs_vle_work *work; - - DBG_BUGON(builder->work); - - /* must be Z_EROFS_WORK_TAIL or the next chained work */ - DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL); - DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - - DBG_BUGON(erofs_blkoff(map->m_pa)); - -repeat: - work = z_erofs_vle_work_lookup(&finder); - if (work) { - unsigned int orig_llen; - - /* increase workgroup `llen' if needed */ - while ((orig_llen = READ_ONCE(grp->llen)) < map->m_llen && - orig_llen != cmpxchg_relaxed(&grp->llen, - orig_llen, map->m_llen)) - cpu_relax(); - goto got_it; - } - - work = z_erofs_vle_work_register(&finder, map); - if (unlikely(work == ERR_PTR(-EAGAIN))) - goto repeat; - - if (IS_ERR(work)) - return PTR_ERR(work); -got_it: - z_erofs_pagevec_ctor_init(&builder->vector, Z_EROFS_NR_INLINE_PAGEVECS, - work->pagevec, work->vcnt); - - if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) { - /* enable possibly in-place decompression */ - builder->compressed_pages = grp->compressed_pages; - builder->compressed_deficit = clusterpages; - } else { - builder->compressed_pages = NULL; - builder->compressed_deficit = 0; - } - - builder->grp = grp; - builder->work = work; - return 0; -} - -/* - * keep in mind that no referenced workgroups will be freed - * only after a RCU grace period, so rcu_read_lock() could - * prevent a workgroup from being freed. 
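- *
- * For reference, the lookup side (erofs_find_workgroup() in utils.c)
- * relies on exactly that, roughly:
- *
- *	rcu_read_lock();
- *	grp = radix_tree_lookup(&sbi->workstn_tree, index);
- *	if (grp && erofs_workgroup_get(grp) < 0)
- *		grp = NULL;		(being freed, retry the lookup)
- *	rcu_read_unlock();
- *
- * since freeing always goes through call_rcu() below, a pointer obtained
- * under rcu_read_lock() cannot disappear under the reader.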
- */ -static void z_erofs_rcu_callback(struct rcu_head *head) -{ - struct z_erofs_vle_work *work = container_of(head, - struct z_erofs_vle_work, rcu); - struct z_erofs_vle_workgroup *grp = - z_erofs_vle_work_workgroup(work, true); - - kmem_cache_free(z_erofs_workgroup_cachep, grp); -} - -void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) -{ - struct z_erofs_vle_workgroup *const vgrp = container_of(grp, - struct z_erofs_vle_workgroup, obj); - struct z_erofs_vle_work *const work = &vgrp->work; - - call_rcu(&work->rcu, z_erofs_rcu_callback); -} - -static void -__z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp, - struct z_erofs_vle_work *work __maybe_unused) -{ - erofs_workgroup_put(&grp->obj); -} - -static void z_erofs_vle_work_release(struct z_erofs_vle_work *work) -{ - struct z_erofs_vle_workgroup *grp = - z_erofs_vle_work_workgroup(work, true); - - __z_erofs_vle_work_release(grp, work); -} - -static inline bool -z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder) -{ - struct z_erofs_vle_work *work = builder->work; - - if (!work) - return false; - - z_erofs_pagevec_ctor_exit(&builder->vector, false); - mutex_unlock(&work->lock); - - /* - * if all pending pages are added, don't hold work reference - * any longer if the current work isn't hosted by ourselves. - */ - if (!builder->hosted) - __z_erofs_vle_work_release(builder->grp, work); - - builder->work = NULL; - builder->grp = NULL; - return true; -} - -static inline struct page *__stagingpage_alloc(struct list_head *pagepool, - gfp_t gfp) -{ - struct page *page = erofs_allocpage(pagepool, gfp); - - if (unlikely(!page)) - return NULL; - - page->mapping = Z_EROFS_MAPPING_STAGING; - return page; -} - -struct z_erofs_vle_frontend { - struct inode *const inode; - - struct z_erofs_vle_work_builder builder; - struct erofs_map_blocks map; - - z_erofs_vle_owned_workgrp_t owned_head; - - /* used for applying cache strategy on the fly */ - bool backmost; - erofs_off_t headoffset; -}; - -#define VLE_FRONTEND_INIT(__i) { \ - .inode = __i, \ - .map = { \ - .m_llen = 0, \ - .m_plen = 0, \ - .mpage = NULL \ - }, \ - .builder = VLE_WORK_BUILDER_INIT(), \ - .owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \ - .backmost = true, } - -#ifdef EROFS_FS_HAS_MANAGED_CACHE -static inline bool -should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la) -{ - if (fe->backmost) - return true; - - if (EROFS_FS_ZIP_CACHE_LVL >= 2) - return la < fe->headoffset; - - return false; -} -#else -static inline bool -should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la) -{ - return false; -} -#endif - -static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe, - struct page *page, - struct list_head *page_pool) -{ - struct super_block *const sb = fe->inode->i_sb; - struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb); - struct erofs_map_blocks *const map = &fe->map; - struct z_erofs_vle_work_builder *const builder = &fe->builder; - const loff_t offset = page_offset(page); - - bool tight = builder_is_hooked(builder); - struct z_erofs_vle_work *work = builder->work; - - enum z_erofs_cache_alloctype cache_strategy; - enum z_erofs_page_type page_type; - unsigned int cur, end, spiltted, index; - int err = 0; - - /* register locked file pages as online pages in pack */ - z_erofs_onlinepage_init(page); - - spiltted = 0; - end = PAGE_SIZE; -repeat: - cur = end - 1; - - /* lucky, within the range of the current map_blocks */ - if (offset + cur >= map->m_la && - offset + cur < map->m_la + map->m_llen) { - /* 
didn't get a valid unzip work previously (very rare) */ - if (!builder->work) - goto restart_now; - goto hitted; - } - - /* go ahead the next map_blocks */ - debugln("%s: [out-of-range] pos %llu", __func__, offset + cur); - - if (z_erofs_vle_work_iter_end(builder)) - fe->backmost = false; - - map->m_la = offset + cur; - map->m_llen = 0; - err = z_erofs_map_blocks_iter(fe->inode, map, 0); - if (unlikely(err)) - goto err_out; - -restart_now: - if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) - goto hitted; - - DBG_BUGON(map->m_plen != 1 << sbi->clusterbits); - DBG_BUGON(erofs_blkoff(map->m_pa)); - - err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head); - if (unlikely(err)) - goto err_out; - - /* preload all compressed pages (maybe downgrade role if necessary) */ - if (should_alloc_managed_pages(fe, map->m_la)) - cache_strategy = DELAYEDALLOC; - else - cache_strategy = DONTALLOC; - - preload_compressed_pages(builder, MNGD_MAPPING(sbi), - map->m_pa / PAGE_SIZE, - map->m_plen / PAGE_SIZE, - cache_strategy, page_pool, GFP_KERNEL); - - tight &= builder_is_hooked(builder); - work = builder->work; -hitted: - cur = end - min_t(unsigned int, offset + end - map->m_la, end); - if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) { - zero_user_segment(page, cur, end); - goto next_part; - } - - /* let's derive page type */ - page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD : - (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : - (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE : - Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); - - if (cur) - tight &= builder_is_followed(builder); - -retry: - err = z_erofs_vle_work_add_page(builder, page, page_type); - /* should allocate an additional staging page for pagevec */ - if (err == -EAGAIN) { - struct page *const newpage = - __stagingpage_alloc(page_pool, GFP_NOFS); - - err = z_erofs_vle_work_add_page(builder, newpage, - Z_EROFS_PAGE_TYPE_EXCLUSIVE); - if (likely(!err)) - goto retry; - } - - if (unlikely(err)) - goto err_out; - - index = page->index - map->m_la / PAGE_SIZE; - - /* FIXME! avoid the last relundant fixup & endio */ - z_erofs_onlinepage_fixup(page, index, true); - - /* bump up the number of spiltted parts of a page */ - ++spiltted; - /* also update nr_pages */ - work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1); -next_part: - /* can be used for verification */ - map->m_llen = offset + cur - map->m_la; - - end = cur; - if (end > 0) - goto repeat; - -out: - /* FIXME! 
avoid the last relundant fixup & endio */ - z_erofs_onlinepage_endio(page); - - debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu", - __func__, page, spiltted, map->m_llen); - return err; - - /* if some error occurred while processing this page */ -err_out: - SetPageError(page); - goto out; -} - -static void z_erofs_vle_unzip_kickoff(void *ptr, int bios) -{ - tagptr1_t t = tagptr_init(tagptr1_t, ptr); - struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t); - bool background = tagptr_unfold_tags(t); - - if (!background) { - unsigned long flags; - - spin_lock_irqsave(&io->u.wait.lock, flags); - if (!atomic_add_return(bios, &io->pending_bios)) - wake_up_locked(&io->u.wait); - spin_unlock_irqrestore(&io->u.wait.lock, flags); - return; - } - - if (!atomic_add_return(bios, &io->pending_bios)) - queue_work(z_erofs_workqueue, &io->u.work); -} - -static inline void z_erofs_vle_read_endio(struct bio *bio) -{ - struct erofs_sb_info *sbi = NULL; - blk_status_t err = bio->bi_status; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - bool cachemngd = false; - - DBG_BUGON(PageUptodate(page)); - DBG_BUGON(!page->mapping); - - if (unlikely(!sbi && !z_erofs_page_is_staging(page))) { - sbi = EROFS_SB(page->mapping->host->i_sb); - - if (time_to_inject(sbi, FAULT_READ_IO)) { - erofs_show_injection_info(FAULT_READ_IO); - err = BLK_STS_IOERR; - } - } - - /* sbi should already be gotten if the page is managed */ - if (sbi) - cachemngd = erofs_page_is_managed(sbi, page); - - if (unlikely(err)) - SetPageError(page); - else if (cachemngd) - SetPageUptodate(page); - - if (cachemngd) - unlock_page(page); - } - - z_erofs_vle_unzip_kickoff(bio->bi_private, -1); - bio_put(bio); -} - -static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES]; -static DEFINE_MUTEX(z_pagemap_global_lock); - -static int z_erofs_vle_unzip(struct super_block *sb, - struct z_erofs_vle_workgroup *grp, - struct list_head *page_pool) -{ - struct erofs_sb_info *const sbi = EROFS_SB(sb); - const unsigned int clusterpages = erofs_clusterpages(sbi); - - struct z_erofs_pagevec_ctor ctor; - unsigned int nr_pages; - unsigned int sparsemem_pages = 0; - struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES]; - struct page **pages, **compressed_pages, *page; - unsigned int algorithm; - unsigned int i, outputsize; - - enum z_erofs_page_type page_type; - bool overlapped, partial; - struct z_erofs_vle_work *work; - int err; - - might_sleep(); - work = z_erofs_vle_grab_primary_work(grp); - DBG_BUGON(!READ_ONCE(work->nr_pages)); - - mutex_lock(&work->lock); - nr_pages = work->nr_pages; - - if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES)) - pages = pages_onstack; - else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES && - mutex_trylock(&z_pagemap_global_lock)) - pages = z_pagemap_global; - else { -repeat: - pages = kvmalloc_array(nr_pages, sizeof(struct page *), - GFP_KERNEL); - - /* fallback to global pagemap for the lowmem scenario */ - if (unlikely(!pages)) { - if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES) - goto repeat; - else { - mutex_lock(&z_pagemap_global_lock); - pages = z_pagemap_global; - } - } - } - - for (i = 0; i < nr_pages; ++i) - pages[i] = NULL; - - z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS, - work->pagevec, 0); - - for (i = 0; i < work->vcnt; ++i) { - unsigned int pagenr; - - page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type); - - /* all pages in pagevec ought to be valid */ - DBG_BUGON(!page); - 
DBG_BUGON(!page->mapping); - - if (z_erofs_put_stagingpage(page_pool, page)) - continue; - - if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD) - pagenr = 0; - else - pagenr = z_erofs_onlinepage_index(page); - - DBG_BUGON(pagenr >= nr_pages); - DBG_BUGON(pages[pagenr]); - - pages[pagenr] = page; - } - sparsemem_pages = i; - - z_erofs_pagevec_ctor_exit(&ctor, true); - - overlapped = false; - compressed_pages = grp->compressed_pages; - - err = 0; - for (i = 0; i < clusterpages; ++i) { - unsigned int pagenr; - - page = compressed_pages[i]; - - /* all compressed pages ought to be valid */ - DBG_BUGON(!page); - DBG_BUGON(!page->mapping); - - if (!z_erofs_page_is_staging(page)) { - if (erofs_page_is_managed(sbi, page)) { - if (unlikely(!PageUptodate(page))) - err = -EIO; - continue; - } - - /* - * only if non-head page can be selected - * for inplace decompression - */ - pagenr = z_erofs_onlinepage_index(page); - - DBG_BUGON(pagenr >= nr_pages); - DBG_BUGON(pages[pagenr]); - ++sparsemem_pages; - pages[pagenr] = page; - - overlapped = true; - } - - /* PG_error needs checking for inplaced and staging pages */ - if (unlikely(PageError(page))) { - DBG_BUGON(PageUptodate(page)); - err = -EIO; - } - } - - if (unlikely(err)) - goto out; - - if (nr_pages << PAGE_SHIFT >= work->pageofs + grp->llen) { - outputsize = grp->llen; - partial = !(grp->flags & Z_EROFS_VLE_WORKGRP_FULL_LENGTH); - } else { - outputsize = (nr_pages << PAGE_SHIFT) - work->pageofs; - partial = true; - } - - if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) - algorithm = Z_EROFS_COMPRESSION_SHIFTED; - else - algorithm = Z_EROFS_COMPRESSION_LZ4; - - err = z_erofs_decompress(&(struct z_erofs_decompress_req) { - .sb = sb, - .in = compressed_pages, - .out = pages, - .pageofs_out = work->pageofs, - .inputsize = PAGE_SIZE, - .outputsize = outputsize, - .alg = algorithm, - .inplace_io = overlapped, - .partial_decoding = partial - }, page_pool); - -out: - /* must handle all compressed pages before endding pages */ - for (i = 0; i < clusterpages; ++i) { - page = compressed_pages[i]; - - if (erofs_page_is_managed(sbi, page)) - continue; - - /* recycle all individual staging pages */ - (void)z_erofs_put_stagingpage(page_pool, page); - - WRITE_ONCE(compressed_pages[i], NULL); - } - - for (i = 0; i < nr_pages; ++i) { - page = pages[i]; - if (!page) - continue; - - DBG_BUGON(!page->mapping); - - /* recycle all individual staging pages */ - if (z_erofs_put_stagingpage(page_pool, page)) - continue; - - if (unlikely(err < 0)) - SetPageError(page); - - z_erofs_onlinepage_endio(page); - } - - if (pages == z_pagemap_global) - mutex_unlock(&z_pagemap_global_lock); - else if (unlikely(pages != pages_onstack)) - kvfree(pages); - - work->nr_pages = 0; - work->vcnt = 0; - - /* all work locks MUST be taken before the following line */ - - WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL); - - /* all work locks SHOULD be released right now */ - mutex_unlock(&work->lock); - - z_erofs_vle_work_release(work); - return err; -} - -static void z_erofs_vle_unzip_all(struct super_block *sb, - struct z_erofs_vle_unzip_io *io, - struct list_head *page_pool) -{ - z_erofs_vle_owned_workgrp_t owned = io->head; - - while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) { - struct z_erofs_vle_workgroup *grp; - - /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */ - DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL); - - /* no possible that 'owned' equals NULL */ - DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL); - - grp = container_of(owned, struct z_erofs_vle_workgroup, 
next); - owned = READ_ONCE(grp->next); - - z_erofs_vle_unzip(sb, grp, page_pool); - } -} - -static void z_erofs_vle_unzip_wq(struct work_struct *work) -{ - struct z_erofs_vle_unzip_io_sb *iosb = container_of(work, - struct z_erofs_vle_unzip_io_sb, io.u.work); - LIST_HEAD(page_pool); - - DBG_BUGON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool); - - put_pages_list(&page_pool); - kvfree(iosb); -} - -static struct page * -pickup_page_for_submission(struct z_erofs_vle_workgroup *grp, - unsigned int nr, - struct list_head *pagepool, - struct address_space *mc, - gfp_t gfp) -{ - /* determined at compile time to avoid too many #ifdefs */ - const bool nocache = __builtin_constant_p(mc) ? !mc : false; - const pgoff_t index = grp->obj.index; - bool tocache = false; - - struct address_space *mapping; - struct page *oldpage, *page; - - compressed_page_t t; - int justfound; - -repeat: - page = READ_ONCE(grp->compressed_pages[nr]); - oldpage = page; - - if (!page) - goto out_allocpage; - - /* - * the cached page has not been allocated and - * an placeholder is out there, prepare it now. - */ - if (!nocache && page == PAGE_UNALLOCATED) { - tocache = true; - goto out_allocpage; - } - - /* process the target tagged pointer */ - t = tagptr_init(compressed_page_t, page); - justfound = tagptr_unfold_tags(t); - page = tagptr_unfold_ptr(t); - - mapping = READ_ONCE(page->mapping); - - /* - * if managed cache is disabled, it's no way to - * get such a cached-like page. - */ - if (nocache) { - /* if managed cache is disabled, it is impossible `justfound' */ - DBG_BUGON(justfound); - - /* and it should be locked, not uptodate, and not truncated */ - DBG_BUGON(!PageLocked(page)); - DBG_BUGON(PageUptodate(page)); - DBG_BUGON(!mapping); - goto out; - } - - /* - * unmanaged (file) pages are all locked solidly, - * therefore it is impossible for `mapping' to be NULL. - */ - if (mapping && mapping != mc) - /* ought to be unmanaged pages */ - goto out; - - lock_page(page); - - /* only true if page reclaim goes wrong, should never happen */ - DBG_BUGON(justfound && PagePrivate(page)); - - /* the page is still in manage cache */ - if (page->mapping == mc) { - WRITE_ONCE(grp->compressed_pages[nr], page); - - ClearPageError(page); - if (!PagePrivate(page)) { - /* - * impossible to be !PagePrivate(page) for - * the current restriction as well if - * the page is already in compressed_pages[]. - */ - DBG_BUGON(!justfound); - - justfound = 0; - set_page_private(page, (unsigned long)grp); - SetPagePrivate(page); - } - - /* no need to submit io if it is already up-to-date */ - if (PageUptodate(page)) { - unlock_page(page); - page = NULL; - } - goto out; - } - - /* - * the managed page has been truncated, it's unsafe to - * reuse this one, let's allocate a new cache-managed page. 
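- *
- * (Sketch of the remaining flow of this helper: a NULL slot, a
- * PAGE_UNALLOCATED placeholder or a truncated cache page all end up at
- * out_allocpage, where a fresh page is allocated, installed into
- * compressed_pages[nr] with cmpxchg() and, if wanted, inserted into the
- * managed cache via add_to_page_cache_lru(); losing the cmpxchg() race
- * just recycles the page and restarts the whole lookup.)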
- */ - DBG_BUGON(page->mapping); - DBG_BUGON(!justfound); - - tocache = true; - unlock_page(page); - put_page(page); -out_allocpage: - page = __stagingpage_alloc(pagepool, gfp); - if (oldpage != cmpxchg(&grp->compressed_pages[nr], oldpage, page)) { - list_add(&page->lru, pagepool); - cpu_relax(); - goto repeat; - } - if (nocache || !tocache) - goto out; - if (add_to_page_cache_lru(page, mc, index + nr, gfp)) { - page->mapping = Z_EROFS_MAPPING_STAGING; - goto out; - } - - set_page_private(page, (unsigned long)grp); - SetPagePrivate(page); -out: /* the only exit (for tracing and debugging) */ - return page; -} - -static struct z_erofs_vle_unzip_io * -jobqueue_init(struct super_block *sb, - struct z_erofs_vle_unzip_io *io, - bool foreground) -{ - struct z_erofs_vle_unzip_io_sb *iosb; - - if (foreground) { - /* waitqueue available for foreground io */ - DBG_BUGON(!io); - - init_waitqueue_head(&io->u.wait); - atomic_set(&io->pending_bios, 0); - goto out; - } - - iosb = kvzalloc(sizeof(*iosb), GFP_KERNEL | __GFP_NOFAIL); - DBG_BUGON(!iosb); - - /* initialize fields in the allocated descriptor */ - io = &iosb->io; - iosb->sb = sb; - INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq); -out: - io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED; - return io; -} - -/* define workgroup jobqueue types */ -enum { -#ifdef EROFS_FS_HAS_MANAGED_CACHE - JQ_BYPASS, -#endif - JQ_SUBMIT, - NR_JOBQUEUES, -}; - -static void *jobqueueset_init(struct super_block *sb, - z_erofs_vle_owned_workgrp_t qtail[], - struct z_erofs_vle_unzip_io *q[], - struct z_erofs_vle_unzip_io *fgq, - bool forcefg) -{ -#ifdef EROFS_FS_HAS_MANAGED_CACHE - /* - * if managed cache is enabled, bypass jobqueue is needed, - * no need to read from device for all workgroups in this queue. - */ - q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, true); - qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head; -#endif - - q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, forcefg); - qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head; - - return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], !forcefg)); -} - -#ifdef EROFS_FS_HAS_MANAGED_CACHE -static void move_to_bypass_jobqueue(struct z_erofs_vle_workgroup *grp, - z_erofs_vle_owned_workgrp_t qtail[], - z_erofs_vle_owned_workgrp_t owned_head) -{ - z_erofs_vle_owned_workgrp_t *const submit_qtail = qtail[JQ_SUBMIT]; - z_erofs_vle_owned_workgrp_t *const bypass_qtail = qtail[JQ_BYPASS]; - - DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - if (owned_head == Z_EROFS_VLE_WORKGRP_TAIL) - owned_head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED; - - WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - - WRITE_ONCE(*submit_qtail, owned_head); - WRITE_ONCE(*bypass_qtail, &grp->next); - - qtail[JQ_BYPASS] = &grp->next; -} - -static bool postsubmit_is_all_bypassed(struct z_erofs_vle_unzip_io *q[], - unsigned int nr_bios, - bool force_fg) -{ - /* - * although background is preferred, no one is pending for submission. - * don't issue workqueue for decompression but drop it directly instead. 
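- * (That is: every workgroup was moved to the bypass queue, so the
- * descriptor allocated for background submission is never used; it is
- * freed right away and the bypassed workgroups are then decompressed
- * synchronously by the caller.)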
- */ - if (force_fg || nr_bios) - return false; - - kvfree(container_of(q[JQ_SUBMIT], - struct z_erofs_vle_unzip_io_sb, - io)); - return true; -} -#else -static void move_to_bypass_jobqueue(struct z_erofs_vle_workgroup *grp, - z_erofs_vle_owned_workgrp_t qtail[], - z_erofs_vle_owned_workgrp_t owned_head) -{ - /* impossible to bypass submission for managed cache disabled */ - DBG_BUGON(1); -} - -static bool postsubmit_is_all_bypassed(struct z_erofs_vle_unzip_io *q[], - unsigned int nr_bios, - bool force_fg) -{ - /* bios should be >0 if managed cache is disabled */ - DBG_BUGON(!nr_bios); - return false; -} -#endif - -static bool z_erofs_vle_submit_all(struct super_block *sb, - z_erofs_vle_owned_workgrp_t owned_head, - struct list_head *pagepool, - struct z_erofs_vle_unzip_io *fgq, - bool force_fg) -{ - struct erofs_sb_info *const sbi = EROFS_SB(sb); - const unsigned int clusterpages = erofs_clusterpages(sbi); - const gfp_t gfp = GFP_NOFS; - - z_erofs_vle_owned_workgrp_t qtail[NR_JOBQUEUES]; - struct z_erofs_vle_unzip_io *q[NR_JOBQUEUES]; - struct bio *bio; - void *bi_private; - /* since bio will be NULL, no need to initialize last_index */ - pgoff_t uninitialized_var(last_index); - bool force_submit = false; - unsigned int nr_bios; - - if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL)) - return false; - - force_submit = false; - bio = NULL; - nr_bios = 0; - bi_private = jobqueueset_init(sb, qtail, q, fgq, force_fg); - - /* by default, all need io submission */ - q[JQ_SUBMIT]->head = owned_head; - - do { - struct z_erofs_vle_workgroup *grp; - pgoff_t first_index; - struct page *page; - unsigned int i = 0, bypass = 0; - int err; - - /* no possible 'owned_head' equals the following */ - DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL); - - grp = container_of(owned_head, - struct z_erofs_vle_workgroup, next); - - /* close the main owned chain at first */ - owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL, - Z_EROFS_VLE_WORKGRP_TAIL_CLOSED); - - first_index = grp->obj.index; - force_submit |= (first_index != last_index + 1); - -repeat: - page = pickup_page_for_submission(grp, i, pagepool, - MNGD_MAPPING(sbi), gfp); - if (!page) { - force_submit = true; - ++bypass; - goto skippage; - } - - if (bio && force_submit) { -submit_bio_retry: - __submit_bio(bio, REQ_OP_READ, 0); - bio = NULL; - } - - if (!bio) { - bio = erofs_grab_bio(sb, first_index + i, - BIO_MAX_PAGES, bi_private, - z_erofs_vle_read_endio, true); - ++nr_bios; - } - - err = bio_add_page(bio, page, PAGE_SIZE, 0); - if (err < PAGE_SIZE) - goto submit_bio_retry; - - force_submit = false; - last_index = first_index + i; -skippage: - if (++i < clusterpages) - goto repeat; - - if (bypass < clusterpages) - qtail[JQ_SUBMIT] = &grp->next; - else - move_to_bypass_jobqueue(grp, qtail, owned_head); - } while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL); - - if (bio) - __submit_bio(bio, REQ_OP_READ, 0); - - if (postsubmit_is_all_bypassed(q, nr_bios, force_fg)) - return true; - - z_erofs_vle_unzip_kickoff(bi_private, nr_bios); - return true; -} - -static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f, - struct list_head *pagepool, - bool force_fg) -{ - struct super_block *sb = f->inode->i_sb; - struct z_erofs_vle_unzip_io io[NR_JOBQUEUES]; - - if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg)) - return; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - z_erofs_vle_unzip_all(sb, &io[JQ_BYPASS], pagepool); -#endif - if (!force_fg) - return; - - /* wait until all 
bios are completed */ - wait_event(io[JQ_SUBMIT].u.wait, - !atomic_read(&io[JQ_SUBMIT].pending_bios)); - - /* let's synchronous decompression */ - z_erofs_vle_unzip_all(sb, &io[JQ_SUBMIT], pagepool); -} - -static int z_erofs_vle_normalaccess_readpage(struct file *file, - struct page *page) -{ - struct inode *const inode = page->mapping->host; - struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode); - int err; - LIST_HEAD(pagepool); - - trace_erofs_readpage(page, false); - - f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT; - - err = z_erofs_do_read_page(&f, page, &pagepool); - (void)z_erofs_vle_work_iter_end(&f.builder); - - if (err) { - errln("%s, failed to read, err [%d]", __func__, err); - goto out; - } - - z_erofs_submit_and_unzip(&f, &pagepool, true); -out: - if (f.map.mpage) - put_page(f.map.mpage); - - /* clean up the remaining free pages */ - put_pages_list(&pagepool); - return 0; -} - -static int z_erofs_vle_normalaccess_readpages(struct file *filp, - struct address_space *mapping, - struct list_head *pages, - unsigned int nr_pages) -{ - struct inode *const inode = mapping->host; - struct erofs_sb_info *const sbi = EROFS_I_SB(inode); - - bool sync = __should_decompress_synchronously(sbi, nr_pages); - struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode); - gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); - struct page *head = NULL; - LIST_HEAD(pagepool); - - trace_erofs_readpages(mapping->host, lru_to_page(pages), - nr_pages, false); - - f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT; - - for (; nr_pages; --nr_pages) { - struct page *page = lru_to_page(pages); - - prefetchw(&page->flags); - list_del(&page->lru); - - /* - * A pure asynchronous readahead is indicated if - * a PG_readahead marked page is hitted at first. - * Let's also do asynchronous decompression for this case. - */ - sync &= !(PageReadahead(page) && !head); - - if (add_to_page_cache_lru(page, mapping, page->index, gfp)) { - list_add(&page->lru, &pagepool); - continue; - } - - set_page_private(page, (unsigned long)head); - head = page; - } - - while (head) { - struct page *page = head; - int err; - - /* traversal in reverse order */ - head = (void *)page_private(page); - - err = z_erofs_do_read_page(&f, page, &pagepool); - if (err) { - struct erofs_vnode *vi = EROFS_V(inode); - - errln("%s, readahead error at page %lu of nid %llu", - __func__, page->index, vi->nid); - } - - put_page(page); - } - - (void)z_erofs_vle_work_iter_end(&f.builder); - - z_erofs_submit_and_unzip(&f, &pagepool, sync); - - if (f.map.mpage) - put_page(f.map.mpage); - - /* clean up the remaining free pages */ - put_pages_list(&pagepool); - return 0; -} - -const struct address_space_operations z_erofs_vle_normalaccess_aops = { - .readpage = z_erofs_vle_normalaccess_readpage, - .readpages = z_erofs_vle_normalaccess_readpages, -}; - diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h deleted file mode 100644 index ab509d75aefd..000000000000 --- a/drivers/staging/erofs/unzip_vle.h +++ /dev/null @@ -1,196 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * linux/drivers/staging/erofs/unzip_vle.h - * - * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
- */ -#ifndef __EROFS_FS_UNZIP_VLE_H -#define __EROFS_FS_UNZIP_VLE_H - -#include "internal.h" -#include "unzip_pagevec.h" - -#define Z_EROFS_NR_INLINE_PAGEVECS 3 - -/* - * Structure fields follow one of the following exclusion rules. - * - * I: Modifiable by initialization/destruction paths and read-only - * for everyone else. - * - */ - -struct z_erofs_vle_work { - struct mutex lock; - - /* I: decompression offset in page */ - unsigned short pageofs; - unsigned short nr_pages; - - /* L: queued pages in pagevec[] */ - unsigned vcnt; - - union { - /* L: pagevec */ - erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS]; - struct rcu_head rcu; - }; -}; - -#define Z_EROFS_VLE_WORKGRP_FMT_PLAIN 0 -#define Z_EROFS_VLE_WORKGRP_FMT_LZ4 1 -#define Z_EROFS_VLE_WORKGRP_FMT_MASK 1 -#define Z_EROFS_VLE_WORKGRP_FULL_LENGTH 2 - -typedef void *z_erofs_vle_owned_workgrp_t; - -struct z_erofs_vle_workgroup { - struct erofs_workgroup obj; - struct z_erofs_vle_work work; - - /* point to next owned_workgrp_t */ - z_erofs_vle_owned_workgrp_t next; - - /* compressed pages (including multi-usage pages) */ - struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES]; - unsigned int llen, flags; -}; - -/* let's avoid the valid 32-bit kernel addresses */ - -/* the chained workgroup has't submitted io (still open) */ -#define Z_EROFS_VLE_WORKGRP_TAIL ((void *)0x5F0ECAFE) -/* the chained workgroup has already submitted io */ -#define Z_EROFS_VLE_WORKGRP_TAIL_CLOSED ((void *)0x5F0EDEAD) - -#define Z_EROFS_VLE_WORKGRP_NIL (NULL) - -#define z_erofs_vle_workgrp_fmt(grp) \ - ((grp)->flags & Z_EROFS_VLE_WORKGRP_FMT_MASK) - -static inline void z_erofs_vle_set_workgrp_fmt( - struct z_erofs_vle_workgroup *grp, - unsigned int fmt) -{ - grp->flags = fmt | (grp->flags & ~Z_EROFS_VLE_WORKGRP_FMT_MASK); -} - - -/* definitions if multiref is disabled */ -#define z_erofs_vle_grab_primary_work(grp) (&(grp)->work) -#define z_erofs_vle_grab_work(grp, pageofs) (&(grp)->work) -#define z_erofs_vle_work_workgroup(wrk, primary) \ - ((primary) ? container_of(wrk, \ - struct z_erofs_vle_workgroup, work) : \ - ({ BUG(); (void *)NULL; })) - - -#define Z_EROFS_WORKGROUP_SIZE sizeof(struct z_erofs_vle_workgroup) - -struct z_erofs_vle_unzip_io { - atomic_t pending_bios; - z_erofs_vle_owned_workgrp_t head; - - union { - wait_queue_head_t wait; - struct work_struct work; - } u; -}; - -struct z_erofs_vle_unzip_io_sb { - struct z_erofs_vle_unzip_io io; - struct super_block *sb; -}; - -#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2 -#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1) -#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS) - -/* - * waiters (aka. 
ongoing_packs): # to unlock the page - * sub-index: 0 - for partial page, >= 1 full page sub-index - */ -typedef atomic_t z_erofs_onlinepage_t; - -/* type punning */ -union z_erofs_onlinepage_converter { - z_erofs_onlinepage_t *o; - unsigned long *v; -}; - -static inline unsigned z_erofs_onlinepage_index(struct page *page) -{ - union z_erofs_onlinepage_converter u; - - DBG_BUGON(!PagePrivate(page)); - u.v = &page_private(page); - - return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; -} - -static inline void z_erofs_onlinepage_init(struct page *page) -{ - union { - z_erofs_onlinepage_t o; - unsigned long v; - /* keep from being unlocked in advance */ - } u = { .o = ATOMIC_INIT(1) }; - - set_page_private(page, u.v); - smp_wmb(); - SetPagePrivate(page); -} - -static inline void z_erofs_onlinepage_fixup(struct page *page, - uintptr_t index, bool down) -{ - unsigned long *p, o, v, id; -repeat: - p = &page_private(page); - o = READ_ONCE(*p); - - id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT; - if (id) { - if (!index) - return; - - DBG_BUGON(id != index); - } - - v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) | - ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned)down); - if (cmpxchg(p, o, v) != o) - goto repeat; -} - -static inline void z_erofs_onlinepage_endio(struct page *page) -{ - union z_erofs_onlinepage_converter u; - unsigned v; - - DBG_BUGON(!PagePrivate(page)); - u.v = &page_private(page); - - v = atomic_dec_return(u.o); - if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) { - ClearPagePrivate(page); - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - } - - debugln("%s, page %p value %x", __func__, page, atomic_read(u.o)); -} - -#define Z_EROFS_VLE_VMAP_ONSTACK_PAGES \ - min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U) -#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES 2048 - -#endif - diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c deleted file mode 100644 index 4bbd3bf34acd..000000000000 --- a/drivers/staging/erofs/utils.c +++ /dev/null @@ -1,353 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/utils.c - * - * Copyright (C) 2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
- */ - -#include "internal.h" -#include <linux/pagevec.h> - -struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) -{ - struct page *page; - - if (!list_empty(pool)) { - page = lru_to_page(pool); - list_del(&page->lru); - } else { - page = alloc_pages(gfp | __GFP_NOFAIL, 0); - } - return page; -} - -#if (EROFS_PCPUBUF_NR_PAGES > 0) -static struct { - u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES]; -} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS]; - -void *erofs_get_pcpubuf(unsigned int pagenr) -{ - preempt_disable(); - return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE]; -} -#endif - -/* global shrink count (for all mounted EROFS instances) */ -static atomic_long_t erofs_global_shrink_cnt; - -#ifdef CONFIG_EROFS_FS_ZIP -#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount) -#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount) - -static int erofs_workgroup_get(struct erofs_workgroup *grp) -{ - int o; - -repeat: - o = erofs_wait_on_workgroup_freezed(grp); - if (unlikely(o <= 0)) - return -1; - - if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o)) - goto repeat; - - /* decrease refcount paired by erofs_workgroup_put */ - if (unlikely(o == 1)) - atomic_long_dec(&erofs_global_shrink_cnt); - return 0; -} - -struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, - pgoff_t index, bool *tag) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_workgroup *grp; - -repeat: - rcu_read_lock(); - grp = radix_tree_lookup(&sbi->workstn_tree, index); - if (grp) { - *tag = xa_pointer_tag(grp); - grp = xa_untag_pointer(grp); - - if (erofs_workgroup_get(grp)) { - /* prefer to relax rcu read side */ - rcu_read_unlock(); - goto repeat; - } - - DBG_BUGON(index != grp->index); - } - rcu_read_unlock(); - return grp; -} - -int erofs_register_workgroup(struct super_block *sb, - struct erofs_workgroup *grp, - bool tag) -{ - struct erofs_sb_info *sbi; - int err; - - /* grp shouldn't be broken or used before */ - if (unlikely(atomic_read(&grp->refcount) != 1)) { - DBG_BUGON(1); - return -EINVAL; - } - - err = radix_tree_preload(GFP_NOFS); - if (err) - return err; - - sbi = EROFS_SB(sb); - erofs_workstn_lock(sbi); - - grp = xa_tag_pointer(grp, tag); - - /* - * Bump up reference count before making this workgroup - * visible to other users in order to avoid potential UAF - * without serialized by erofs_workstn_lock. - */ - __erofs_workgroup_get(grp); - - err = radix_tree_insert(&sbi->workstn_tree, - grp->index, grp); - if (unlikely(err)) - /* - * it's safe to decrease since the workgroup isn't visible - * and refcount >= 2 (cannot be freezed). 
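- *
- * (As a rough rule in this file: a refcount of exactly 1 means the
- * workgroup is referenced by nothing but the workstation tree, which is
- * what erofs_global_shrink_cnt tracks; erofs_workgroup_get()/_put()
- * adjust that counter whenever the refcount crosses the 1 <-> 2
- * boundary.)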
- */ - __erofs_workgroup_put(grp); - - erofs_workstn_unlock(sbi); - radix_tree_preload_end(); - return err; -} - -static void __erofs_workgroup_free(struct erofs_workgroup *grp) -{ - atomic_long_dec(&erofs_global_shrink_cnt); - erofs_workgroup_free_rcu(grp); -} - -int erofs_workgroup_put(struct erofs_workgroup *grp) -{ - int count = atomic_dec_return(&grp->refcount); - - if (count == 1) - atomic_long_inc(&erofs_global_shrink_cnt); - else if (!count) - __erofs_workgroup_free(grp); - return count; -} - -#ifdef EROFS_FS_HAS_MANAGED_CACHE -/* for cache-managed case, customized reclaim paths exist */ -static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp) -{ - erofs_workgroup_unfreeze(grp, 0); - __erofs_workgroup_free(grp); -} - -static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp, - bool cleanup) -{ - /* - * for managed cache enabled, the refcount of workgroups - * themselves could be < 0 (freezed). So there is no guarantee - * that all refcount > 0 if managed cache is enabled. - */ - if (!erofs_workgroup_try_to_freeze(grp, 1)) - return false; - - /* - * note that all cached pages should be unlinked - * before delete it from the radix tree. - * Otherwise some cached pages of an orphan old workgroup - * could be still linked after the new one is available. - */ - if (erofs_try_to_free_all_cached_pages(sbi, grp)) { - erofs_workgroup_unfreeze(grp, 1); - return false; - } - - /* - * it is impossible to fail after the workgroup is freezed, - * however in order to avoid some race conditions, add a - * DBG_BUGON to observe this in advance. - */ - DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree, - grp->index)) != grp); - - /* - * if managed cache is enable, the last refcount - * should indicate the related workstation. 
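- *
- * (Reclaim sequence in short: freeze the refcount at 1, strip all cached
- * compressed pages, delete the group from the workstation tree, then
- * unfreeze straight to 0 and let the RCU callback free it.)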
- */ - erofs_workgroup_unfreeze_final(grp); - return true; -} - -#else -/* for nocache case, no customized reclaim path at all */ -static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp, - bool cleanup) -{ - int cnt = atomic_read(&grp->refcount); - - DBG_BUGON(cnt <= 0); - DBG_BUGON(cleanup && cnt != 1); - - if (cnt > 1) - return false; - - DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree, - grp->index)) != grp); - - /* (rarely) could be grabbed again when freeing */ - erofs_workgroup_put(grp); - return true; -} - -#endif - -unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, - unsigned long nr_shrink, - bool cleanup) -{ - pgoff_t first_index = 0; - void *batch[PAGEVEC_SIZE]; - unsigned int freed = 0; - - int i, found; -repeat: - erofs_workstn_lock(sbi); - - found = radix_tree_gang_lookup(&sbi->workstn_tree, - batch, first_index, PAGEVEC_SIZE); - - for (i = 0; i < found; ++i) { - struct erofs_workgroup *grp = xa_untag_pointer(batch[i]); - - first_index = grp->index + 1; - - /* try to shrink each valid workgroup */ - if (!erofs_try_to_release_workgroup(sbi, grp, cleanup)) - continue; - - ++freed; - if (unlikely(!--nr_shrink)) - break; - } - erofs_workstn_unlock(sbi); - - if (i && nr_shrink) - goto repeat; - return freed; -} - -#endif - -/* protected by 'erofs_sb_list_lock' */ -static unsigned int shrinker_run_no; - -/* protects the mounted 'erofs_sb_list' */ -static DEFINE_SPINLOCK(erofs_sb_list_lock); -static LIST_HEAD(erofs_sb_list); - -void erofs_register_super(struct super_block *sb) -{ - struct erofs_sb_info *sbi = EROFS_SB(sb); - - mutex_init(&sbi->umount_mutex); - - spin_lock(&erofs_sb_list_lock); - list_add(&sbi->list, &erofs_sb_list); - spin_unlock(&erofs_sb_list_lock); -} - -void erofs_unregister_super(struct super_block *sb) -{ - spin_lock(&erofs_sb_list_lock); - list_del(&EROFS_SB(sb)->list); - spin_unlock(&erofs_sb_list_lock); -} - -static unsigned long erofs_shrink_count(struct shrinker *shrink, - struct shrink_control *sc) -{ - return atomic_long_read(&erofs_global_shrink_cnt); -} - -static unsigned long erofs_shrink_scan(struct shrinker *shrink, - struct shrink_control *sc) -{ - struct erofs_sb_info *sbi; - struct list_head *p; - - unsigned long nr = sc->nr_to_scan; - unsigned int run_no; - unsigned long freed = 0; - - spin_lock(&erofs_sb_list_lock); - do - run_no = ++shrinker_run_no; - while (run_no == 0); - - /* Iterate over all mounted superblocks and try to shrink them */ - p = erofs_sb_list.next; - while (p != &erofs_sb_list) { - sbi = list_entry(p, struct erofs_sb_info, list); - - /* - * We move the ones we do to the end of the list, so we stop - * when we see one we have already done. - */ - if (sbi->shrinker_run_no == run_no) - break; - - if (!mutex_trylock(&sbi->umount_mutex)) { - p = p->next; - continue; - } - - spin_unlock(&erofs_sb_list_lock); - sbi->shrinker_run_no = run_no; - -#ifdef CONFIG_EROFS_FS_ZIP - freed += erofs_shrink_workstation(sbi, nr, false); -#endif - - spin_lock(&erofs_sb_list_lock); - /* Get the next list element before we move this one */ - p = p->next; - - /* - * Move this one to the end of the list to provide some - * fairness. 
- */ - list_move_tail(&sbi->list, &erofs_sb_list); - mutex_unlock(&sbi->umount_mutex); - - if (freed >= nr) - break; - } - spin_unlock(&erofs_sb_list_lock); - return freed; -} - -struct shrinker erofs_shrinker_info = { - .scan_objects = erofs_shrink_scan, - .count_objects = erofs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; - diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c deleted file mode 100644 index df40654b9fbb..000000000000 --- a/drivers/staging/erofs/xattr.c +++ /dev/null @@ -1,704 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/xattr.c - * - * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - */ -#include <linux/security.h> -#include "xattr.h" - -struct xattr_iter { - struct super_block *sb; - struct page *page; - void *kaddr; - - erofs_blk_t blkaddr; - unsigned int ofs; -}; - -static inline void xattr_iter_end(struct xattr_iter *it, bool atomic) -{ - /* the only user of kunmap() is 'init_inode_xattrs' */ - if (unlikely(!atomic)) - kunmap(it->page); - else - kunmap_atomic(it->kaddr); - - unlock_page(it->page); - put_page(it->page); -} - -static inline void xattr_iter_end_final(struct xattr_iter *it) -{ - if (!it->page) - return; - - xattr_iter_end(it, true); -} - -static int init_inode_xattrs(struct inode *inode) -{ - struct erofs_vnode *const vi = EROFS_V(inode); - struct xattr_iter it; - unsigned int i; - struct erofs_xattr_ibody_header *ih; - struct super_block *sb; - struct erofs_sb_info *sbi; - bool atomic_map; - int ret = 0; - - /* the most case is that xattrs of this inode are initialized. */ - if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags)) - return 0; - - if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE)) - return -ERESTARTSYS; - - /* someone has initialized xattrs for us? */ - if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags)) - goto out_unlock; - - /* - * bypass all xattr operations if ->xattr_isize is not greater than - * sizeof(struct erofs_xattr_ibody_header), in detail: - * 1) it is not enough to contain erofs_xattr_ibody_header then - * ->xattr_isize should be 0 (it means no xattr); - * 2) it is just to contain erofs_xattr_ibody_header, which is on-disk - * undefined right now (maybe use later with some new sb feature). 
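- *
- * For reference, the inline xattr area this file walks looks roughly
- * like (little-endian on disk, entries 4-byte aligned):
- *
- *	struct erofs_xattr_ibody_header    h_shared_count, ...
- *	__le32 shared_ids[h_shared_count]  read into vi->xattr_shared_xattrs
- *	erofs_xattr_entry + name + value   repeated, parsed by xattr_foreach()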
- */ - if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) { - errln("xattr_isize %d of nid %llu is not supported yet", - vi->xattr_isize, vi->nid); - ret = -ENOTSUPP; - goto out_unlock; - } else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) { - if (unlikely(vi->xattr_isize)) { - DBG_BUGON(1); - ret = -EIO; - goto out_unlock; /* xattr ondisk layout error */ - } - ret = -ENOATTR; - goto out_unlock; - } - - sb = inode->i_sb; - sbi = EROFS_SB(sb); - it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize); - it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); - - it.page = erofs_get_inline_page(inode, it.blkaddr); - if (IS_ERR(it.page)) { - ret = PTR_ERR(it.page); - goto out_unlock; - } - - /* read in shared xattr array (non-atomic, see kmalloc below) */ - it.kaddr = kmap(it.page); - atomic_map = false; - - ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs); - - vi->xattr_shared_count = ih->h_shared_count; - vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count, - sizeof(uint), GFP_KERNEL); - if (!vi->xattr_shared_xattrs) { - xattr_iter_end(&it, atomic_map); - ret = -ENOMEM; - goto out_unlock; - } - - /* let's skip ibody header */ - it.ofs += sizeof(struct erofs_xattr_ibody_header); - - for (i = 0; i < vi->xattr_shared_count; ++i) { - if (unlikely(it.ofs >= EROFS_BLKSIZ)) { - /* cannot be unaligned */ - BUG_ON(it.ofs != EROFS_BLKSIZ); - xattr_iter_end(&it, atomic_map); - - it.page = erofs_get_meta_page(sb, ++it.blkaddr, - S_ISDIR(inode->i_mode)); - if (IS_ERR(it.page)) { - kfree(vi->xattr_shared_xattrs); - vi->xattr_shared_xattrs = NULL; - ret = PTR_ERR(it.page); - goto out_unlock; - } - - it.kaddr = kmap_atomic(it.page); - atomic_map = true; - it.ofs = 0; - } - vi->xattr_shared_xattrs[i] = - le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs)); - it.ofs += sizeof(__le32); - } - xattr_iter_end(&it, atomic_map); - - set_bit(EROFS_V_EA_INITED_BIT, &vi->flags); - -out_unlock: - clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags); - return ret; -} - -/* - * the general idea for these return values is - * if 0 is returned, go on processing the current xattr; - * 1 (> 0) is returned, skip this round to process the next xattr; - * -err (< 0) is returned, an error (maybe ENOXATTR) occurred - * and need to be handled - */ -struct xattr_iter_handlers { - int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry); - int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf, - unsigned int len); - int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz); - void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf, - unsigned int len); -}; - -static inline int xattr_iter_fixup(struct xattr_iter *it) -{ - if (it->ofs < EROFS_BLKSIZ) - return 0; - - xattr_iter_end(it, true); - - it->blkaddr += erofs_blknr(it->ofs); - - it->page = erofs_get_meta_page(it->sb, it->blkaddr, false); - if (IS_ERR(it->page)) { - int err = PTR_ERR(it->page); - - it->page = NULL; - return err; - } - - it->kaddr = kmap_atomic(it->page); - it->ofs = erofs_blkoff(it->ofs); - return 0; -} - -static int inline_xattr_iter_begin(struct xattr_iter *it, - struct inode *inode) -{ - struct erofs_vnode *const vi = EROFS_V(inode); - struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb); - unsigned int xattr_header_sz, inline_xattr_ofs; - - xattr_header_sz = inlinexattr_header_size(inode); - if (unlikely(xattr_header_sz >= vi->xattr_isize)) { - BUG_ON(xattr_header_sz > vi->xattr_isize); - return -ENOATTR; - } - - inline_xattr_ofs = 
vi->inode_isize + xattr_header_sz; - - it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs); - it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs); - - it->page = erofs_get_inline_page(inode, it->blkaddr); - if (IS_ERR(it->page)) - return PTR_ERR(it->page); - - it->kaddr = kmap_atomic(it->page); - return vi->xattr_isize - xattr_header_sz; -} - -/* - * Regardless of success or failure, `xattr_foreach' will end up with - * `ofs' pointing to the next xattr item rather than an arbitrary position. - */ -static int xattr_foreach(struct xattr_iter *it, - const struct xattr_iter_handlers *op, - unsigned int *tlimit) -{ - struct erofs_xattr_entry entry; - unsigned int value_sz, processed, slice; - int err; - - /* 0. fixup blkaddr, ofs, ipage */ - err = xattr_iter_fixup(it); - if (err) - return err; - - /* - * 1. read xattr entry to the memory, - * since we do EROFS_XATTR_ALIGN - * therefore entry should be in the page - */ - entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs); - if (tlimit) { - unsigned int entry_sz = EROFS_XATTR_ENTRY_SIZE(&entry); - - BUG_ON(*tlimit < entry_sz); - *tlimit -= entry_sz; - } - - it->ofs += sizeof(struct erofs_xattr_entry); - value_sz = le16_to_cpu(entry.e_value_size); - - /* handle entry */ - err = op->entry(it, &entry); - if (err) { - it->ofs += entry.e_name_len + value_sz; - goto out; - } - - /* 2. handle xattr name (ofs will finally be at the end of name) */ - processed = 0; - - while (processed < entry.e_name_len) { - if (it->ofs >= EROFS_BLKSIZ) { - BUG_ON(it->ofs > EROFS_BLKSIZ); - - err = xattr_iter_fixup(it); - if (err) - goto out; - it->ofs = 0; - } - - slice = min_t(unsigned int, PAGE_SIZE - it->ofs, - entry.e_name_len - processed); - - /* handle name */ - err = op->name(it, processed, it->kaddr + it->ofs, slice); - if (err) { - it->ofs += entry.e_name_len - processed + value_sz; - goto out; - } - - it->ofs += slice; - processed += slice; - } - - /* 3. handle xattr value */ - processed = 0; - - if (op->alloc_buffer) { - err = op->alloc_buffer(it, value_sz); - if (err) { - it->ofs += value_sz; - goto out; - } - } - - while (processed < value_sz) { - if (it->ofs >= EROFS_BLKSIZ) { - BUG_ON(it->ofs > EROFS_BLKSIZ); - - err = xattr_iter_fixup(it); - if (err) - goto out; - it->ofs = 0; - } - - slice = min_t(unsigned int, PAGE_SIZE - it->ofs, - value_sz - processed); - op->value(it, processed, it->kaddr + it->ofs, slice); - it->ofs += slice; - processed += slice; - } - -out: - /* xattrs should be 4-byte aligned (on-disk constraint) */ - it->ofs = EROFS_XATTR_ALIGN(it->ofs); - return err < 0 ? err : 0; -} - -struct getxattr_iter { - struct xattr_iter it; - - char *buffer; - int buffer_size, index; - struct qstr name; -}; - -static int xattr_entrymatch(struct xattr_iter *_it, - struct erofs_xattr_entry *entry) -{ - struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); - - return (it->index != entry->e_name_index || - it->name.len != entry->e_name_len) ? -ENOATTR : 0; -} - -static int xattr_namematch(struct xattr_iter *_it, - unsigned int processed, char *buf, unsigned int len) -{ - struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); - - return memcmp(buf, it->name.name + processed, len) ? -ENOATTR : 0; -} - -static int xattr_checkbuffer(struct xattr_iter *_it, - unsigned int value_sz) -{ - struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); - int err = it->buffer_size < value_sz ? -ERANGE : 0; - - it->buffer_size = value_sz; - return !it->buffer ? 
1 : err; -} - -static void xattr_copyvalue(struct xattr_iter *_it, - unsigned int processed, - char *buf, unsigned int len) -{ - struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it); - - memcpy(it->buffer + processed, buf, len); -} - -static const struct xattr_iter_handlers find_xattr_handlers = { - .entry = xattr_entrymatch, - .name = xattr_namematch, - .alloc_buffer = xattr_checkbuffer, - .value = xattr_copyvalue -}; - -static int inline_getxattr(struct inode *inode, struct getxattr_iter *it) -{ - int ret; - unsigned int remaining; - - ret = inline_xattr_iter_begin(&it->it, inode); - if (ret < 0) - return ret; - - remaining = ret; - while (remaining) { - ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining); - if (ret != -ENOATTR) - break; - } - xattr_iter_end_final(&it->it); - - return ret ? ret : it->buffer_size; -} - -static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) -{ - struct erofs_vnode *const vi = EROFS_V(inode); - struct super_block *const sb = inode->i_sb; - struct erofs_sb_info *const sbi = EROFS_SB(sb); - unsigned int i; - int ret = -ENOATTR; - - for (i = 0; i < vi->xattr_shared_count; ++i) { - erofs_blk_t blkaddr = - xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); - - it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); - - if (!i || blkaddr != it->it.blkaddr) { - if (i) - xattr_iter_end(&it->it, true); - - it->it.page = erofs_get_meta_page(sb, blkaddr, false); - if (IS_ERR(it->it.page)) - return PTR_ERR(it->it.page); - - it->it.kaddr = kmap_atomic(it->it.page); - it->it.blkaddr = blkaddr; - } - - ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL); - if (ret != -ENOATTR) - break; - } - if (vi->xattr_shared_count) - xattr_iter_end_final(&it->it); - - return ret ? ret : it->buffer_size; -} - -static bool erofs_xattr_user_list(struct dentry *dentry) -{ - return test_opt(EROFS_SB(dentry->d_sb), XATTR_USER); -} - -static bool erofs_xattr_trusted_list(struct dentry *dentry) -{ - return capable(CAP_SYS_ADMIN); -} - -int erofs_getxattr(struct inode *inode, int index, - const char *name, - void *buffer, size_t buffer_size) -{ - int ret; - struct getxattr_iter it; - - if (unlikely(!name)) - return -EINVAL; - - ret = init_inode_xattrs(inode); - if (ret) - return ret; - - it.index = index; - - it.name.len = strlen(name); - if (it.name.len > EROFS_NAME_LEN) - return -ERANGE; - it.name.name = name; - - it.buffer = buffer; - it.buffer_size = buffer_size; - - it.it.sb = inode->i_sb; - ret = inline_getxattr(inode, &it); - if (ret == -ENOATTR) - ret = shared_getxattr(inode, &it); - return ret; -} - -static int erofs_xattr_generic_get(const struct xattr_handler *handler, - struct dentry *unused, struct inode *inode, - const char *name, void *buffer, size_t size) -{ - struct erofs_sb_info *const sbi = EROFS_I_SB(inode); - - switch (handler->flags) { - case EROFS_XATTR_INDEX_USER: - if (!test_opt(sbi, XATTR_USER)) - return -EOPNOTSUPP; - break; - case EROFS_XATTR_INDEX_TRUSTED: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - break; - case EROFS_XATTR_INDEX_SECURITY: - break; - default: - return -EINVAL; - } - - return erofs_getxattr(inode, handler->flags, name, buffer, size); -} - -const struct xattr_handler erofs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .flags = EROFS_XATTR_INDEX_USER, - .list = erofs_xattr_user_list, - .get = erofs_xattr_generic_get, -}; - -const struct xattr_handler erofs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .flags = EROFS_XATTR_INDEX_TRUSTED, - .list = 
erofs_xattr_trusted_list, - .get = erofs_xattr_generic_get, -}; - -#ifdef CONFIG_EROFS_FS_SECURITY -const struct xattr_handler __maybe_unused erofs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .flags = EROFS_XATTR_INDEX_SECURITY, - .get = erofs_xattr_generic_get, -}; -#endif - -const struct xattr_handler *erofs_xattr_handlers[] = { - &erofs_xattr_user_handler, -#ifdef CONFIG_EROFS_FS_POSIX_ACL - &posix_acl_access_xattr_handler, - &posix_acl_default_xattr_handler, -#endif - &erofs_xattr_trusted_handler, -#ifdef CONFIG_EROFS_FS_SECURITY - &erofs_xattr_security_handler, -#endif - NULL, -}; - -struct listxattr_iter { - struct xattr_iter it; - - struct dentry *dentry; - char *buffer; - int buffer_size, buffer_ofs; -}; - -static int xattr_entrylist(struct xattr_iter *_it, - struct erofs_xattr_entry *entry) -{ - struct listxattr_iter *it = - container_of(_it, struct listxattr_iter, it); - unsigned int prefix_len; - const char *prefix; - - const struct xattr_handler *h = - erofs_xattr_handler(entry->e_name_index); - - if (!h || (h->list && !h->list(it->dentry))) - return 1; - - prefix = xattr_prefix(h); - prefix_len = strlen(prefix); - - if (!it->buffer) { - it->buffer_ofs += prefix_len + entry->e_name_len + 1; - return 1; - } - - if (it->buffer_ofs + prefix_len - + entry->e_name_len + 1 > it->buffer_size) - return -ERANGE; - - memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len); - it->buffer_ofs += prefix_len; - return 0; -} - -static int xattr_namelist(struct xattr_iter *_it, - unsigned int processed, char *buf, unsigned int len) -{ - struct listxattr_iter *it = - container_of(_it, struct listxattr_iter, it); - - memcpy(it->buffer + it->buffer_ofs, buf, len); - it->buffer_ofs += len; - return 0; -} - -static int xattr_skipvalue(struct xattr_iter *_it, - unsigned int value_sz) -{ - struct listxattr_iter *it = - container_of(_it, struct listxattr_iter, it); - - it->buffer[it->buffer_ofs++] = '\0'; - return 1; -} - -static const struct xattr_iter_handlers list_xattr_handlers = { - .entry = xattr_entrylist, - .name = xattr_namelist, - .alloc_buffer = xattr_skipvalue, - .value = NULL -}; - -static int inline_listxattr(struct listxattr_iter *it) -{ - int ret; - unsigned int remaining; - - ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry)); - if (ret < 0) - return ret; - - remaining = ret; - while (remaining) { - ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining); - if (ret) - break; - } - xattr_iter_end_final(&it->it); - return ret ? ret : it->buffer_ofs; -} - -static int shared_listxattr(struct listxattr_iter *it) -{ - struct inode *const inode = d_inode(it->dentry); - struct erofs_vnode *const vi = EROFS_V(inode); - struct super_block *const sb = inode->i_sb; - struct erofs_sb_info *const sbi = EROFS_SB(sb); - unsigned int i; - int ret = 0; - - for (i = 0; i < vi->xattr_shared_count; ++i) { - erofs_blk_t blkaddr = - xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]); - - it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]); - if (!i || blkaddr != it->it.blkaddr) { - if (i) - xattr_iter_end(&it->it, true); - - it->it.page = erofs_get_meta_page(sb, blkaddr, false); - if (IS_ERR(it->it.page)) - return PTR_ERR(it->it.page); - - it->it.kaddr = kmap_atomic(it->it.page); - it->it.blkaddr = blkaddr; - } - - ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL); - if (ret) - break; - } - if (vi->xattr_shared_count) - xattr_iter_end_final(&it->it); - - return ret ? 
ret : it->buffer_ofs; -} - -ssize_t erofs_listxattr(struct dentry *dentry, - char *buffer, size_t buffer_size) -{ - int ret; - struct listxattr_iter it; - - ret = init_inode_xattrs(d_inode(dentry)); - if (ret) - return ret; - - it.dentry = dentry; - it.buffer = buffer; - it.buffer_size = buffer_size; - it.buffer_ofs = 0; - - it.it.sb = dentry->d_sb; - - ret = inline_listxattr(&it); - if (ret < 0 && ret != -ENOATTR) - return ret; - return shared_listxattr(&it); -} - -#ifdef CONFIG_EROFS_FS_POSIX_ACL -struct posix_acl *erofs_get_acl(struct inode *inode, int type) -{ - struct posix_acl *acl; - int prefix, rc; - char *value = NULL; - - switch (type) { - case ACL_TYPE_ACCESS: - prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - case ACL_TYPE_DEFAULT: - prefix = EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - default: - return ERR_PTR(-EINVAL); - } - - rc = erofs_getxattr(inode, prefix, "", NULL, 0); - if (rc > 0) { - value = kmalloc(rc, GFP_KERNEL); - if (!value) - return ERR_PTR(-ENOMEM); - rc = erofs_getxattr(inode, prefix, "", value, rc); - } - - if (rc == -ENOATTR) - acl = NULL; - else if (rc < 0) - acl = ERR_PTR(rc); - else - acl = posix_acl_from_xattr(&init_user_ns, value, rc); - kfree(value); - return acl; -} -#endif - diff --git a/drivers/staging/erofs/xattr.h b/drivers/staging/erofs/xattr.h deleted file mode 100644 index 35ba5ac2139a..000000000000 --- a/drivers/staging/erofs/xattr.h +++ /dev/null @@ -1,97 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * linux/drivers/staging/erofs/xattr.h - * - * Copyright (C) 2017-2018 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - */ -#ifndef __EROFS_XATTR_H -#define __EROFS_XATTR_H - -#include "internal.h" -#include <linux/posix_acl_xattr.h> -#include <linux/xattr.h> - -/* Attribute not found */ -#define ENOATTR ENODATA - -static inline unsigned inlinexattr_header_size(struct inode *inode) -{ - return sizeof(struct erofs_xattr_ibody_header) - + sizeof(u32) * EROFS_V(inode)->xattr_shared_count; -} - -static inline erofs_blk_t -xattrblock_addr(struct erofs_sb_info *sbi, unsigned xattr_id) -{ -#ifdef CONFIG_EROFS_FS_XATTR - return sbi->xattr_blkaddr + - xattr_id * sizeof(__u32) / EROFS_BLKSIZ; -#else - return 0; -#endif -} - -static inline unsigned -xattrblock_offset(struct erofs_sb_info *sbi, unsigned xattr_id) -{ - return (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ; -} - -extern const struct xattr_handler erofs_xattr_user_handler; -extern const struct xattr_handler erofs_xattr_trusted_handler; -#ifdef CONFIG_EROFS_FS_SECURITY -extern const struct xattr_handler erofs_xattr_security_handler; -#endif - -static inline const struct xattr_handler *erofs_xattr_handler(unsigned index) -{ -static const struct xattr_handler *xattr_handler_map[] = { - [EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler, -#ifdef CONFIG_EROFS_FS_POSIX_ACL - [EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, - [EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = - &posix_acl_default_xattr_handler, -#endif - [EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler, -#ifdef CONFIG_EROFS_FS_SECURITY - [EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler, -#endif -}; - return index && index < ARRAY_SIZE(xattr_handler_map) ? 
- xattr_handler_map[index] : NULL; -} - -#ifdef CONFIG_EROFS_FS_XATTR -extern const struct xattr_handler *erofs_xattr_handlers[]; - -int erofs_getxattr(struct inode *, int, const char *, void *, size_t); -ssize_t erofs_listxattr(struct dentry *, char *, size_t); -#else -static int __maybe_unused erofs_getxattr(struct inode *inode, int index, - const char *name, - void *buffer, size_t buffer_size) -{ - return -ENOTSUPP; -} - -static ssize_t __maybe_unused erofs_listxattr(struct dentry *dentry, - char *buffer, size_t buffer_size) -{ - return -ENOTSUPP; -} -#endif - -#ifdef CONFIG_EROFS_FS_POSIX_ACL -struct posix_acl *erofs_get_acl(struct inode *inode, int type); -#else -#define erofs_get_acl (NULL) -#endif - -#endif - diff --git a/drivers/staging/erofs/zmap.c b/drivers/staging/erofs/zmap.c deleted file mode 100644 index 9c0bd65c46bf..000000000000 --- a/drivers/staging/erofs/zmap.c +++ /dev/null @@ -1,463 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/drivers/staging/erofs/zmap.c - * - * Copyright (C) 2018-2019 HUAWEI, Inc. - * http://www.huawei.com/ - * Created by Gao Xiang <gaoxiang25@huawei.com> - */ -#include "internal.h" -#include <asm/unaligned.h> -#include <trace/events/erofs.h> - -int z_erofs_fill_inode(struct inode *inode) -{ - struct erofs_vnode *const vi = EROFS_V(inode); - struct super_block *const sb = inode->i_sb; - - if (vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { - vi->z_advise = 0; - vi->z_algorithmtype[0] = 0; - vi->z_algorithmtype[1] = 0; - vi->z_logical_clusterbits = EROFS_SB(sb)->clusterbits; - vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits; - vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits; - set_bit(EROFS_V_Z_INITED_BIT, &vi->flags); - } - - inode->i_mapping->a_ops = &z_erofs_vle_normalaccess_aops; - return 0; -} - -static int fill_inode_lazy(struct inode *inode) -{ - struct erofs_vnode *const vi = EROFS_V(inode); - struct super_block *const sb = inode->i_sb; - int err; - erofs_off_t pos; - struct page *page; - void *kaddr; - struct z_erofs_map_header *h; - - if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags)) - return 0; - - if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_Z_BIT, TASK_KILLABLE)) - return -ERESTARTSYS; - - err = 0; - if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags)) - goto out_unlock; - - DBG_BUGON(vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY); - - pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + - vi->xattr_isize, 8); - page = erofs_get_meta_page(sb, erofs_blknr(pos), false); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out_unlock; - } - - kaddr = kmap_atomic(page); - - h = kaddr + erofs_blkoff(pos); - vi->z_advise = le16_to_cpu(h->h_advise); - vi->z_algorithmtype[0] = h->h_algorithmtype & 15; - vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; - - if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) { - errln("unknown compression format %u for nid %llu, please upgrade kernel", - vi->z_algorithmtype[0], vi->nid); - err = -ENOTSUPP; - goto unmap_done; - } - - vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); - vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits + - ((h->h_clusterbits >> 3) & 3); - - if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) { - errln("unsupported physical clusterbits %u for nid %llu, please upgrade kernel", - vi->z_physical_clusterbits[0], vi->nid); - err = -ENOTSUPP; - goto unmap_done; - } - - vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits + - ((h->h_clusterbits >> 5) & 7); -unmap_done: - kunmap_atomic(kaddr); - unlock_page(page); 
- put_page(page); - - set_bit(EROFS_V_Z_INITED_BIT, &vi->flags); -out_unlock: - clear_and_wake_up_bit(EROFS_V_BL_Z_BIT, &vi->flags); - return err; -} - -struct z_erofs_maprecorder { - struct inode *inode; - struct erofs_map_blocks *map; - void *kaddr; - - unsigned long lcn; - /* compression extent information gathered */ - u8 type; - u16 clusterofs; - u16 delta[2]; - erofs_blk_t pblk; -}; - -static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, - erofs_blk_t eblk) -{ - struct super_block *const sb = m->inode->i_sb; - struct erofs_map_blocks *const map = m->map; - struct page *mpage = map->mpage; - - if (mpage) { - if (mpage->index == eblk) { - if (!m->kaddr) - m->kaddr = kmap_atomic(mpage); - return 0; - } - - if (m->kaddr) { - kunmap_atomic(m->kaddr); - m->kaddr = NULL; - } - put_page(mpage); - } - - mpage = erofs_get_meta_page(sb, eblk, false); - if (IS_ERR(mpage)) { - map->mpage = NULL; - return PTR_ERR(mpage); - } - m->kaddr = kmap_atomic(mpage); - unlock_page(mpage); - map->mpage = mpage; - return 0; -} - -static int vle_legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned long lcn) -{ - struct inode *const inode = m->inode; - struct erofs_vnode *const vi = EROFS_V(inode); - const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid); - const erofs_off_t pos = - Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize + - vi->xattr_isize) + - lcn * sizeof(struct z_erofs_vle_decompressed_index); - struct z_erofs_vle_decompressed_index *di; - unsigned int advise, type; - int err; - - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); - if (err) - return err; - - m->lcn = lcn; - di = m->kaddr + erofs_blkoff(pos); - - advise = le16_to_cpu(di->di_advise); - type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) & - ((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1); - switch (type) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - m->clusterofs = 1 << vi->z_logical_clusterbits; - m->delta[0] = le16_to_cpu(di->di_u.delta[0]); - m->delta[1] = le16_to_cpu(di->di_u.delta[1]); - break; - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: - m->clusterofs = le16_to_cpu(di->di_clusterofs); - m->pblk = le32_to_cpu(di->di_u.blkaddr); - break; - default: - DBG_BUGON(1); - return -EIO; - } - m->type = type; - return 0; -} - -static unsigned int decode_compactedbits(unsigned int lobits, - unsigned int lomask, - u8 *in, unsigned int pos, u8 *type) -{ - const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7); - const unsigned int lo = v & lomask; - - *type = (v >> lobits) & 3; - return lo; -} - -static int unpack_compacted_index(struct z_erofs_maprecorder *m, - unsigned int amortizedshift, - unsigned int eofs) -{ - struct erofs_vnode *const vi = EROFS_V(m->inode); - const unsigned int lclusterbits = vi->z_logical_clusterbits; - const unsigned int lomask = (1 << lclusterbits) - 1; - unsigned int vcnt, base, lo, encodebits, nblk; - int i; - u8 *in, type; - - if (1 << amortizedshift == 4) - vcnt = 2; - else if (1 << amortizedshift == 2 && lclusterbits == 12) - vcnt = 16; - else - return -ENOTSUPP; - - encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; - base = round_down(eofs, vcnt << amortizedshift); - in = m->kaddr + base; - - i = (eofs - base) >> amortizedshift; - - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * i, &type); - m->type = type; - if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { - m->clusterofs = 1 << lclusterbits; - if (i + 1 != vcnt) { - m->delta[0] = lo; - return 0; - } - /* - * since the last lcluster 
in the pack is special, - * of which lo saves delta[1] rather than delta[0]. - * Hence, get delta[0] by the previous lcluster indirectly. - */ - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * (i - 1), &type); - if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) - lo = 0; - m->delta[0] = lo + 1; - return 0; - } - m->clusterofs = lo; - m->delta[0] = 0; - /* figout out blkaddr (pblk) for HEAD lclusters */ - nblk = 1; - while (i > 0) { - --i; - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * i, &type); - if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) - i -= lo; - - if (i >= 0) - ++nblk; - } - in += (vcnt << amortizedshift) - sizeof(__le32); - m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; - return 0; -} - -static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned long lcn) -{ - struct inode *const inode = m->inode; - struct erofs_vnode *const vi = EROFS_V(inode); - const unsigned int lclusterbits = vi->z_logical_clusterbits; - const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) + - vi->inode_isize + vi->xattr_isize, 8) + - sizeof(struct z_erofs_map_header); - const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ); - unsigned int compacted_4b_initial, compacted_2b; - unsigned int amortizedshift; - erofs_off_t pos; - int err; - - if (lclusterbits != 12) - return -ENOTSUPP; - - if (lcn >= totalidx) - return -EINVAL; - - m->lcn = lcn; - /* used to align to 32-byte (compacted_2b) alignment */ - compacted_4b_initial = (32 - ebase % 32) / 4; - if (compacted_4b_initial == 32 / 4) - compacted_4b_initial = 0; - - if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) - compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); - else - compacted_2b = 0; - - pos = ebase; - if (lcn < compacted_4b_initial) { - amortizedshift = 2; - goto out; - } - pos += compacted_4b_initial * 4; - lcn -= compacted_4b_initial; - - if (lcn < compacted_2b) { - amortizedshift = 1; - goto out; - } - pos += compacted_2b * 2; - lcn -= compacted_2b; - amortizedshift = 2; -out: - pos += lcn * (1 << amortizedshift); - err = z_erofs_reload_indexes(m, erofs_blknr(pos)); - if (err) - return err; - return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos)); -} - -static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned int lcn) -{ - const unsigned int datamode = EROFS_V(m->inode)->datamode; - - if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) - return vle_legacy_load_cluster_from_disk(m, lcn); - - if (datamode == EROFS_INODE_FLAT_COMPRESSION) - return compacted_load_cluster_from_disk(m, lcn); - - return -EINVAL; -} - -static int vle_extent_lookback(struct z_erofs_maprecorder *m, - unsigned int lookback_distance) -{ - struct erofs_vnode *const vi = EROFS_V(m->inode); - struct erofs_map_blocks *const map = m->map; - const unsigned int lclusterbits = vi->z_logical_clusterbits; - unsigned long lcn = m->lcn; - int err; - - if (lcn < lookback_distance) { - DBG_BUGON(1); - return -EIO; - } - - /* load extent head logical cluster if needed */ - lcn -= lookback_distance; - err = vle_load_cluster_from_disk(m, lcn); - if (err) - return err; - - switch (m->type) { - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - return vle_extent_lookback(m, m->delta[0]); - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - map->m_flags &= ~EROFS_MAP_ZIPPED; - /* fallthrough */ - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: - map->m_la = (lcn << lclusterbits) | m->clusterofs; - break; - default: - errln("unknown type %u at lcn %lu of nid %llu", - m->type, lcn, vi->nid); 
- DBG_BUGON(1); - return -EIO; - } - return 0; -} - -int z_erofs_map_blocks_iter(struct inode *inode, - struct erofs_map_blocks *map, - int flags) -{ - struct erofs_vnode *const vi = EROFS_V(inode); - struct z_erofs_maprecorder m = { - .inode = inode, - .map = map, - }; - int err = 0; - unsigned int lclusterbits, endoff; - unsigned long long ofs, end; - - trace_z_erofs_map_blocks_iter_enter(inode, map, flags); - - /* when trying to read beyond EOF, leave it unmapped */ - if (unlikely(map->m_la >= inode->i_size)) { - map->m_llen = map->m_la + 1 - inode->i_size; - map->m_la = inode->i_size; - map->m_flags = 0; - goto out; - } - - err = fill_inode_lazy(inode); - if (err) - goto out; - - lclusterbits = vi->z_logical_clusterbits; - ofs = map->m_la; - m.lcn = ofs >> lclusterbits; - endoff = ofs & ((1 << lclusterbits) - 1); - - err = vle_load_cluster_from_disk(&m, m.lcn); - if (err) - goto unmap_out; - - map->m_flags = EROFS_MAP_ZIPPED; /* by default, compressed */ - end = (m.lcn + 1ULL) << lclusterbits; - - switch (m.type) { - case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - if (endoff >= m.clusterofs) - map->m_flags &= ~EROFS_MAP_ZIPPED; - /* fallthrough */ - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: - if (endoff >= m.clusterofs) { - map->m_la = (m.lcn << lclusterbits) | m.clusterofs; - break; - } - /* m.lcn should be >= 1 if endoff < m.clusterofs */ - if (unlikely(!m.lcn)) { - errln("invalid logical cluster 0 at nid %llu", - vi->nid); - err = -EIO; - goto unmap_out; - } - end = (m.lcn << lclusterbits) | m.clusterofs; - map->m_flags |= EROFS_MAP_FULL_MAPPED; - m.delta[0] = 1; - /* fallthrough */ - case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - /* get the correspoinding first chunk */ - err = vle_extent_lookback(&m, m.delta[0]); - if (unlikely(err)) - goto unmap_out; - break; - default: - errln("unknown type %u at offset %llu of nid %llu", - m.type, ofs, vi->nid); - err = -EIO; - goto unmap_out; - } - - map->m_llen = end - map->m_la; - map->m_plen = 1 << lclusterbits; - map->m_pa = blknr_to_addr(m.pblk); - map->m_flags |= EROFS_MAP_MAPPED; - -unmap_out: - if (m.kaddr) - kunmap_atomic(m.kaddr); - -out: - debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o", - __func__, map->m_la, map->m_pa, - map->m_llen, map->m_plen, map->m_flags); - - trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err); - - /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */ - DBG_BUGON(err < 0 && err != -ENOMEM); - return err; -} - |
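
Note on the removed zmap.c above: z_erofs_map_blocks_iter first splits the requested
logical offset into a logical cluster number (lcn) and an in-cluster offset with plain
shift/mask arithmetic, and only then walks the on-disk (legacy or compacted) index to
find the matching physical block. The stand-alone sketch below reproduces just that
bit arithmetic for illustration; it is user-space demo code, not part of the driver,
and the helper name demo_split is invented for the example. It assumes a 4KB logical
cluster (lclusterbits == 12), which matches the LOG_BLOCK_SIZE base used by the
removed code.

    /* Illustrative user-space sketch (not kernel code): how a logical file
     * offset maps to a logical cluster number and an in-cluster offset,
     * mirroring the shift/mask step at the top of z_erofs_map_blocks_iter.
     */
    #include <stdio.h>
    #include <stdint.h>

    static void demo_split(uint64_t ofs, unsigned int lclusterbits)
    {
            uint64_t lcn = ofs >> lclusterbits;              /* logical cluster number */
            unsigned int endoff = ofs & ((1U << lclusterbits) - 1); /* offset inside it */

            printf("ofs %llu -> lcn %llu, endoff %u\n",
                   (unsigned long long)ofs, (unsigned long long)lcn, endoff);
    }

    int main(void)
    {
            demo_split(0, 12);      /* start of file: lcn 0, endoff 0 */
            demo_split(4096, 12);   /* second lcluster: lcn 1, endoff 0 */
            demo_split(10000, 12);  /* lcn 2, endoff 1808 */
            return 0;
    }

In the real function the decompressed extent start is then reported as
(lcn << lclusterbits) | clusterofs, which is why HEAD/PLAIN lclusters carry a
clusterofs field while NONHEAD ones only carry the look-back deltas used by
vle_extent_lookback.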