diff options
Diffstat (limited to 'drivers/staging/erofs')
-rw-r--r-- | drivers/staging/erofs/Documentation/filesystems/erofs.txt | 208 | ||||
-rw-r--r-- | drivers/staging/erofs/Makefile | 2 | ||||
-rw-r--r-- | drivers/staging/erofs/data.c | 37 | ||||
-rw-r--r-- | drivers/staging/erofs/dir.c | 12 | ||||
-rw-r--r-- | drivers/staging/erofs/inode.c | 41 | ||||
-rw-r--r-- | drivers/staging/erofs/internal.h | 147 | ||||
-rw-r--r-- | drivers/staging/erofs/namei.c | 194 | ||||
-rw-r--r-- | drivers/staging/erofs/super.c | 29 | ||||
-rw-r--r-- | drivers/staging/erofs/unzip_vle.c | 165 | ||||
-rw-r--r-- | drivers/staging/erofs/unzip_vle.h | 23 | ||||
-rw-r--r-- | drivers/staging/erofs/unzip_vle_lz4.c | 21 | ||||
-rw-r--r-- | drivers/staging/erofs/utils.c | 58 | ||||
-rw-r--r-- | drivers/staging/erofs/xattr.c | 115 | ||||
-rw-r--r-- | drivers/staging/erofs/xattr.h | 10 |
14 files changed, 662 insertions, 400 deletions
diff --git a/drivers/staging/erofs/Documentation/filesystems/erofs.txt b/drivers/staging/erofs/Documentation/filesystems/erofs.txt new file mode 100644 index 000000000000..961ec4da7705 --- /dev/null +++ b/drivers/staging/erofs/Documentation/filesystems/erofs.txt @@ -0,0 +1,208 @@ +Overview +======== + +EROFS file-system stands for Enhanced Read-Only File System. Different +from other read-only file systems, it is designed for flexibility and +scalability, while being kept simple and high-performance. + +It is designed as a better filesystem solution for the following scenarios: + - read-only storage media or + + - part of a fully trusted read-only solution, which means it needs to be + immutable and bit-for-bit identical to the official golden image for + their releases due to security and other considerations and + + - hope to save some extra storage space with guaranteed end-to-end performance + by using reduced metadata and transparent file compression, especially + for those embedded devices with limited memory (e.g., smartphones); + +Here are the main features of EROFS: + - Little endian on-disk design; + + - Currently 4KB block size (nobh) and therefore maximum 16TB address space; + + - Metadata & data could be mixed by design; + + - 2 inode versions for different requirements: + v1 v2 + Inode metadata size: 32 bytes 64 bytes + Max file size: 4 GB 16 EB (also limited by max.
vol size) + Max uids/gids: 65536 4294967296 + File creation time: no yes (64 + 32-bit timestamp) + Max hardlinks: 65536 4294967296 + Metadata reserved: 4 bytes 14 bytes + + - Support extended attributes (xattrs) as an option; + + - Support xattr inline and tail-end data inline for all files; + + - Support POSIX.1e ACLs by using xattrs; + + - Support transparent file compression as an option: + LZ4 algorithm with 4 KB fixed-output compression for high performance; + +The following git tree provides the file system user-space tools under +development (e.g., the formatting tool mkfs.erofs): +>> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git + +Bug reports and patches are welcome; please kindly help us and send them to the +following linux-erofs mailing list: +>> linux-erofs mailing list <linux-erofs@lists.ozlabs.org> + +Note that EROFS is still a work in progress as a Linux staging driver; +Cc'ing the staging mailing list as well is highly recommended: +>> Linux Driver Project Developer List <devel@driverdev.osuosl.org> + +Mount options +============= + +fault_injection=%d Enable fault injection in all supported types with + specified injection rate. Supported injection type: + Type_Name Type_Value + FAULT_KMALLOC 0x000000001 +(no)user_xattr Setup Extended User Attributes. Note: xattr is enabled + by default if CONFIG_EROFS_FS_XATTR is selected. +(no)acl Setup POSIX Access Control List. Note: acl is enabled + by default if CONFIG_EROFS_FS_POSIX_ACL is selected. + +On-disk details +=============== + +Summary +------- +Different from other read-only file systems, an EROFS volume is designed +to be as simple as possible: + + |-> aligned with the block size + ____________________________________________________________ + | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data | + |_|__|_|_____|__________|_____|______|__________|_____|______| + 0 +1K + +All data areas should be aligned with the block size, but metadata areas +may not.
All metadatas can be now observed in two different spaces (views): + 1. Inode metadata space + Each valid inode should be aligned with an inode slot, which is a fixed + value (32 bytes) and designed to be kept in line with v1 inode size. + + Each inode can be directly found with the following formula: + inode offset = meta_blkaddr * block_size + 32 * nid + + |-> aligned with 8B + |-> followed closely + + meta_blkaddr blocks |-> another slot + _____________________________________________________________________ + | ... | inode | xattrs | extents | data inline | ... | inode ... + |________|_______|(optional)|(optional)|__(optional)_|_____|__________ + |-> aligned with the inode slot size + . . + . . + . . + . . + . . + . . + .____________________________________________________|-> aligned with 4B + | xattr_ibody_header | shared xattrs | inline xattrs | + |____________________|_______________|_______________| + |-> 12 bytes <-|->x * 4 bytes<-| . + . . . + . . . + . . . + ._______________________________.______________________. + | id | id | id | id | ... | id | ent | ... | ent| ... | + |____|____|____|____|______|____|_____|_____|____|_____| + |-> aligned with 4B + |-> aligned with 4B + + Inode could be 32 or 64 bytes, which can be distinguished from a common + field which all inode versions have -- i_advise: + + __________________ __________________ + | i_advise | | i_advise | + |__________________| |__________________| + | ... | | ... 
| + | | | | + |__________________| 32 bytes | | + | | + |__________________| 64 bytes + + Xattrs, extents, data inline are placed after the corresponding inode with + proper alignment, and they could be optional for different data mappings, + _currently_ there are 3 valid data mappings supported: + + 1) flat file data without data inline (no extent); + 2) fixed-output size data compression (must have extents); + 3) flat file data with tail-end data inline (no extent); + + The size of the optional xattrs is indicated by i_xattr_count in inode + header. Large xattrs or xattrs shared by many different files can be + stored in shared xattrs metadata rather than inlined right after inode. + + 2. Shared xattrs metadata space + Shared xattrs space is similar to the above inode space, started with + a specific block indicated by xattr_blkaddr, organized one by one with + proper alignment. + + Each shared xattr can also be directly found by the following formula: + xattr offset = xattr_blkaddr * block_size + 4 * xattr_id + + |-> aligned by 4 bytes + + xattr_blkaddr blocks |-> aligned with 4 bytes + _________________________________________________________________________ + | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... + |________|_____________|_____________|_____|______________|_______________ + +Directories +----------- +All directories are now organized in a compact on-disk format. Note that +each directory block is divided into index and name areas in order to support +random file lookup, and all directory entries are _strictly_ recorded in +alphabetical order in order to support an improved prefix binary search +algorithm (refer to the related source code for details). + + ___________________________ + / | + / ______________|________________ + / / | nameoff1 | nameoffN-1 + ____________.______________._______________v________________v__________ +| dirent | dirent | ... | dirent | filename | filename | ...
| filename | +|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| + \ ^ + \ | * could have + \ | trailing '\0' + \________________________| nameoff0 + + Directory block + +Note that apart from the offset of the first filename, nameoff0 also indicates +the total number of directory entries in this block since there is no need to +introduce another on-disk field at all. + +Compression +----------- +Currently, EROFS supports 4KB fixed-output clustersize transparent file +compression, as illustrated below: + + |---- Variant-Length Extent ----|-------- VLE --------|----- VLE ----- + clusterofs clusterofs clusterofs + | | | logical data +_________v_______________________________v_____________________v_______________ +... | . | | . | | . | ... +____|____.________|_____________|________.____|_____________|__.__________|____ + |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-| + size size size size size + . . . . + . . . . + . . . . + _______._____________._____________._____________._____________________ + ... | | | | ... physical data + _______|_____________|_____________|_____________|_____________________ + |-> cluster <-|-> cluster <-|-> cluster <-| + size size size + +Currently each on-disk physical cluster can contain 4KB (un)compressed data +at most. For each logical cluster, there is a corresponding on-disk index to +describe its cluster type, physical cluster address, etc. + +See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+ diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile index c91b65223f99..38ab344a285e 100644 --- a/drivers/staging/erofs/Makefile +++ b/drivers/staging/erofs/Makefile @@ -6,7 +6,7 @@ ccflags-y += -Wall -DEROFS_VERSION=\"$(EROFS_VERSION)\" obj-$(CONFIG_EROFS_FS) += erofs.o # staging requirement: to be self-contained in its own directory -ccflags-y += -I$(src)/include +ccflags-y += -I $(srctree)/$(src)/include erofs-objs := super.o inode.o data.o namei.o dir.o utils.o erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_vle_lz4.o diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c index 5a55f0bfdfbb..9c471f08ffd4 100644 --- a/drivers/staging/erofs/data.c +++ b/drivers/staging/erofs/data.c @@ -165,43 +165,16 @@ err_out: return err; } -#ifdef CONFIG_EROFS_FS_ZIP -extern int z_erofs_map_blocks_iter(struct inode *, - struct erofs_map_blocks *, - struct page **, int); -#endif - -int erofs_map_blocks_iter(struct inode *inode, - struct erofs_map_blocks *map, - struct page **mpage_ret, int flags) -{ - /* by default, reading raw data never use erofs_map_blocks_iter */ - if (unlikely(!is_inode_layout_compression(inode))) { - if (*mpage_ret) - put_page(*mpage_ret); - *mpage_ret = NULL; - - return erofs_map_blocks(inode, map, flags); - } - -#ifdef CONFIG_EROFS_FS_ZIP - return z_erofs_map_blocks_iter(inode, map, mpage_ret, flags); -#else - /* data compression is not available */ - return -ENOTSUPP; -#endif -} - int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map, int flags) { if (unlikely(is_inode_layout_compression(inode))) { - struct page *mpage = NULL; - int err; + int err = z_erofs_map_blocks_iter(inode, map, flags); - err = erofs_map_blocks_iter(inode, map, &mpage, flags); - if (mpage) - put_page(mpage); + if (map->mpage) { + put_page(map->mpage); + map->mpage = NULL; + } return err; } return erofs_map_blocks_flatmode(inode, map, flags); diff --git 
a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c index 833f052f79d0..829f7b12e0dc 100644 --- a/drivers/staging/erofs/dir.c +++ b/drivers/staging/erofs/dir.c @@ -24,8 +24,8 @@ static const unsigned char erofs_filetype_table[EROFS_FT_MAX] = { }; static int erofs_fill_dentries(struct dir_context *ctx, - void *dentry_blk, unsigned int *ofs, - unsigned int nameoff, unsigned int maxsize) + void *dentry_blk, unsigned int *ofs, + unsigned int nameoff, unsigned int maxsize) { struct erofs_dirent *de = dentry_blk; const struct erofs_dirent *end = dentry_blk + nameoff; @@ -98,15 +98,14 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) if (IS_ERR(dentry_page)) continue; - lock_page(dentry_page); de = (struct erofs_dirent *)kmap(dentry_page); nameoff = le16_to_cpu(de->nameoff); if (unlikely(nameoff < sizeof(struct erofs_dirent) || - nameoff >= PAGE_SIZE)) { + nameoff >= PAGE_SIZE)) { errln("%s, invalid de[0].nameoff %u", - __func__, nameoff); + __func__, nameoff); err = -EIO; goto skip_this; @@ -128,7 +127,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) skip_this: kunmap(dentry_page); - unlock_page(dentry_page); put_page(dentry_page); ctx->pos = blknr_to_addr(i) + ofs; @@ -144,6 +142,6 @@ skip_this: const struct file_operations erofs_dir_fops = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = erofs_readdir, + .iterate_shared = erofs_readdir, }; diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c index d7fbf5f4600f..924b8dfc7a8f 100644 --- a/drivers/staging/erofs/inode.c +++ b/drivers/staging/erofs/inode.c @@ -184,32 +184,18 @@ static int fill_inode(struct inode *inode, int isdir) if (!err) { /* setup the new inode */ if (S_ISREG(inode->i_mode)) { -#ifdef CONFIG_EROFS_FS_XATTR - if (vi->xattr_isize) - inode->i_op = &erofs_generic_xattr_iops; -#endif + inode->i_op = &erofs_generic_iops; inode->i_fop = &generic_ro_fops; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = -#ifdef 
CONFIG_EROFS_FS_XATTR - vi->xattr_isize ? &erofs_dir_xattr_iops : -#endif - &erofs_dir_iops; + inode->i_op = &erofs_dir_iops; inode->i_fop = &erofs_dir_fops; } else if (S_ISLNK(inode->i_mode)) { /* by default, page_get_link is used for symlink */ - inode->i_op = -#ifdef CONFIG_EROFS_FS_XATTR - &erofs_symlink_xattr_iops, -#else - &page_symlink_inode_operations; -#endif + inode->i_op = &erofs_symlink_iops; inode_nohighmem(inode); } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { -#ifdef CONFIG_EROFS_FS_XATTR - inode->i_op = &erofs_special_inode_operations; -#endif + inode->i_op = &erofs_generic_iops; init_special_inode(inode, inode->i_mode, inode->i_rdev); } else { err = -EIO; @@ -297,23 +283,26 @@ struct inode *erofs_iget(struct super_block *sb, return inode; } +const struct inode_operations erofs_generic_iops = { #ifdef CONFIG_EROFS_FS_XATTR -const struct inode_operations erofs_generic_xattr_iops = { .listxattr = erofs_listxattr, +#endif + .get_acl = erofs_get_acl, }; -const struct inode_operations erofs_symlink_xattr_iops = { +const struct inode_operations erofs_symlink_iops = { .get_link = page_get_link, +#ifdef CONFIG_EROFS_FS_XATTR .listxattr = erofs_listxattr, +#endif + .get_acl = erofs_get_acl, }; -const struct inode_operations erofs_special_inode_operations = { - .listxattr = erofs_listxattr, -}; - -const struct inode_operations erofs_fast_symlink_xattr_iops = { +const struct inode_operations erofs_fast_symlink_iops = { .get_link = simple_get_link, +#ifdef CONFIG_EROFS_FS_XATTR .listxattr = erofs_listxattr, -}; #endif + .get_acl = erofs_get_acl, +}; diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h index e049d00c087a..e3bfde00c7d2 100644 --- a/drivers/staging/erofs/internal.h +++ b/drivers/staging/erofs/internal.h @@ -252,47 +252,20 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) } #endif -static inline bool 
erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt) -{ - int o; - -repeat: - o = erofs_wait_on_workgroup_freezed(grp); - - if (unlikely(o <= 0)) - return -1; - - if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o)) - goto repeat; - - *ocnt = o; - return 0; -} - -#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount) -#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount) - -extern int erofs_workgroup_put(struct erofs_workgroup *grp); - -extern struct erofs_workgroup *erofs_find_workgroup( - struct super_block *sb, pgoff_t index, bool *tag); - -extern int erofs_register_workgroup(struct super_block *sb, - struct erofs_workgroup *grp, bool tag); - -extern unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, - unsigned long nr_shrink, bool cleanup); - -static inline void erofs_workstation_cleanup_all(struct super_block *sb) -{ - erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true); -} +int erofs_workgroup_put(struct erofs_workgroup *grp); +struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, + pgoff_t index, bool *tag); +int erofs_register_workgroup(struct super_block *sb, + struct erofs_workgroup *grp, bool tag); +unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, + unsigned long nr_shrink, bool cleanup); +void erofs_workgroup_free_rcu(struct erofs_workgroup *grp); #ifdef EROFS_FS_HAS_MANAGED_CACHE -extern int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *egrp); -extern int erofs_try_to_free_cached_page(struct address_space *mapping, - struct page *page); +int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, + struct erofs_workgroup *egrp); +int erofs_try_to_free_cached_page(struct address_space *mapping, + struct page *page); #define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) #else @@ -354,12 +327,17 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) return blknr_to_addr(sbi->meta_blkaddr) + (nid << 
sbi->islotbits); } -#define inode_set_inited_xattr(inode) (EROFS_V(inode)->flags |= 1) -#define inode_has_inited_xattr(inode) (EROFS_V(inode)->flags & 1) +/* atomic flag definitions */ +#define EROFS_V_EA_INITED_BIT 0 + +/* bitlock definitions (arranged in reverse order) */ +#define EROFS_V_BL_XATTR_BIT (BITS_PER_LONG - 1) struct erofs_vnode { erofs_nid_t nid; - unsigned int flags; + + /* atomic flags (including bitlocks) */ + unsigned long flags; unsigned char data_mapping_mode; /* inline size in bytes */ @@ -412,8 +390,6 @@ static inline bool is_inode_layout_inline(struct inode *inode) } extern const struct super_operations erofs_sops; -extern const struct inode_operations erofs_dir_iops; -extern const struct file_operations erofs_dir_fops; extern const struct address_space_operations erofs_raw_access_aops; #ifdef CONFIG_EROFS_FS_ZIP @@ -461,11 +437,26 @@ struct erofs_map_blocks { u64 m_plen, m_llen; unsigned int m_flags; + + struct page *mpage; }; /* Flags used by erofs_map_blocks() */ #define EROFS_GET_BLOCKS_RAW 0x0001 +#ifdef CONFIG_EROFS_FS_ZIP +int z_erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + int flags); +#else +static inline int z_erofs_map_blocks_iter(struct inode *inode, + struct erofs_map_blocks *map, + int flags) +{ + return -ENOTSUPP; +} +#endif + /* data.c */ static inline struct bio * erofs_grab_bio(struct super_block *sb, @@ -506,8 +497,8 @@ static inline void __submit_bio(struct bio *bio, unsigned op, unsigned op_flags) #define EROFS_IO_MAX_RETRIES_NOFAIL CONFIG_EROFS_FS_IO_MAX_RETRIES #endif -extern struct page *__erofs_get_meta_page(struct super_block *sb, - erofs_blk_t blkaddr, bool prio, bool nofail); +struct page *__erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr, + bool prio, bool nofail); static inline struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr, bool prio) @@ -521,15 +512,7 @@ static inline struct page *erofs_get_meta_page_nofail(struct super_block *sb, 
return __erofs_get_meta_page(sb, blkaddr, prio, true); } -extern int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int); -extern int erofs_map_blocks_iter(struct inode *, struct erofs_map_blocks *, - struct page **, int); - -struct erofs_map_blocks_iter { - struct erofs_map_blocks map; - struct page *mpage; -}; - +int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int); static inline struct page * erofs_get_inline_page(struct inode *inode, @@ -549,41 +532,31 @@ static inline unsigned long erofs_inode_hash(erofs_nid_t nid) #endif } -extern struct inode *erofs_iget(struct super_block *sb, - erofs_nid_t nid, bool dir); - -/* dir.c */ -int erofs_namei(struct inode *dir, struct qstr *name, - erofs_nid_t *nid, unsigned *d_type); - -#ifdef CONFIG_EROFS_FS_XATTR -/* xattr.c */ -extern const struct xattr_handler *erofs_xattr_handlers[]; - -/* symlink and special inode */ -extern const struct inode_operations erofs_symlink_xattr_iops; -extern const struct inode_operations erofs_fast_symlink_xattr_iops; -extern const struct inode_operations erofs_special_inode_operations; -#endif +extern const struct inode_operations erofs_generic_iops; +extern const struct inode_operations erofs_symlink_iops; +extern const struct inode_operations erofs_fast_symlink_iops; static inline void set_inode_fast_symlink(struct inode *inode) { -#ifdef CONFIG_EROFS_FS_XATTR - inode->i_op = &erofs_fast_symlink_xattr_iops; -#else - inode->i_op = &simple_symlink_inode_operations; -#endif + inode->i_op = &erofs_fast_symlink_iops; } static inline bool is_inode_fast_symlink(struct inode *inode) { -#ifdef CONFIG_EROFS_FS_XATTR - return inode->i_op == &erofs_fast_symlink_xattr_iops; -#else - return inode->i_op == &simple_symlink_inode_operations; -#endif + return inode->i_op == &erofs_fast_symlink_iops; } +struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir); + +/* namei.c */ +extern const struct inode_operations erofs_dir_iops; + +int erofs_namei(struct inode 
*dir, struct qstr *name, + erofs_nid_t *nid, unsigned int *d_type); + +/* dir.c */ +extern const struct file_operations erofs_dir_fops; + static inline void *erofs_vmap(struct page **pages, unsigned int count) { #ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM @@ -612,15 +585,11 @@ static inline void erofs_vunmap(const void *mem, unsigned int count) } /* utils.c */ -extern struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); - -extern void erofs_register_super(struct super_block *sb); -extern void erofs_unregister_super(struct super_block *sb); +extern struct shrinker erofs_shrinker_info; -extern unsigned long erofs_shrink_count(struct shrinker *shrink, - struct shrink_control *sc); -extern unsigned long erofs_shrink_scan(struct shrinker *shrink, - struct shrink_control *sc); +struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); +void erofs_register_super(struct super_block *sb); +void erofs_unregister_super(struct super_block *sb); #ifndef lru_to_page #define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c index 5596c52e246d..3f4fa52c10fa 100644 --- a/drivers/staging/erofs/namei.c +++ b/drivers/staging/erofs/namei.c @@ -15,74 +15,77 @@ #include <trace/events/erofs.h> -/* based on the value of qn->len is accurate */ -static inline int dirnamecmp(struct qstr *qn, - struct qstr *qd, unsigned int *matched) +struct erofs_qstr { + const unsigned char *name; + const unsigned char *end; +}; + +/* based on the end of qn is accurate and it must have the trailing '\0' */ +static inline int dirnamecmp(const struct erofs_qstr *qn, + const struct erofs_qstr *qd, + unsigned int *matched) { - unsigned int i = *matched, len = min(qn->len, qd->len); -loop: - if (unlikely(i >= len)) { - *matched = i; - if (qn->len < qd->len) { - /* - * actually (qn->len == qd->len) - * when qd->name[i] == '\0' - */ - return qd->name[i] == '\0' ? 
0 : -1; + unsigned int i = *matched; + + /* + * on-disk error, let's only BUG_ON in the debugging mode. + * otherwise, it will return 1 to just skip the invalid name + * and go on (in consideration of the lookup performance). + */ + DBG_BUGON(qd->name > qd->end); + + /* qd could not have trailing '\0' */ + /* However it is absolutely safe if < qd->end */ + while (qd->name + i < qd->end && qd->name[i] != '\0') { + if (qn->name[i] != qd->name[i]) { + *matched = i; + return qn->name[i] > qd->name[i] ? 1 : -1; } - return (qn->len > qd->len); - } - - if (qn->name[i] != qd->name[i]) { - *matched = i; - return qn->name[i] > qd->name[i] ? 1 : -1; + ++i; } - - ++i; - goto loop; + *matched = i; + /* See comments in __d_alloc on the terminating NUL character */ + return qn->name[i] == '\0' ? 0 : 1; } -static struct erofs_dirent *find_target_dirent( - struct qstr *name, - u8 *data, int maxsize) +#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1)) + +static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name, + u8 *data, + unsigned int dirblksize, + const int ndirents) { - unsigned int ndirents, head, back; + int head, back; unsigned int startprfx, endprfx; struct erofs_dirent *const de = (struct erofs_dirent *)data; - /* make sure that maxsize is valid */ - BUG_ON(maxsize < sizeof(struct erofs_dirent)); - - ndirents = le16_to_cpu(de->nameoff) / sizeof(*de); - - /* corrupted dir (may be unnecessary...) */ - BUG_ON(!ndirents); - - head = 0; + /* since the 1st dirent has been evaluated previously */ + head = 1; back = ndirents - 1; startprfx = endprfx = 0; while (head <= back) { - unsigned int mid = head + (back - head) / 2; - unsigned int nameoff = le16_to_cpu(de[mid].nameoff); + const int mid = head + (back - head) / 2; + const int nameoff = nameoff_from_disk(de[mid].nameoff, + dirblksize); unsigned int matched = min(startprfx, endprfx); - - struct qstr dname = QSTR_INIT(data + nameoff, - unlikely(mid >= ndirents - 1) ? 
- maxsize - nameoff : - le16_to_cpu(de[mid + 1].nameoff) - nameoff); + struct erofs_qstr dname = { + .name = data + nameoff, + .end = unlikely(mid >= ndirents - 1) ? + data + dirblksize : + data + nameoff_from_disk(de[mid + 1].nameoff, + dirblksize) + }; /* string comparison without already matched prefix */ int ret = dirnamecmp(name, &dname, &matched); - if (unlikely(!ret)) + if (unlikely(!ret)) { return de + mid; - else if (ret > 0) { + } else if (ret > 0) { head = mid + 1; startprfx = matched; - } else if (unlikely(mid < 1)) /* fix "mid" overflow */ - break; - else { + } else { back = mid - 1; endprfx = matched; } @@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent( return ERR_PTR(-ENOENT); } -static struct page *find_target_block_classic( - struct inode *dir, - struct qstr *name, int *_diff) +static struct page *find_target_block_classic(struct inode *dir, + struct erofs_qstr *name, + int *_ndirents) { unsigned int startprfx, endprfx; - unsigned int head, back; + int head, back; struct address_space *const mapping = dir->i_mapping; struct page *candidate = ERR_PTR(-ENOENT); @@ -105,89 +108,97 @@ static struct page *find_target_block_classic( back = inode_datablocks(dir) - 1; while (head <= back) { - unsigned int mid = head + (back - head) / 2; + const int mid = head + (back - head) / 2; struct page *page = read_mapping_page(mapping, mid, NULL); - if (IS_ERR(page)) { -exact_out: - if (!IS_ERR(candidate)) /* valid candidate */ - put_page(candidate); - return page; - } else { - int diff; - unsigned int ndirents, matched; - struct qstr dname; + if (!IS_ERR(page)) { struct erofs_dirent *de = kmap_atomic(page); - unsigned int nameoff = le16_to_cpu(de->nameoff); - - ndirents = nameoff / sizeof(*de); + const int nameoff = nameoff_from_disk(de->nameoff, + EROFS_BLKSIZ); + const int ndirents = nameoff / sizeof(*de); + int diff; + unsigned int matched; + struct erofs_qstr dname; - /* corrupted dir (should have one entry at least) */ - BUG_ON(!ndirents || 
nameoff > PAGE_SIZE); + if (unlikely(!ndirents)) { + DBG_BUGON(1); + kunmap_atomic(de); + put_page(page); + page = ERR_PTR(-EIO); + goto out; + } matched = min(startprfx, endprfx); dname.name = (u8 *)de + nameoff; - dname.len = ndirents == 1 ? - /* since the rest of the last page is 0 */ - EROFS_BLKSIZ - nameoff - : le16_to_cpu(de[1].nameoff) - nameoff; + if (ndirents == 1) + dname.end = (u8 *)de + EROFS_BLKSIZ; + else + dname.end = (u8 *)de + + nameoff_from_disk(de[1].nameoff, + EROFS_BLKSIZ); /* string comparison without already matched prefix */ diff = dirnamecmp(name, &dname, &matched); kunmap_atomic(de); if (unlikely(!diff)) { - *_diff = 0; - goto exact_out; + *_ndirents = 0; + goto out; } else if (diff > 0) { head = mid + 1; startprfx = matched; - if (likely(!IS_ERR(candidate))) + if (!IS_ERR(candidate)) put_page(candidate); candidate = page; + *_ndirents = ndirents; } else { put_page(page); - if (unlikely(mid < 1)) /* fix "mid" overflow */ - break; - back = mid - 1; endprfx = matched; } + continue; } +out: /* free if the candidate is valid */ + if (!IS_ERR(candidate)) + put_page(candidate); + return page; } - *_diff = 1; return candidate; } int erofs_namei(struct inode *dir, - struct qstr *name, - erofs_nid_t *nid, unsigned int *d_type) + struct qstr *name, + erofs_nid_t *nid, unsigned int *d_type) { - int diff; + int ndirents; struct page *page; - u8 *data; + void *data; struct erofs_dirent *de; + struct erofs_qstr qn; if (unlikely(!dir->i_size)) return -ENOENT; - diff = 1; - page = find_target_block_classic(dir, name, &diff); + qn.name = name->name; + qn.end = name->name + name->len; + + ndirents = 0; + page = find_target_block_classic(dir, &qn, &ndirents); - if (unlikely(IS_ERR(page))) + if (IS_ERR(page)) return PTR_ERR(page); data = kmap_atomic(page); /* the target page has been mapped */ - de = likely(diff) ? 
- /* since the rest of the last page is 0 */ - find_target_dirent(name, data, EROFS_BLKSIZ) : - (struct erofs_dirent *)data; + if (ndirents) + de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents); + else + de = (struct erofs_dirent *)data; - if (likely(!IS_ERR(de))) { + if (!IS_ERR(de)) { *nid = le64_to_cpu(de->nid); *d_type = de->file_type; } @@ -235,12 +246,9 @@ static struct dentry *erofs_lookup(struct inode *dir, const struct inode_operations erofs_dir_iops = { .lookup = erofs_lookup, -}; - -const struct inode_operations erofs_dir_xattr_iops = { - .lookup = erofs_lookup, #ifdef CONFIG_EROFS_FS_XATTR .listxattr = erofs_listxattr, #endif + .get_acl = erofs_get_acl, }; diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c index 1c2eb69682ef..15c784fba879 100644 --- a/drivers/staging/erofs/super.c +++ b/drivers/staging/erofs/super.c @@ -16,6 +16,7 @@ #include <linux/parser.h> #include <linux/seq_file.h> #include "internal.h" +#include "xattr.h" #define CREATE_TRACE_POINTS #include <trace/events/erofs.h> @@ -397,6 +398,11 @@ static int erofs_read_super(struct super_block *sb, if (!silent) infoln("root inode @ nid %llu", ROOT_NID(sbi)); + if (test_opt(sbi, POSIX_ACL)) + sb->s_flags |= SB_POSIXACL; + else + sb->s_flags &= ~SB_POSIXACL; + #ifdef CONFIG_EROFS_FS_ZIP INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC); #endif @@ -420,13 +426,14 @@ static int erofs_read_super(struct super_block *sb, errln("rootino(nid %llu) is not a directory(i_mode %o)", ROOT_NID(sbi), inode->i_mode); err = -EINVAL; - goto err_isdir; + iput(inode); + goto err_iget; } sb->s_root = d_make_root(inode); if (sb->s_root == NULL) { err = -ENOMEM; - goto err_makeroot; + goto err_iget; } /* save the device name to sbi */ @@ -452,10 +459,6 @@ static int erofs_read_super(struct super_block *sb, */ err_devname: dput(sb->s_root); -err_makeroot: -err_isdir: - if (sb->s_root == NULL) - iput(inode); err_iget: #ifdef EROFS_FS_HAS_MANAGED_CACHE iput(sbi->managed_cache); @@ -493,7 
+496,8 @@ static void erofs_put_super(struct super_block *sb) mutex_lock(&sbi->umount_mutex); #ifdef CONFIG_EROFS_FS_ZIP - erofs_workstation_cleanup_all(sb); + /* clean up the compression space of this sb */ + erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true); #endif erofs_unregister_super(sb); @@ -537,12 +541,6 @@ static void erofs_kill_sb(struct super_block *sb) kill_block_super(sb); } -static struct shrinker erofs_shrinker_info = { - .scan_objects = erofs_shrink_scan, - .count_objects = erofs_shrink_count, - .seeks = DEFAULT_SEEKS, -}; - static struct file_system_type erofs_fs_type = { .owner = THIS_MODULE, .name = "erofs", @@ -653,6 +651,11 @@ static int erofs_remount(struct super_block *sb, int *flags, char *data) if (err) goto out; + if (test_opt(sbi, POSIX_ACL)) + sb->s_flags |= SB_POSIXACL; + else + sb->s_flags &= ~SB_POSIXACL; + *flags |= SB_RDONLY; return 0; out: diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index 4ac1099a39c6..02f34a83147d 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -107,15 +107,30 @@ enum z_erofs_vle_work_role { Z_EROFS_VLE_WORK_SECONDARY, Z_EROFS_VLE_WORK_PRIMARY, /* - * The current work has at least been linked with the following - * processed chained works, which means if the processing page - * is the tail partial page of the work, the current work can - * safely use the whole page, as illustrated below: - * +--------------+-------------------------------------------+ - * | tail page | head page (of the previous work) | - * +--------------+-------------------------------------------+ - * /\ which belongs to the current work - * [ (*) this page can be used for the current work itself. ] + * The current work was the tail of an exist chain, and the previous + * processed chained works are all decided to be hooked up to it. 
+ * A new chain should be created for the remaining unprocessed works, + * therefore different from Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED, + * the next work cannot reuse the whole page in the following scenario: + * ________________________________________________________________ + * | tail (partial) page | head (partial) page | + * | (belongs to the next work) | (belongs to the current work) | + * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________| + */ + Z_EROFS_VLE_WORK_PRIMARY_HOOKED, + /* + * The current work has been linked with the processed chained works, + * and could be also linked with the potential remaining works, which + * means if the processing page is the tail partial page of the work, + * the current work can safely use the whole page (since the next work + * is under control) for in-place decompression, as illustrated below: + * ________________________________________________________________ + * | tail (partial) page | head (partial) page | + * | (of the current work) | (of the previous work) | + * | PRIMARY_FOLLOWED or | | + * |_____PRIMARY_HOOKED____|____________PRIMARY_FOLLOWED____________| + * + * [ (*) the above page can be used for the current work itself. 
] */ Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED, Z_EROFS_VLE_WORK_MAX @@ -238,14 +253,9 @@ int erofs_try_to_free_cached_page(struct address_space *mapping, { struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb); const unsigned int clusterpages = erofs_clusterpages(sbi); - - struct z_erofs_vle_workgroup *grp; + struct z_erofs_vle_workgroup *const grp = (void *)page_private(page); int ret = 0; /* 0 - busy */ - /* prevent the workgroup from being freed */ - rcu_read_lock(); - grp = (void *)page_private(page); - if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) { unsigned int i; @@ -257,12 +267,11 @@ int erofs_try_to_free_cached_page(struct address_space *mapping, } } erofs_workgroup_unfreeze(&grp->obj, 1); - } - rcu_read_unlock(); - if (ret) { - ClearPagePrivate(page); - put_page(page); + if (ret) { + ClearPagePrivate(page); + put_page(page); + } } return ret; } @@ -315,10 +324,10 @@ static int z_erofs_vle_work_add_page( return ret ? 0 : -EAGAIN; } -static inline bool try_to_claim_workgroup( - struct z_erofs_vle_workgroup *grp, - z_erofs_vle_owned_workgrp_t *owned_head, - bool *hosted) +static enum z_erofs_vle_work_role +try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp, + z_erofs_vle_owned_workgrp_t *owned_head, + bool *hosted) { DBG_BUGON(*hosted == true); @@ -332,6 +341,9 @@ retry: *owned_head = &grp->next; *hosted = true; + /* lucky, I am the followee :) */ + return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; + } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) { /* * type 2, link to the end of an existing open chain, @@ -341,12 +353,11 @@ retry: if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL, *owned_head) != Z_EROFS_VLE_WORKGRP_TAIL) goto retry; - *owned_head = Z_EROFS_VLE_WORKGRP_TAIL; - } else - return false; /* :( better luck next time */ + return Z_EROFS_VLE_WORK_PRIMARY_HOOKED; + } - return true; /* lucky, I am the followee :) */ + return Z_EROFS_VLE_WORK_PRIMARY; /* :( better luck next time */ } struct z_erofs_vle_work_finder { @@ -424,12 +435,9 @@
z_erofs_vle_work_lookup(const struct z_erofs_vle_work_finder *f) *f->hosted = false; if (!primary) *f->role = Z_EROFS_VLE_WORK_SECONDARY; - /* claim the workgroup if possible */ - else if (try_to_claim_workgroup(grp, f->owned_head, f->hosted)) - *f->role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED; - else - *f->role = Z_EROFS_VLE_WORK_PRIMARY; - + else /* claim the workgroup if possible */ + *f->role = try_to_claim_workgroup(grp, f->owned_head, + f->hosted); return work; } @@ -493,6 +501,9 @@ z_erofs_vle_work_register(const struct z_erofs_vle_work_finder *f, return work; } +#define builder_is_hooked(builder) \ + ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED) + #define builder_is_followed(builder) \ ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED) @@ -539,7 +550,7 @@ repeat: if (unlikely(work == ERR_PTR(-EAGAIN))) goto repeat; - if (unlikely(IS_ERR(work))) + if (IS_ERR(work)) return PTR_ERR(work); got_it: z_erofs_pagevec_ctor_init(&builder->vector, @@ -589,7 +600,7 @@ static void __z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp, erofs_workgroup_put(&grp->obj); } -void z_erofs_vle_work_release(struct z_erofs_vle_work *work) +static void z_erofs_vle_work_release(struct z_erofs_vle_work *work) { struct z_erofs_vle_workgroup *grp = z_erofs_vle_work_workgroup(work, true); @@ -636,7 +647,7 @@ struct z_erofs_vle_frontend { struct inode *const inode; struct z_erofs_vle_work_builder builder; - struct erofs_map_blocks_iter m_iter; + struct erofs_map_blocks map; z_erofs_vle_owned_workgrp_t owned_head; @@ -647,8 +658,9 @@ struct z_erofs_vle_frontend { #define VLE_FRONTEND_INIT(__i) { \ .inode = __i, \ - .m_iter = { \ - { .m_llen = 0, .m_plen = 0 }, \ + .map = { \ + .m_llen = 0, \ + .m_plen = 0, \ .mpage = NULL \ }, \ .builder = VLE_WORK_BUILDER_INIT(), \ @@ -681,12 +693,11 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe, { struct super_block *const sb = fe->inode->i_sb; struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb); - struct 
erofs_map_blocks_iter *const m = &fe->m_iter; - struct erofs_map_blocks *const map = &m->map; + struct erofs_map_blocks *const map = &fe->map; struct z_erofs_vle_work_builder *const builder = &fe->builder; const loff_t offset = page_offset(page); - bool tight = builder_is_followed(builder); + bool tight = builder_is_hooked(builder); struct z_erofs_vle_work *work = builder->work; enum z_erofs_cache_alloctype cache_strategy; @@ -704,8 +715,12 @@ repeat: /* lucky, within the range of the current map_blocks */ if (offset + cur >= map->m_la && - offset + cur < map->m_la + map->m_llen) + offset + cur < map->m_la + map->m_llen) { + /* didn't get a valid unzip work previously (very rare) */ + if (!builder->work) + goto restart_now; goto hitted; + } /* go ahead the next map_blocks */ debugln("%s: [out-of-range] pos %llu", __func__, offset + cur); @@ -715,10 +730,11 @@ repeat: map->m_la = offset + cur; map->m_llen = 0; - err = erofs_map_blocks_iter(fe->inode, map, &m->mpage, 0); + err = z_erofs_map_blocks_iter(fe->inode, map, 0); if (unlikely(err)) goto err_out; +restart_now: if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) goto hitted; @@ -740,7 +756,7 @@ repeat: map->m_plen / PAGE_SIZE, cache_strategy, page_pool, GFP_KERNEL); - tight &= builder_is_followed(builder); + tight &= builder_is_hooked(builder); work = builder->work; hitted: cur = end - min_t(unsigned int, offset + end - map->m_la, end); @@ -755,6 +771,9 @@ hitted: (tight ? 
Z_EROFS_PAGE_TYPE_EXCLUSIVE : Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED)); + if (cur) + tight &= builder_is_followed(builder); + retry: err = z_erofs_vle_work_add_page(builder, page, page_type); /* should allocate an additional staging page for pagevec */ @@ -992,11 +1011,10 @@ repeat: if (llen > grp->llen) llen = grp->llen; - err = z_erofs_vle_unzip_fast_percpu(compressed_pages, - clusterpages, pages, llen, work->pageofs, - z_erofs_onlinepage_endio); + err = z_erofs_vle_unzip_fast_percpu(compressed_pages, clusterpages, + pages, llen, work->pageofs); if (err != -ENOTSUPP) - goto out_percpu; + goto out; if (sparsemem_pages >= nr_pages) goto skip_allocpage; @@ -1017,8 +1035,25 @@ skip_allocpage: erofs_vunmap(vout, nr_pages); out: + /* must handle all compressed pages before ending pages */ + for (i = 0; i < clusterpages; ++i) { + page = compressed_pages[i]; + +#ifdef EROFS_FS_HAS_MANAGED_CACHE + if (page->mapping == MNGD_MAPPING(sbi)) + continue; +#endif + /* recycle all individual staging pages */ + (void)z_erofs_gather_if_stagingpage(page_pool, page); + + WRITE_ONCE(compressed_pages[i], NULL); + } + for (i = 0; i < nr_pages; ++i) { page = pages[i]; + if (!page) + continue; + DBG_BUGON(!page->mapping); /* recycle all individual staging pages */ @@ -1031,20 +1066,6 @@ out: z_erofs_onlinepage_endio(page); } -out_percpu: - for (i = 0; i < clusterpages; ++i) { - page = compressed_pages[i]; - -#ifdef EROFS_FS_HAS_MANAGED_CACHE - if (page->mapping == MNGD_MAPPING(sbi)) - continue; -#endif - /* recycle all individual staging pages */ - (void)z_erofs_gather_if_stagingpage(page_pool, page); - - WRITE_ONCE(compressed_pages[i], NULL); - } - if (pages == z_pagemap_global) mutex_unlock(&z_pagemap_global_lock); else if (unlikely(pages != pages_onstack)) @@ -1484,8 +1505,8 @@ static int z_erofs_vle_normalaccess_readpage(struct file *file, z_erofs_submit_and_unzip(&f, &pagepool, true); out: - if (f.m_iter.mpage) - put_page(f.m_iter.mpage); + if (f.map.mpage) + put_page(f.map.mpage); /*
clean up the remaining free pages */ put_pages_list(&pagepool); @@ -1555,8 +1576,8 @@ static int z_erofs_vle_normalaccess_readpages(struct file *filp, z_erofs_submit_and_unzip(&f, &pagepool, sync); - if (f.m_iter.mpage) - put_page(f.m_iter.mpage); + if (f.map.mpage) + put_page(f.map.mpage); /* clean up the remaining free pages */ put_pages_list(&pagepool); @@ -1701,14 +1722,14 @@ vle_get_logical_extent_head(const struct vle_map_blocks_iter_ctx *ctx, int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, - struct page **mpage_ret, int flags) + int flags) { void *kaddr; const struct vle_map_blocks_iter_ctx ctx = { .inode = inode, .sb = inode->i_sb, .clusterbits = EROFS_I_SB(inode)->clusterbits, - .mpage_ret = mpage_ret, + .mpage_ret = &map->mpage, .kaddr_ret = &kaddr }; const unsigned int clustersize = 1 << ctx.clusterbits; @@ -1722,7 +1743,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, /* initialize `pblk' to keep gcc from printing foolish warnings */ erofs_blk_t mblk, pblk = 0; - struct page *mpage = *mpage_ret; + struct page *mpage = map->mpage; struct z_erofs_vle_decompressed_index *di; unsigned int cluster_type, logical_cluster_ofs; int err = 0; @@ -1758,7 +1779,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, err = PTR_ERR(mpage); goto out; } - *mpage_ret = mpage; + map->mpage = mpage; } else { lock_page(mpage); DBG_BUGON(!PageUptodate(mpage)); @@ -1818,7 +1839,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, /* get the corresponding first chunk */ err = vle_get_logical_extent_head(&ctx, lcn, &ofs, &pblk, &map->m_flags); - mpage = *mpage_ret; + mpage = map->mpage; if (unlikely(err)) { if (mpage) diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h index 5a4e1b62c0d1..517e5ce8c5e9 100644 --- a/drivers/staging/erofs/unzip_vle.h +++ b/drivers/staging/erofs/unzip_vle.h @@ -212,18 +212,17 @@ static inline void z_erofs_onlinepage_endio(struct page *page) #define Z_EROFS_VLE_VMAP_GLOBAL_PAGES 2048
/* unzip_vle_lz4.c */ -extern int z_erofs_vle_plain_copy(struct page **compressed_pages, - unsigned clusterpages, struct page **pages, - unsigned nr_pages, unsigned short pageofs); - -extern int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, - unsigned clusterpages, struct page **pages, - unsigned outlen, unsigned short pageofs, - void (*endio)(struct page *)); - -extern int z_erofs_vle_unzip_vmap(struct page **compressed_pages, - unsigned clusterpages, void *vaddr, unsigned llen, - unsigned short pageofs, bool overlapped); +int z_erofs_vle_plain_copy(struct page **compressed_pages, + unsigned int clusterpages, struct page **pages, + unsigned int nr_pages, unsigned short pageofs); +int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, + unsigned int clusterpages, + struct page **pages, unsigned int outlen, + unsigned short pageofs); +int z_erofs_vle_unzip_vmap(struct page **compressed_pages, + unsigned int clusterpages, + void *vaddr, unsigned int llen, + unsigned short pageofs, bool overlapped); #endif diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c index 52797bd89da1..48b263a2731a 100644 --- a/drivers/staging/erofs/unzip_vle_lz4.c +++ b/drivers/staging/erofs/unzip_vle_lz4.c @@ -13,7 +13,7 @@ #include "unzip_vle.h" #include <linux/lz4.h> -int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen) +static int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen) { int ret = LZ4_decompress_safe_partial(in, out, inlen, outlen, outlen); @@ -125,8 +125,7 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, unsigned int clusterpages, struct page **pages, unsigned int outlen, - unsigned short pageofs, - void (*endio)(struct page *)) + unsigned short pageofs) { void *vin, *vout; unsigned int nr_pages, i, j; @@ -148,19 +147,16 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, ret = z_erofs_unzip_lz4(vin, vout + pageofs, clusterpages * 
PAGE_SIZE, outlen); - if (ret >= 0) { - outlen = ret; - ret = 0; - } + if (ret < 0) + goto out; + ret = 0; for (i = 0; i < nr_pages; ++i) { j = min((unsigned int)PAGE_SIZE - pageofs, outlen); if (pages[i]) { - if (ret < 0) { - SetPageError(pages[i]); - } else if (clusterpages == 1 && - pages[i] == compressed_pages[0]) { + if (clusterpages == 1 && + pages[i] == compressed_pages[0]) { memcpy(vin + pageofs, vout + pageofs, j); } else { void *dst = kmap_atomic(pages[i]); @@ -168,12 +164,13 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, memcpy(dst + pageofs, vout + pageofs, j); kunmap_atomic(dst); } - endio(pages[i]); } vout += PAGE_SIZE; outlen -= j; pageofs = 0; } + +out: preempt_enable(); if (clusterpages == 1) diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c index b535898ca753..5f61f99f4c10 100644 --- a/drivers/staging/erofs/utils.c +++ b/drivers/staging/erofs/utils.c @@ -31,13 +31,32 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) static atomic_long_t erofs_global_shrink_cnt; #ifdef CONFIG_EROFS_FS_ZIP +#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount) +#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount) -struct erofs_workgroup *erofs_find_workgroup( - struct super_block *sb, pgoff_t index, bool *tag) +static int erofs_workgroup_get(struct erofs_workgroup *grp) +{ + int o; + +repeat: + o = erofs_wait_on_workgroup_freezed(grp); + if (unlikely(o <= 0)) + return -1; + + if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o)) + goto repeat; + + /* decrease refcount paired by erofs_workgroup_put */ + if (unlikely(o == 1)) + atomic_long_dec(&erofs_global_shrink_cnt); + return 0; +} + +struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, + pgoff_t index, bool *tag) { struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_workgroup *grp; - int oldcount; repeat: rcu_read_lock(); @@ -46,15 +65,12 @@ repeat: *tag = xa_pointer_tag(grp); grp = xa_untag_pointer(grp); 
- if (erofs_workgroup_get(grp, &oldcount)) { + if (erofs_workgroup_get(grp)) { /* prefer to relax rcu read side */ rcu_read_unlock(); goto repeat; } - /* decrease refcount added by erofs_workgroup_put */ - if (unlikely(oldcount == 1)) - atomic_long_dec(&erofs_global_shrink_cnt); DBG_BUGON(index != grp->index); } rcu_read_unlock(); @@ -104,8 +120,6 @@ int erofs_register_workgroup(struct super_block *sb, return err; } -extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp); - static void __erofs_workgroup_free(struct erofs_workgroup *grp) { atomic_long_dec(&erofs_global_shrink_cnt); @@ -131,9 +145,9 @@ static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp) __erofs_workgroup_free(grp); } -bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp, - bool cleanup) +static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp, + bool cleanup) { /* * for managed cache enabled, the refcount of workgroups @@ -172,9 +186,9 @@ bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, #else /* for nocache case, no customized reclaim path at all */ -bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp, - bool cleanup) +static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp, + bool cleanup) { int cnt = atomic_read(&grp->refcount); @@ -256,14 +270,14 @@ void erofs_unregister_super(struct super_block *sb) spin_unlock(&erofs_sb_list_lock); } -unsigned long erofs_shrink_count(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long erofs_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) { return atomic_long_read(&erofs_global_shrink_cnt); } -unsigned long erofs_shrink_scan(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long erofs_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct erofs_sb_info *sbi; struct 
list_head *p; @@ -319,3 +333,9 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink, return freed; } +struct shrinker erofs_shrinker_info = { + .scan_objects = erofs_shrink_scan, + .count_objects = erofs_shrink_count, + .seeks = DEFAULT_SEEKS, +}; + diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c index 80dca6a4adbe..f716ab0446e5 100644 --- a/drivers/staging/erofs/xattr.c +++ b/drivers/staging/erofs/xattr.c @@ -44,19 +44,48 @@ static inline void xattr_iter_end_final(struct xattr_iter *it) static int init_inode_xattrs(struct inode *inode) { + struct erofs_vnode *const vi = EROFS_V(inode); struct xattr_iter it; unsigned int i; struct erofs_xattr_ibody_header *ih; struct super_block *sb; struct erofs_sb_info *sbi; - struct erofs_vnode *vi; bool atomic_map; + int ret = 0; - if (likely(inode_has_inited_xattr(inode))) + /* the most common case is that xattrs of this inode are initialized. */ + if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags)) return 0; - vi = EROFS_V(inode); - BUG_ON(!vi->xattr_isize); + if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE)) + return -ERESTARTSYS; + + /* someone has initialized xattrs for us? */ + if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags)) + goto out_unlock; + + /* + * bypass all xattr operations if ->xattr_isize is not greater than + * sizeof(struct erofs_xattr_ibody_header), in detail: + * 1) if it is too small to contain erofs_xattr_ibody_header, then + * ->xattr_isize should be 0 (it means no xattr); + * 2) it is just enough to contain erofs_xattr_ibody_header, which is on-disk + * undefined right now (may be used later with some new sb feature).
+ */ + if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) { + errln("xattr_isize %d of nid %llu is not supported yet", + vi->xattr_isize, vi->nid); + ret = -ENOTSUPP; + goto out_unlock; + } else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) { + if (unlikely(vi->xattr_isize)) { + DBG_BUGON(1); + ret = -EIO; + goto out_unlock; /* xattr ondisk layout error */ + } + ret = -ENOATTR; + goto out_unlock; + } sb = inode->i_sb; sbi = EROFS_SB(sb); @@ -64,8 +93,10 @@ static int init_inode_xattrs(struct inode *inode) it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize); it.page = erofs_get_inline_page(inode, it.blkaddr); - if (IS_ERR(it.page)) - return PTR_ERR(it.page); + if (IS_ERR(it.page)) { + ret = PTR_ERR(it.page); + goto out_unlock; + } /* read in shared xattr array (non-atomic, see kmalloc below) */ it.kaddr = kmap(it.page); @@ -78,7 +109,8 @@ static int init_inode_xattrs(struct inode *inode) sizeof(uint), GFP_KERNEL); if (vi->xattr_shared_xattrs == NULL) { xattr_iter_end(&it, atomic_map); - return -ENOMEM; + ret = -ENOMEM; + goto out_unlock; } /* let's skip ibody header */ @@ -92,8 +124,12 @@ static int init_inode_xattrs(struct inode *inode) it.page = erofs_get_meta_page(sb, ++it.blkaddr, S_ISDIR(inode->i_mode)); - if (IS_ERR(it.page)) - return PTR_ERR(it.page); + if (IS_ERR(it.page)) { + kfree(vi->xattr_shared_xattrs); + vi->xattr_shared_xattrs = NULL; + ret = PTR_ERR(it.page); + goto out_unlock; + } it.kaddr = kmap_atomic(it.page); atomic_map = true; @@ -105,8 +141,11 @@ static int init_inode_xattrs(struct inode *inode) } xattr_iter_end(&it, atomic_map); - inode_set_inited_xattr(inode); - return 0; + set_bit(EROFS_V_EA_INITED_BIT, &vi->flags); + +out_unlock: + clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags); + return ret; } /* @@ -117,10 +156,12 @@ static int init_inode_xattrs(struct inode *inode) * and need to be handled */ struct xattr_iter_handlers { - int (*entry)(struct xattr_iter *, struct erofs_xattr_entry *); - int 
(*name)(struct xattr_iter *, unsigned int, char *, unsigned int); - int (*alloc_buffer)(struct xattr_iter *, unsigned int); - void (*value)(struct xattr_iter *, unsigned int, char *, unsigned int); + int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry); + int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf, + unsigned int len); + int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz); + void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf, + unsigned int len); }; static inline int xattr_iter_fixup(struct xattr_iter *it) @@ -422,7 +463,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { - struct erofs_vnode *const vi = EROFS_V(inode); struct erofs_sb_info *const sbi = EROFS_I_SB(inode); switch (handler->flags) { @@ -440,9 +480,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, return -EINVAL; } - if (!vi->xattr_isize) - return -ENOATTR; - return erofs_getxattr(inode, handler->flags, name, buffer, size); } @@ -503,8 +540,7 @@ static int xattr_entrylist(struct xattr_iter *_it, if (h == NULL || (h->list != NULL && !h->list(it->dentry))) return 1; - /* Note that at least one of 'prefix' and 'name' should be non-NULL */ - prefix = h->prefix != NULL ? 
h->prefix : h->name; + prefix = xattr_prefix(h); prefix_len = strlen(prefix); if (it->buffer == NULL) { @@ -627,3 +663,40 @@ ssize_t erofs_listxattr(struct dentry *dentry, return shared_listxattr(&it); } +#ifdef CONFIG_EROFS_FS_POSIX_ACL +struct posix_acl *erofs_get_acl(struct inode *inode, int type) +{ + struct posix_acl *acl; + int prefix, rc; + char *value = NULL; + + switch (type) { + case ACL_TYPE_ACCESS: + prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + prefix = EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + + rc = erofs_getxattr(inode, prefix, "", NULL, 0); + if (rc > 0) { + value = kmalloc(rc, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + rc = erofs_getxattr(inode, prefix, "", value, rc); + } + + if (rc == -ENOATTR) + acl = NULL; + else if (rc < 0) + acl = ERR_PTR(rc); + else + acl = posix_acl_from_xattr(&init_user_ns, value, rc); + kfree(value); + return acl; +} +#endif + diff --git a/drivers/staging/erofs/xattr.h b/drivers/staging/erofs/xattr.h index 0c7379282fc5..35ba5ac2139a 100644 --- a/drivers/staging/erofs/xattr.h +++ b/drivers/staging/erofs/xattr.h @@ -68,9 +68,7 @@ static const struct xattr_handler *xattr_handler_map[] = { } #ifdef CONFIG_EROFS_FS_XATTR - -extern const struct inode_operations erofs_generic_xattr_iops; -extern const struct inode_operations erofs_dir_xattr_iops; +extern const struct xattr_handler *erofs_xattr_handlers[]; int erofs_getxattr(struct inode *, int, const char *, void *, size_t); ssize_t erofs_listxattr(struct dentry *, char *, size_t); @@ -89,5 +87,11 @@ static ssize_t __maybe_unused erofs_listxattr(struct dentry *dentry, } #endif +#ifdef CONFIG_EROFS_FS_POSIX_ACL +struct posix_acl *erofs_get_acl(struct inode *inode, int type); +#else +#define erofs_get_acl (NULL) +#endif + #endif |