aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/staging/erofs
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/staging/erofs')
-rw-r--r--drivers/staging/erofs/Documentation/filesystems/erofs.txt208
-rw-r--r--drivers/staging/erofs/Makefile2
-rw-r--r--drivers/staging/erofs/data.c37
-rw-r--r--drivers/staging/erofs/dir.c12
-rw-r--r--drivers/staging/erofs/inode.c41
-rw-r--r--drivers/staging/erofs/internal.h147
-rw-r--r--drivers/staging/erofs/namei.c194
-rw-r--r--drivers/staging/erofs/super.c29
-rw-r--r--drivers/staging/erofs/unzip_vle.c165
-rw-r--r--drivers/staging/erofs/unzip_vle.h23
-rw-r--r--drivers/staging/erofs/unzip_vle_lz4.c21
-rw-r--r--drivers/staging/erofs/utils.c58
-rw-r--r--drivers/staging/erofs/xattr.c115
-rw-r--r--drivers/staging/erofs/xattr.h10
14 files changed, 662 insertions, 400 deletions
diff --git a/drivers/staging/erofs/Documentation/filesystems/erofs.txt b/drivers/staging/erofs/Documentation/filesystems/erofs.txt
new file mode 100644
index 000000000000..961ec4da7705
--- /dev/null
+++ b/drivers/staging/erofs/Documentation/filesystems/erofs.txt
@@ -0,0 +1,208 @@
+Overview
+========
+
+EROFS file-system stands for Enhanced Read-Only File System. Different
+from other read-only file systems, it aims to be designed for flexibility,
+scalability, but be kept simple and high performance.
+
+It is designed as a better filesystem solution for the following scenarios:
+ - read-only storage media or
+
+ - part of a fully trusted read-only solution, which means it needs to be
+ immutable and bit-for-bit identical to the official golden image for
+ their releases due to security and other considerations and
+
+ - hope to save some extra storage space with guaranteed end-to-end performance
+ by using reduced metadata and transparent file compression, especially
+ for those embedded devices with limited memory (ex, smartphone);
+
+Here is the main features of EROFS:
+ - Little endian on-disk design;
+
+ - Currently 4KB block size (nobh) and therefore maximum 16TB address space;
+
+ - Metadata & data could be mixed by design;
+
+ - 2 inode versions for different requirements:
+ v1 v2
+ Inode metadata size: 32 bytes 64 bytes
+ Max file size: 4 GB 16 EB (also limited by max. vol size)
+ Max uids/gids: 65536 4294967296
+ File creation time: no yes (64 + 32-bit timestamp)
+ Max hardlinks: 65536 4294967296
+ Metadata reserved: 4 bytes 14 bytes
+
+ - Support extended attributes (xattrs) as an option;
+
+ - Support xattr inline and tail-end data inline for all files;
+
+ - Support POSIX.1e ACLs by using xattrs;
+
+ - Support transparent file compression as an option:
+ LZ4 algorithm with 4 KB fixed-output compression for high performance;
+
+The following git tree provides the file system user-space tools under
+development (ex, formatting tool mkfs.erofs):
+>> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+
+Bugs and patches are welcome, please kindly help us and send to the following
+linux-erofs mailing list:
+>> linux-erofs mailing list <linux-erofs@lists.ozlabs.org>
+
+Note that EROFS is still working in progress as a Linux staging driver,
+Cc the staging mailing list as well is highly recommended:
+>> Linux Driver Project Developer List <devel@driverdev.osuosl.org>
+
+Mount options
+=============
+
+fault_injection=%d Enable fault injection in all supported types with
+ specified injection rate. Supported injection type:
+ Type_Name Type_Value
+ FAULT_KMALLOC 0x000000001
+(no)user_xattr Setup Extended User Attributes. Note: xattr is enabled
+ by default if CONFIG_EROFS_FS_XATTR is selected.
+(no)acl Setup POSIX Access Control List. Note: acl is enabled
+ by default if CONFIG_EROFS_FS_POSIX_ACL is selected.
+
+On-disk details
+===============
+
+Summary
+-------
+Different from other read-only file systems, an EROFS volume is designed
+to be as simple as possible:
+
+ |-> aligned with the block size
+ ____________________________________________________________
+ | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data |
+ |_|__|_|_____|__________|_____|______|__________|_____|______|
+ 0 +1K
+
+All data areas should be aligned with the block size, but metadata areas
+may not. All metadatas can be now observed in two different spaces (views):
+ 1. Inode metadata space
+ Each valid inode should be aligned with an inode slot, which is a fixed
+ value (32 bytes) and designed to be kept in line with v1 inode size.
+
+ Each inode can be directly found with the following formula:
+ inode offset = meta_blkaddr * block_size + 32 * nid
+
+ |-> aligned with 8B
+ |-> followed closely
+ + meta_blkaddr blocks |-> another slot
+ _____________________________________________________________________
+ | ... | inode | xattrs | extents | data inline | ... | inode ...
+ |________|_______|(optional)|(optional)|__(optional)_|_____|__________
+ |-> aligned with the inode slot size
+ . .
+ . .
+ . .
+ . .
+ . .
+ . .
+ .____________________________________________________|-> aligned with 4B
+ | xattr_ibody_header | shared xattrs | inline xattrs |
+ |____________________|_______________|_______________|
+ |-> 12 bytes <-|->x * 4 bytes<-| .
+ . . .
+ . . .
+ . . .
+ ._______________________________.______________________.
+ | id | id | id | id | ... | id | ent | ... | ent| ... |
+ |____|____|____|____|______|____|_____|_____|____|_____|
+ |-> aligned with 4B
+ |-> aligned with 4B
+
+ Inode could be 32 or 64 bytes, which can be distinguished from a common
+ field which all inode versions have -- i_advise:
+
+ __________________ __________________
+ | i_advise | | i_advise |
+ |__________________| |__________________|
+ | ... | | ... |
+ | | | |
+ |__________________| 32 bytes | |
+ | |
+ |__________________| 64 bytes
+
+ Xattrs, extents, data inline are followed by the corresponding inode with
+ proper alignes, and they could be optional for different data mappings,
+ _currently_ there are totally 3 valid data mappings supported:
+
+ 1) flat file data without data inline (no extent);
+ 2) fixed-output size data compression (must have extents);
+ 3) flat file data with tail-end data inline (no extent);
+
+ The size of the optional xattrs is indicated by i_xattr_count in inode
+ header. Large xattrs or xattrs shared by many different files can be
+ stored in shared xattrs metadata rather than inlined right after inode.
+
+ 2. Shared xattrs metadata space
+ Shared xattrs space is similar to the above inode space, started with
+ a specific block indicated by xattr_blkaddr, organized one by one with
+ proper align.
+
+ Each share xattr can also be directly found by the following formula:
+ xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
+
+ |-> aligned by 4 bytes
+ + xattr_blkaddr blocks |-> aligned with 4 bytes
+ _________________________________________________________________________
+ | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ...
+ |________|_____________|_____________|_____|______________|_______________
+
+Directories
+-----------
+All directories are now organized in a compact on-disk format. Note that
+each directory block is divided into index and name areas in order to support
+random file lookup, and all directory entries are _strictly_ recorded in
+alphabetical order in order to support improved prefix binary search
+algorithm (could refer to the related source code).
+
+ ___________________________
+ / |
+ / ______________|________________
+ / / | nameoff1 | nameoffN-1
+ ____________.______________._______________v________________v__________
+| dirent | dirent | ... | dirent | filename | filename | ... | filename |
+|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
+ \ ^
+ \ | * could have
+ \ | trailing '\0'
+ \________________________| nameoff0
+
+ Directory block
+
+Note that apart from the offset of the first filename, nameoff0 also indicates
+the total number of directory entries in this block since it is no need to
+introduce another on-disk field at all.
+
+Compression
+-----------
+Currently, EROFS supports 4KB fixed-output clustersize transparent file
+compression, as illustrated below:
+
+ |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
+ clusterofs clusterofs clusterofs
+ | | | logical data
+_________v_______________________________v_____________________v_______________
+... | . | | . | | . | ...
+____|____.________|_____________|________.____|_____________|__.__________|____
+ |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
+ size size size size size
+ . . . .
+ . . . .
+ . . . .
+ _______._____________._____________._____________._____________________
+ ... | | | | ... physical data
+ _______|_____________|_____________|_____________|_____________________
+ |-> cluster <-|-> cluster <-|-> cluster <-|
+ size size size
+
+Currently each on-disk physical cluster can contain 4KB (un)compressed data
+at most. For each logical cluster, there is a corresponding on-disk index to
+describe its cluster type, physical cluster address, etc.
+
+See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+
diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile
index c91b65223f99..38ab344a285e 100644
--- a/drivers/staging/erofs/Makefile
+++ b/drivers/staging/erofs/Makefile
@@ -6,7 +6,7 @@ ccflags-y += -Wall -DEROFS_VERSION=\"$(EROFS_VERSION)\"
obj-$(CONFIG_EROFS_FS) += erofs.o
# staging requirement: to be self-contained in its own directory
-ccflags-y += -I$(src)/include
+ccflags-y += -I $(srctree)/$(src)/include
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_vle_lz4.o
diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c
index 5a55f0bfdfbb..9c471f08ffd4 100644
--- a/drivers/staging/erofs/data.c
+++ b/drivers/staging/erofs/data.c
@@ -165,43 +165,16 @@ err_out:
return err;
}
-#ifdef CONFIG_EROFS_FS_ZIP
-extern int z_erofs_map_blocks_iter(struct inode *,
- struct erofs_map_blocks *,
- struct page **, int);
-#endif
-
-int erofs_map_blocks_iter(struct inode *inode,
- struct erofs_map_blocks *map,
- struct page **mpage_ret, int flags)
-{
- /* by default, reading raw data never use erofs_map_blocks_iter */
- if (unlikely(!is_inode_layout_compression(inode))) {
- if (*mpage_ret)
- put_page(*mpage_ret);
- *mpage_ret = NULL;
-
- return erofs_map_blocks(inode, map, flags);
- }
-
-#ifdef CONFIG_EROFS_FS_ZIP
- return z_erofs_map_blocks_iter(inode, map, mpage_ret, flags);
-#else
- /* data compression is not available */
- return -ENOTSUPP;
-#endif
-}
-
int erofs_map_blocks(struct inode *inode,
struct erofs_map_blocks *map, int flags)
{
if (unlikely(is_inode_layout_compression(inode))) {
- struct page *mpage = NULL;
- int err;
+ int err = z_erofs_map_blocks_iter(inode, map, flags);
- err = erofs_map_blocks_iter(inode, map, &mpage, flags);
- if (mpage)
- put_page(mpage);
+ if (map->mpage) {
+ put_page(map->mpage);
+ map->mpage = NULL;
+ }
return err;
}
return erofs_map_blocks_flatmode(inode, map, flags);
diff --git a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c
index 833f052f79d0..829f7b12e0dc 100644
--- a/drivers/staging/erofs/dir.c
+++ b/drivers/staging/erofs/dir.c
@@ -24,8 +24,8 @@ static const unsigned char erofs_filetype_table[EROFS_FT_MAX] = {
};
static int erofs_fill_dentries(struct dir_context *ctx,
- void *dentry_blk, unsigned int *ofs,
- unsigned int nameoff, unsigned int maxsize)
+ void *dentry_blk, unsigned int *ofs,
+ unsigned int nameoff, unsigned int maxsize)
{
struct erofs_dirent *de = dentry_blk;
const struct erofs_dirent *end = dentry_blk + nameoff;
@@ -98,15 +98,14 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
if (IS_ERR(dentry_page))
continue;
- lock_page(dentry_page);
de = (struct erofs_dirent *)kmap(dentry_page);
nameoff = le16_to_cpu(de->nameoff);
if (unlikely(nameoff < sizeof(struct erofs_dirent) ||
- nameoff >= PAGE_SIZE)) {
+ nameoff >= PAGE_SIZE)) {
errln("%s, invalid de[0].nameoff %u",
- __func__, nameoff);
+ __func__, nameoff);
err = -EIO;
goto skip_this;
@@ -128,7 +127,6 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
skip_this:
kunmap(dentry_page);
- unlock_page(dentry_page);
put_page(dentry_page);
ctx->pos = blknr_to_addr(i) + ofs;
@@ -144,6 +142,6 @@ skip_this:
const struct file_operations erofs_dir_fops = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = erofs_readdir,
+ .iterate_shared = erofs_readdir,
};
diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
index d7fbf5f4600f..924b8dfc7a8f 100644
--- a/drivers/staging/erofs/inode.c
+++ b/drivers/staging/erofs/inode.c
@@ -184,32 +184,18 @@ static int fill_inode(struct inode *inode, int isdir)
if (!err) {
/* setup the new inode */
if (S_ISREG(inode->i_mode)) {
-#ifdef CONFIG_EROFS_FS_XATTR
- if (vi->xattr_isize)
- inode->i_op = &erofs_generic_xattr_iops;
-#endif
+ inode->i_op = &erofs_generic_iops;
inode->i_fop = &generic_ro_fops;
} else if (S_ISDIR(inode->i_mode)) {
- inode->i_op =
-#ifdef CONFIG_EROFS_FS_XATTR
- vi->xattr_isize ? &erofs_dir_xattr_iops :
-#endif
- &erofs_dir_iops;
+ inode->i_op = &erofs_dir_iops;
inode->i_fop = &erofs_dir_fops;
} else if (S_ISLNK(inode->i_mode)) {
/* by default, page_get_link is used for symlink */
- inode->i_op =
-#ifdef CONFIG_EROFS_FS_XATTR
- &erofs_symlink_xattr_iops,
-#else
- &page_symlink_inode_operations;
-#endif
+ inode->i_op = &erofs_symlink_iops;
inode_nohighmem(inode);
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
-#ifdef CONFIG_EROFS_FS_XATTR
- inode->i_op = &erofs_special_inode_operations;
-#endif
+ inode->i_op = &erofs_generic_iops;
init_special_inode(inode, inode->i_mode, inode->i_rdev);
} else {
err = -EIO;
@@ -297,23 +283,26 @@ struct inode *erofs_iget(struct super_block *sb,
return inode;
}
+const struct inode_operations erofs_generic_iops = {
#ifdef CONFIG_EROFS_FS_XATTR
-const struct inode_operations erofs_generic_xattr_iops = {
.listxattr = erofs_listxattr,
+#endif
+ .get_acl = erofs_get_acl,
};
-const struct inode_operations erofs_symlink_xattr_iops = {
+const struct inode_operations erofs_symlink_iops = {
.get_link = page_get_link,
+#ifdef CONFIG_EROFS_FS_XATTR
.listxattr = erofs_listxattr,
+#endif
+ .get_acl = erofs_get_acl,
};
-const struct inode_operations erofs_special_inode_operations = {
- .listxattr = erofs_listxattr,
-};
-
-const struct inode_operations erofs_fast_symlink_xattr_iops = {
+const struct inode_operations erofs_fast_symlink_iops = {
.get_link = simple_get_link,
+#ifdef CONFIG_EROFS_FS_XATTR
.listxattr = erofs_listxattr,
-};
#endif
+ .get_acl = erofs_get_acl,
+};
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
index e049d00c087a..e3bfde00c7d2 100644
--- a/drivers/staging/erofs/internal.h
+++ b/drivers/staging/erofs/internal.h
@@ -252,47 +252,20 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
}
#endif
-static inline bool erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt)
-{
- int o;
-
-repeat:
- o = erofs_wait_on_workgroup_freezed(grp);
-
- if (unlikely(o <= 0))
- return -1;
-
- if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
- goto repeat;
-
- *ocnt = o;
- return 0;
-}
-
-#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount)
-#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount)
-
-extern int erofs_workgroup_put(struct erofs_workgroup *grp);
-
-extern struct erofs_workgroup *erofs_find_workgroup(
- struct super_block *sb, pgoff_t index, bool *tag);
-
-extern int erofs_register_workgroup(struct super_block *sb,
- struct erofs_workgroup *grp, bool tag);
-
-extern unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
- unsigned long nr_shrink, bool cleanup);
-
-static inline void erofs_workstation_cleanup_all(struct super_block *sb)
-{
- erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true);
-}
+int erofs_workgroup_put(struct erofs_workgroup *grp);
+struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
+ pgoff_t index, bool *tag);
+int erofs_register_workgroup(struct super_block *sb,
+ struct erofs_workgroup *grp, bool tag);
+unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
+ unsigned long nr_shrink, bool cleanup);
+void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
#ifdef EROFS_FS_HAS_MANAGED_CACHE
-extern int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
- struct erofs_workgroup *egrp);
-extern int erofs_try_to_free_cached_page(struct address_space *mapping,
- struct page *page);
+int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *egrp);
+int erofs_try_to_free_cached_page(struct address_space *mapping,
+ struct page *page);
#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)
#else
@@ -354,12 +327,17 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits);
}
-#define inode_set_inited_xattr(inode) (EROFS_V(inode)->flags |= 1)
-#define inode_has_inited_xattr(inode) (EROFS_V(inode)->flags & 1)
+/* atomic flag definitions */
+#define EROFS_V_EA_INITED_BIT 0
+
+/* bitlock definitions (arranged in reverse order) */
+#define EROFS_V_BL_XATTR_BIT (BITS_PER_LONG - 1)
struct erofs_vnode {
erofs_nid_t nid;
- unsigned int flags;
+
+ /* atomic flags (including bitlocks) */
+ unsigned long flags;
unsigned char data_mapping_mode;
/* inline size in bytes */
@@ -412,8 +390,6 @@ static inline bool is_inode_layout_inline(struct inode *inode)
}
extern const struct super_operations erofs_sops;
-extern const struct inode_operations erofs_dir_iops;
-extern const struct file_operations erofs_dir_fops;
extern const struct address_space_operations erofs_raw_access_aops;
#ifdef CONFIG_EROFS_FS_ZIP
@@ -461,11 +437,26 @@ struct erofs_map_blocks {
u64 m_plen, m_llen;
unsigned int m_flags;
+
+ struct page *mpage;
};
/* Flags used by erofs_map_blocks() */
#define EROFS_GET_BLOCKS_RAW 0x0001
+#ifdef CONFIG_EROFS_FS_ZIP
+int z_erofs_map_blocks_iter(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags);
+#else
+static inline int z_erofs_map_blocks_iter(struct inode *inode,
+ struct erofs_map_blocks *map,
+ int flags)
+{
+ return -ENOTSUPP;
+}
+#endif
+
/* data.c */
static inline struct bio *
erofs_grab_bio(struct super_block *sb,
@@ -506,8 +497,8 @@ static inline void __submit_bio(struct bio *bio, unsigned op, unsigned op_flags)
#define EROFS_IO_MAX_RETRIES_NOFAIL CONFIG_EROFS_FS_IO_MAX_RETRIES
#endif
-extern struct page *__erofs_get_meta_page(struct super_block *sb,
- erofs_blk_t blkaddr, bool prio, bool nofail);
+struct page *__erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr,
+ bool prio, bool nofail);
static inline struct page *erofs_get_meta_page(struct super_block *sb,
erofs_blk_t blkaddr, bool prio)
@@ -521,15 +512,7 @@ static inline struct page *erofs_get_meta_page_nofail(struct super_block *sb,
return __erofs_get_meta_page(sb, blkaddr, prio, true);
}
-extern int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int);
-extern int erofs_map_blocks_iter(struct inode *, struct erofs_map_blocks *,
- struct page **, int);
-
-struct erofs_map_blocks_iter {
- struct erofs_map_blocks map;
- struct page *mpage;
-};
-
+int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int);
static inline struct page *
erofs_get_inline_page(struct inode *inode,
@@ -549,41 +532,31 @@ static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
#endif
}
-extern struct inode *erofs_iget(struct super_block *sb,
- erofs_nid_t nid, bool dir);
-
-/* dir.c */
-int erofs_namei(struct inode *dir, struct qstr *name,
- erofs_nid_t *nid, unsigned *d_type);
-
-#ifdef CONFIG_EROFS_FS_XATTR
-/* xattr.c */
-extern const struct xattr_handler *erofs_xattr_handlers[];
-
-/* symlink and special inode */
-extern const struct inode_operations erofs_symlink_xattr_iops;
-extern const struct inode_operations erofs_fast_symlink_xattr_iops;
-extern const struct inode_operations erofs_special_inode_operations;
-#endif
+extern const struct inode_operations erofs_generic_iops;
+extern const struct inode_operations erofs_symlink_iops;
+extern const struct inode_operations erofs_fast_symlink_iops;
static inline void set_inode_fast_symlink(struct inode *inode)
{
-#ifdef CONFIG_EROFS_FS_XATTR
- inode->i_op = &erofs_fast_symlink_xattr_iops;
-#else
- inode->i_op = &simple_symlink_inode_operations;
-#endif
+ inode->i_op = &erofs_fast_symlink_iops;
}
static inline bool is_inode_fast_symlink(struct inode *inode)
{
-#ifdef CONFIG_EROFS_FS_XATTR
- return inode->i_op == &erofs_fast_symlink_xattr_iops;
-#else
- return inode->i_op == &simple_symlink_inode_operations;
-#endif
+ return inode->i_op == &erofs_fast_symlink_iops;
}
+struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir);
+
+/* namei.c */
+extern const struct inode_operations erofs_dir_iops;
+
+int erofs_namei(struct inode *dir, struct qstr *name,
+ erofs_nid_t *nid, unsigned int *d_type);
+
+/* dir.c */
+extern const struct file_operations erofs_dir_fops;
+
static inline void *erofs_vmap(struct page **pages, unsigned int count)
{
#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM
@@ -612,15 +585,11 @@ static inline void erofs_vunmap(const void *mem, unsigned int count)
}
/* utils.c */
-extern struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
-
-extern void erofs_register_super(struct super_block *sb);
-extern void erofs_unregister_super(struct super_block *sb);
+extern struct shrinker erofs_shrinker_info;
-extern unsigned long erofs_shrink_count(struct shrinker *shrink,
- struct shrink_control *sc);
-extern unsigned long erofs_shrink_scan(struct shrinker *shrink,
- struct shrink_control *sc);
+struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
+void erofs_register_super(struct super_block *sb);
+void erofs_unregister_super(struct super_block *sb);
#ifndef lru_to_page
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
index 5596c52e246d..3f4fa52c10fa 100644
--- a/drivers/staging/erofs/namei.c
+++ b/drivers/staging/erofs/namei.c
@@ -15,74 +15,77 @@
#include <trace/events/erofs.h>
-/* based on the value of qn->len is accurate */
-static inline int dirnamecmp(struct qstr *qn,
- struct qstr *qd, unsigned int *matched)
+struct erofs_qstr {
+ const unsigned char *name;
+ const unsigned char *end;
+};
+
+/* based on the end of qn is accurate and it must have the trailing '\0' */
+static inline int dirnamecmp(const struct erofs_qstr *qn,
+ const struct erofs_qstr *qd,
+ unsigned int *matched)
{
- unsigned int i = *matched, len = min(qn->len, qd->len);
-loop:
- if (unlikely(i >= len)) {
- *matched = i;
- if (qn->len < qd->len) {
- /*
- * actually (qn->len == qd->len)
- * when qd->name[i] == '\0'
- */
- return qd->name[i] == '\0' ? 0 : -1;
+ unsigned int i = *matched;
+
+ /*
+ * on-disk error, let's only BUG_ON in the debugging mode.
+ * otherwise, it will return 1 to just skip the invalid name
+ * and go on (in consideration of the lookup performance).
+ */
+ DBG_BUGON(qd->name > qd->end);
+
+ /* qd could not have trailing '\0' */
+ /* However it is absolutely safe if < qd->end */
+ while (qd->name + i < qd->end && qd->name[i] != '\0') {
+ if (qn->name[i] != qd->name[i]) {
+ *matched = i;
+ return qn->name[i] > qd->name[i] ? 1 : -1;
}
- return (qn->len > qd->len);
- }
-
- if (qn->name[i] != qd->name[i]) {
- *matched = i;
- return qn->name[i] > qd->name[i] ? 1 : -1;
+ ++i;
}
-
- ++i;
- goto loop;
+ *matched = i;
+ /* See comments in __d_alloc on the terminating NUL character */
+ return qn->name[i] == '\0' ? 0 : 1;
}
-static struct erofs_dirent *find_target_dirent(
- struct qstr *name,
- u8 *data, int maxsize)
+#define nameoff_from_disk(off, sz) (le16_to_cpu(off) & ((sz) - 1))
+
+static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
+ u8 *data,
+ unsigned int dirblksize,
+ const int ndirents)
{
- unsigned int ndirents, head, back;
+ int head, back;
unsigned int startprfx, endprfx;
struct erofs_dirent *const de = (struct erofs_dirent *)data;
- /* make sure that maxsize is valid */
- BUG_ON(maxsize < sizeof(struct erofs_dirent));
-
- ndirents = le16_to_cpu(de->nameoff) / sizeof(*de);
-
- /* corrupted dir (may be unnecessary...) */
- BUG_ON(!ndirents);
-
- head = 0;
+ /* since the 1st dirent has been evaluated previously */
+ head = 1;
back = ndirents - 1;
startprfx = endprfx = 0;
while (head <= back) {
- unsigned int mid = head + (back - head) / 2;
- unsigned int nameoff = le16_to_cpu(de[mid].nameoff);
+ const int mid = head + (back - head) / 2;
+ const int nameoff = nameoff_from_disk(de[mid].nameoff,
+ dirblksize);
unsigned int matched = min(startprfx, endprfx);
-
- struct qstr dname = QSTR_INIT(data + nameoff,
- unlikely(mid >= ndirents - 1) ?
- maxsize - nameoff :
- le16_to_cpu(de[mid + 1].nameoff) - nameoff);
+ struct erofs_qstr dname = {
+ .name = data + nameoff,
+ .end = unlikely(mid >= ndirents - 1) ?
+ data + dirblksize :
+ data + nameoff_from_disk(de[mid + 1].nameoff,
+ dirblksize)
+ };
/* string comparison without already matched prefix */
int ret = dirnamecmp(name, &dname, &matched);
- if (unlikely(!ret))
+ if (unlikely(!ret)) {
return de + mid;
- else if (ret > 0) {
+ } else if (ret > 0) {
head = mid + 1;
startprfx = matched;
- } else if (unlikely(mid < 1)) /* fix "mid" overflow */
- break;
- else {
+ } else {
back = mid - 1;
endprfx = matched;
}
@@ -91,12 +94,12 @@ static struct erofs_dirent *find_target_dirent(
return ERR_PTR(-ENOENT);
}
-static struct page *find_target_block_classic(
- struct inode *dir,
- struct qstr *name, int *_diff)
+static struct page *find_target_block_classic(struct inode *dir,
+ struct erofs_qstr *name,
+ int *_ndirents)
{
unsigned int startprfx, endprfx;
- unsigned int head, back;
+ int head, back;
struct address_space *const mapping = dir->i_mapping;
struct page *candidate = ERR_PTR(-ENOENT);
@@ -105,89 +108,97 @@ static struct page *find_target_block_classic(
back = inode_datablocks(dir) - 1;
while (head <= back) {
- unsigned int mid = head + (back - head) / 2;
+ const int mid = head + (back - head) / 2;
struct page *page = read_mapping_page(mapping, mid, NULL);
- if (IS_ERR(page)) {
-exact_out:
- if (!IS_ERR(candidate)) /* valid candidate */
- put_page(candidate);
- return page;
- } else {
- int diff;
- unsigned int ndirents, matched;
- struct qstr dname;
+ if (!IS_ERR(page)) {
struct erofs_dirent *de = kmap_atomic(page);
- unsigned int nameoff = le16_to_cpu(de->nameoff);
-
- ndirents = nameoff / sizeof(*de);
+ const int nameoff = nameoff_from_disk(de->nameoff,
+ EROFS_BLKSIZ);
+ const int ndirents = nameoff / sizeof(*de);
+ int diff;
+ unsigned int matched;
+ struct erofs_qstr dname;
- /* corrupted dir (should have one entry at least) */
- BUG_ON(!ndirents || nameoff > PAGE_SIZE);
+ if (unlikely(!ndirents)) {
+ DBG_BUGON(1);
+ kunmap_atomic(de);
+ put_page(page);
+ page = ERR_PTR(-EIO);
+ goto out;
+ }
matched = min(startprfx, endprfx);
dname.name = (u8 *)de + nameoff;
- dname.len = ndirents == 1 ?
- /* since the rest of the last page is 0 */
- EROFS_BLKSIZ - nameoff
- : le16_to_cpu(de[1].nameoff) - nameoff;
+ if (ndirents == 1)
+ dname.end = (u8 *)de + EROFS_BLKSIZ;
+ else
+ dname.end = (u8 *)de +
+ nameoff_from_disk(de[1].nameoff,
+ EROFS_BLKSIZ);
/* string comparison without already matched prefix */
diff = dirnamecmp(name, &dname, &matched);
kunmap_atomic(de);
if (unlikely(!diff)) {
- *_diff = 0;
- goto exact_out;
+ *_ndirents = 0;
+ goto out;
} else if (diff > 0) {
head = mid + 1;
startprfx = matched;
- if (likely(!IS_ERR(candidate)))
+ if (!IS_ERR(candidate))
put_page(candidate);
candidate = page;
+ *_ndirents = ndirents;
} else {
put_page(page);
- if (unlikely(mid < 1)) /* fix "mid" overflow */
- break;
-
back = mid - 1;
endprfx = matched;
}
+ continue;
}
+out: /* free if the candidate is valid */
+ if (!IS_ERR(candidate))
+ put_page(candidate);
+ return page;
}
- *_diff = 1;
return candidate;
}
int erofs_namei(struct inode *dir,
- struct qstr *name,
- erofs_nid_t *nid, unsigned int *d_type)
+ struct qstr *name,
+ erofs_nid_t *nid, unsigned int *d_type)
{
- int diff;
+ int ndirents;
struct page *page;
- u8 *data;
+ void *data;
struct erofs_dirent *de;
+ struct erofs_qstr qn;
if (unlikely(!dir->i_size))
return -ENOENT;
- diff = 1;
- page = find_target_block_classic(dir, name, &diff);
+ qn.name = name->name;
+ qn.end = name->name + name->len;
+
+ ndirents = 0;
+ page = find_target_block_classic(dir, &qn, &ndirents);
- if (unlikely(IS_ERR(page)))
+ if (IS_ERR(page))
return PTR_ERR(page);
data = kmap_atomic(page);
/* the target page has been mapped */
- de = likely(diff) ?
- /* since the rest of the last page is 0 */
- find_target_dirent(name, data, EROFS_BLKSIZ) :
- (struct erofs_dirent *)data;
+ if (ndirents)
+ de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
+ else
+ de = (struct erofs_dirent *)data;
- if (likely(!IS_ERR(de))) {
+ if (!IS_ERR(de)) {
*nid = le64_to_cpu(de->nid);
*d_type = de->file_type;
}
@@ -235,12 +246,9 @@ static struct dentry *erofs_lookup(struct inode *dir,
const struct inode_operations erofs_dir_iops = {
.lookup = erofs_lookup,
-};
-
-const struct inode_operations erofs_dir_xattr_iops = {
- .lookup = erofs_lookup,
#ifdef CONFIG_EROFS_FS_XATTR
.listxattr = erofs_listxattr,
#endif
+ .get_acl = erofs_get_acl,
};
diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c
index 1c2eb69682ef..15c784fba879 100644
--- a/drivers/staging/erofs/super.c
+++ b/drivers/staging/erofs/super.c
@@ -16,6 +16,7 @@
#include <linux/parser.h>
#include <linux/seq_file.h>
#include "internal.h"
+#include "xattr.h"
#define CREATE_TRACE_POINTS
#include <trace/events/erofs.h>
@@ -397,6 +398,11 @@ static int erofs_read_super(struct super_block *sb,
if (!silent)
infoln("root inode @ nid %llu", ROOT_NID(sbi));
+ if (test_opt(sbi, POSIX_ACL))
+ sb->s_flags |= SB_POSIXACL;
+ else
+ sb->s_flags &= ~SB_POSIXACL;
+
#ifdef CONFIG_EROFS_FS_ZIP
INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
#endif
@@ -420,13 +426,14 @@ static int erofs_read_super(struct super_block *sb,
errln("rootino(nid %llu) is not a directory(i_mode %o)",
ROOT_NID(sbi), inode->i_mode);
err = -EINVAL;
- goto err_isdir;
+ iput(inode);
+ goto err_iget;
}
sb->s_root = d_make_root(inode);
if (sb->s_root == NULL) {
err = -ENOMEM;
- goto err_makeroot;
+ goto err_iget;
}
/* save the device name to sbi */
@@ -452,10 +459,6 @@ static int erofs_read_super(struct super_block *sb,
*/
err_devname:
dput(sb->s_root);
-err_makeroot:
-err_isdir:
- if (sb->s_root == NULL)
- iput(inode);
err_iget:
#ifdef EROFS_FS_HAS_MANAGED_CACHE
iput(sbi->managed_cache);
@@ -493,7 +496,8 @@ static void erofs_put_super(struct super_block *sb)
mutex_lock(&sbi->umount_mutex);
#ifdef CONFIG_EROFS_FS_ZIP
- erofs_workstation_cleanup_all(sb);
+ /* clean up the compression space of this sb */
+ erofs_shrink_workstation(EROFS_SB(sb), ~0UL, true);
#endif
erofs_unregister_super(sb);
@@ -537,12 +541,6 @@ static void erofs_kill_sb(struct super_block *sb)
kill_block_super(sb);
}
-static struct shrinker erofs_shrinker_info = {
- .scan_objects = erofs_shrink_scan,
- .count_objects = erofs_shrink_count,
- .seeks = DEFAULT_SEEKS,
-};
-
static struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE,
.name = "erofs",
@@ -653,6 +651,11 @@ static int erofs_remount(struct super_block *sb, int *flags, char *data)
if (err)
goto out;
+ if (test_opt(sbi, POSIX_ACL))
+ sb->s_flags |= SB_POSIXACL;
+ else
+ sb->s_flags &= ~SB_POSIXACL;
+
*flags |= SB_RDONLY;
return 0;
out:
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index 4ac1099a39c6..02f34a83147d 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -107,15 +107,30 @@ enum z_erofs_vle_work_role {
Z_EROFS_VLE_WORK_SECONDARY,
Z_EROFS_VLE_WORK_PRIMARY,
/*
- * The current work has at least been linked with the following
- * processed chained works, which means if the processing page
- * is the tail partial page of the work, the current work can
- * safely use the whole page, as illustrated below:
- * +--------------+-------------------------------------------+
- * | tail page | head page (of the previous work) |
- * +--------------+-------------------------------------------+
- * /\ which belongs to the current work
- * [ (*) this page can be used for the current work itself. ]
+ * The current work was the tail of an exist chain, and the previous
+ * processed chained works are all decided to be hooked up to it.
+ * A new chain should be created for the remaining unprocessed works,
+ * therefore different from Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
+ * the next work cannot reuse the whole page in the following scenario:
+ * ________________________________________________________________
+ * | tail (partial) page | head (partial) page |
+ * | (belongs to the next work) | (belongs to the current work) |
+ * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
+ */
+ Z_EROFS_VLE_WORK_PRIMARY_HOOKED,
+ /*
+ * The current work has been linked with the processed chained works,
+ * and could be also linked with the potential remaining works, which
+ * means if the processing page is the tail partial page of the work,
+ * the current work can safely use the whole page (since the next work
+ * is under control) for in-place decompression, as illustrated below:
+ * ________________________________________________________________
+ * | tail (partial) page | head (partial) page |
+ * | (of the current work) | (of the previous work) |
+ * | PRIMARY_FOLLOWED or | |
+ * |_____PRIMARY_HOOKED____|____________PRIMARY_FOLLOWED____________|
+ *
+ * [ (*) the above page can be used for the current work itself. ]
*/
Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
Z_EROFS_VLE_WORK_MAX
@@ -238,14 +253,9 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
{
struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb);
const unsigned int clusterpages = erofs_clusterpages(sbi);
-
- struct z_erofs_vle_workgroup *grp;
+ struct z_erofs_vle_workgroup *const grp = (void *)page_private(page);
int ret = 0; /* 0 - busy */
- /* prevent the workgroup from being freed */
- rcu_read_lock();
- grp = (void *)page_private(page);
-
if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) {
unsigned int i;
@@ -257,12 +267,11 @@ int erofs_try_to_free_cached_page(struct address_space *mapping,
}
}
erofs_workgroup_unfreeze(&grp->obj, 1);
- }
- rcu_read_unlock();
- if (ret) {
- ClearPagePrivate(page);
- put_page(page);
+ if (ret) {
+ ClearPagePrivate(page);
+ put_page(page);
+ }
}
return ret;
}
@@ -315,10 +324,10 @@ static int z_erofs_vle_work_add_page(
return ret ? 0 : -EAGAIN;
}
-static inline bool try_to_claim_workgroup(
- struct z_erofs_vle_workgroup *grp,
- z_erofs_vle_owned_workgrp_t *owned_head,
- bool *hosted)
+static enum z_erofs_vle_work_role
+try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp,
+ z_erofs_vle_owned_workgrp_t *owned_head,
+ bool *hosted)
{
DBG_BUGON(*hosted == true);
@@ -332,6 +341,9 @@ retry:
*owned_head = &grp->next;
*hosted = true;
+ /* lucky, I am the followee :) */
+ return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
+
} else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
/*
* type 2, link to the end of a existing open chain,
@@ -341,12 +353,11 @@ retry:
if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
*owned_head) != Z_EROFS_VLE_WORKGRP_TAIL)
goto retry;
-
*owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
- } else
- return false; /* :( better luck next time */
+ return Z_EROFS_VLE_WORK_PRIMARY_HOOKED;
+ }
- return true; /* lucky, I am the followee :) */
+ return Z_EROFS_VLE_WORK_PRIMARY; /* :( better luck next time */
}
struct z_erofs_vle_work_finder {
@@ -424,12 +435,9 @@ z_erofs_vle_work_lookup(const struct z_erofs_vle_work_finder *f)
*f->hosted = false;
if (!primary)
*f->role = Z_EROFS_VLE_WORK_SECONDARY;
- /* claim the workgroup if possible */
- else if (try_to_claim_workgroup(grp, f->owned_head, f->hosted))
- *f->role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
- else
- *f->role = Z_EROFS_VLE_WORK_PRIMARY;
-
+ else /* claim the workgroup if possible */
+ *f->role = try_to_claim_workgroup(grp, f->owned_head,
+ f->hosted);
return work;
}
@@ -493,6 +501,9 @@ z_erofs_vle_work_register(const struct z_erofs_vle_work_finder *f,
return work;
}
+#define builder_is_hooked(builder) \
+ ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED)
+
#define builder_is_followed(builder) \
((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
@@ -539,7 +550,7 @@ repeat:
if (unlikely(work == ERR_PTR(-EAGAIN)))
goto repeat;
- if (unlikely(IS_ERR(work)))
+ if (IS_ERR(work))
return PTR_ERR(work);
got_it:
z_erofs_pagevec_ctor_init(&builder->vector,
@@ -589,7 +600,7 @@ static void __z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp,
erofs_workgroup_put(&grp->obj);
}
-void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
+static void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
{
struct z_erofs_vle_workgroup *grp =
z_erofs_vle_work_workgroup(work, true);
@@ -636,7 +647,7 @@ struct z_erofs_vle_frontend {
struct inode *const inode;
struct z_erofs_vle_work_builder builder;
- struct erofs_map_blocks_iter m_iter;
+ struct erofs_map_blocks map;
z_erofs_vle_owned_workgrp_t owned_head;
@@ -647,8 +658,9 @@ struct z_erofs_vle_frontend {
#define VLE_FRONTEND_INIT(__i) { \
.inode = __i, \
- .m_iter = { \
- { .m_llen = 0, .m_plen = 0 }, \
+ .map = { \
+ .m_llen = 0, \
+ .m_plen = 0, \
.mpage = NULL \
}, \
.builder = VLE_WORK_BUILDER_INIT(), \
@@ -681,12 +693,11 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
{
struct super_block *const sb = fe->inode->i_sb;
struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
- struct erofs_map_blocks_iter *const m = &fe->m_iter;
- struct erofs_map_blocks *const map = &m->map;
+ struct erofs_map_blocks *const map = &fe->map;
struct z_erofs_vle_work_builder *const builder = &fe->builder;
const loff_t offset = page_offset(page);
- bool tight = builder_is_followed(builder);
+ bool tight = builder_is_hooked(builder);
struct z_erofs_vle_work *work = builder->work;
enum z_erofs_cache_alloctype cache_strategy;
@@ -704,8 +715,12 @@ repeat:
/* lucky, within the range of the current map_blocks */
if (offset + cur >= map->m_la &&
- offset + cur < map->m_la + map->m_llen)
+ offset + cur < map->m_la + map->m_llen) {
+ /* didn't get a valid unzip work previously (very rare) */
+ if (!builder->work)
+ goto restart_now;
goto hitted;
+ }
/* go ahead the next map_blocks */
debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);
@@ -715,10 +730,11 @@ repeat:
map->m_la = offset + cur;
map->m_llen = 0;
- err = erofs_map_blocks_iter(fe->inode, map, &m->mpage, 0);
+ err = z_erofs_map_blocks_iter(fe->inode, map, 0);
if (unlikely(err))
goto err_out;
+restart_now:
if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
goto hitted;
@@ -740,7 +756,7 @@ repeat:
map->m_plen / PAGE_SIZE,
cache_strategy, page_pool, GFP_KERNEL);
- tight &= builder_is_followed(builder);
+ tight &= builder_is_hooked(builder);
work = builder->work;
hitted:
cur = end - min_t(unsigned int, offset + end - map->m_la, end);
@@ -755,6 +771,9 @@ hitted:
(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
+ if (cur)
+ tight &= builder_is_followed(builder);
+
retry:
err = z_erofs_vle_work_add_page(builder, page, page_type);
/* should allocate an additional staging page for pagevec */
@@ -992,11 +1011,10 @@ repeat:
if (llen > grp->llen)
llen = grp->llen;
- err = z_erofs_vle_unzip_fast_percpu(compressed_pages,
- clusterpages, pages, llen, work->pageofs,
- z_erofs_onlinepage_endio);
+ err = z_erofs_vle_unzip_fast_percpu(compressed_pages, clusterpages,
+ pages, llen, work->pageofs);
if (err != -ENOTSUPP)
- goto out_percpu;
+ goto out;
if (sparsemem_pages >= nr_pages)
goto skip_allocpage;
@@ -1017,8 +1035,25 @@ skip_allocpage:
erofs_vunmap(vout, nr_pages);
out:
+ /* must handle all compressed pages before endding pages */
+ for (i = 0; i < clusterpages; ++i) {
+ page = compressed_pages[i];
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+ if (page->mapping == MNGD_MAPPING(sbi))
+ continue;
+#endif
+ /* recycle all individual staging pages */
+ (void)z_erofs_gather_if_stagingpage(page_pool, page);
+
+ WRITE_ONCE(compressed_pages[i], NULL);
+ }
+
for (i = 0; i < nr_pages; ++i) {
page = pages[i];
+ if (!page)
+ continue;
+
DBG_BUGON(!page->mapping);
/* recycle all individual staging pages */
@@ -1031,20 +1066,6 @@ out:
z_erofs_onlinepage_endio(page);
}
-out_percpu:
- for (i = 0; i < clusterpages; ++i) {
- page = compressed_pages[i];
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
- if (page->mapping == MNGD_MAPPING(sbi))
- continue;
-#endif
- /* recycle all individual staging pages */
- (void)z_erofs_gather_if_stagingpage(page_pool, page);
-
- WRITE_ONCE(compressed_pages[i], NULL);
- }
-
if (pages == z_pagemap_global)
mutex_unlock(&z_pagemap_global_lock);
else if (unlikely(pages != pages_onstack))
@@ -1484,8 +1505,8 @@ static int z_erofs_vle_normalaccess_readpage(struct file *file,
z_erofs_submit_and_unzip(&f, &pagepool, true);
out:
- if (f.m_iter.mpage)
- put_page(f.m_iter.mpage);
+ if (f.map.mpage)
+ put_page(f.map.mpage);
/* clean up the remaining free pages */
put_pages_list(&pagepool);
@@ -1555,8 +1576,8 @@ static int z_erofs_vle_normalaccess_readpages(struct file *filp,
z_erofs_submit_and_unzip(&f, &pagepool, sync);
- if (f.m_iter.mpage)
- put_page(f.m_iter.mpage);
+ if (f.map.mpage)
+ put_page(f.map.mpage);
/* clean up the remaining free pages */
put_pages_list(&pagepool);
@@ -1701,14 +1722,14 @@ vle_get_logical_extent_head(const struct vle_map_blocks_iter_ctx *ctx,
int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
- struct page **mpage_ret, int flags)
+ int flags)
{
void *kaddr;
const struct vle_map_blocks_iter_ctx ctx = {
.inode = inode,
.sb = inode->i_sb,
.clusterbits = EROFS_I_SB(inode)->clusterbits,
- .mpage_ret = mpage_ret,
+ .mpage_ret = &map->mpage,
.kaddr_ret = &kaddr
};
const unsigned int clustersize = 1 << ctx.clusterbits;
@@ -1722,7 +1743,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
/* initialize `pblk' to keep gcc from printing foolish warnings */
erofs_blk_t mblk, pblk = 0;
- struct page *mpage = *mpage_ret;
+ struct page *mpage = map->mpage;
struct z_erofs_vle_decompressed_index *di;
unsigned int cluster_type, logical_cluster_ofs;
int err = 0;
@@ -1758,7 +1779,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
err = PTR_ERR(mpage);
goto out;
}
- *mpage_ret = mpage;
+ map->mpage = mpage;
} else {
lock_page(mpage);
DBG_BUGON(!PageUptodate(mpage));
@@ -1818,7 +1839,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
/* get the correspoinding first chunk */
err = vle_get_logical_extent_head(&ctx, lcn, &ofs,
&pblk, &map->m_flags);
- mpage = *mpage_ret;
+ mpage = map->mpage;
if (unlikely(err)) {
if (mpage)
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
index 5a4e1b62c0d1..517e5ce8c5e9 100644
--- a/drivers/staging/erofs/unzip_vle.h
+++ b/drivers/staging/erofs/unzip_vle.h
@@ -212,18 +212,17 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES 2048
/* unzip_vle_lz4.c */
-extern int z_erofs_vle_plain_copy(struct page **compressed_pages,
- unsigned clusterpages, struct page **pages,
- unsigned nr_pages, unsigned short pageofs);
-
-extern int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
- unsigned clusterpages, struct page **pages,
- unsigned outlen, unsigned short pageofs,
- void (*endio)(struct page *));
-
-extern int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
- unsigned clusterpages, void *vaddr, unsigned llen,
- unsigned short pageofs, bool overlapped);
+int z_erofs_vle_plain_copy(struct page **compressed_pages,
+ unsigned int clusterpages, struct page **pages,
+ unsigned int nr_pages, unsigned short pageofs);
+int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
+ unsigned int clusterpages,
+ struct page **pages, unsigned int outlen,
+ unsigned short pageofs);
+int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
+ unsigned int clusterpages,
+ void *vaddr, unsigned int llen,
+ unsigned short pageofs, bool overlapped);
#endif
diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c
index 52797bd89da1..48b263a2731a 100644
--- a/drivers/staging/erofs/unzip_vle_lz4.c
+++ b/drivers/staging/erofs/unzip_vle_lz4.c
@@ -13,7 +13,7 @@
#include "unzip_vle.h"
#include <linux/lz4.h>
-int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen)
+static int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen)
{
int ret = LZ4_decompress_safe_partial(in, out, inlen, outlen, outlen);
@@ -125,8 +125,7 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
unsigned int clusterpages,
struct page **pages,
unsigned int outlen,
- unsigned short pageofs,
- void (*endio)(struct page *))
+ unsigned short pageofs)
{
void *vin, *vout;
unsigned int nr_pages, i, j;
@@ -148,19 +147,16 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
ret = z_erofs_unzip_lz4(vin, vout + pageofs,
clusterpages * PAGE_SIZE, outlen);
- if (ret >= 0) {
- outlen = ret;
- ret = 0;
- }
+ if (ret < 0)
+ goto out;
+ ret = 0;
for (i = 0; i < nr_pages; ++i) {
j = min((unsigned int)PAGE_SIZE - pageofs, outlen);
if (pages[i]) {
- if (ret < 0) {
- SetPageError(pages[i]);
- } else if (clusterpages == 1 &&
- pages[i] == compressed_pages[0]) {
+ if (clusterpages == 1 &&
+ pages[i] == compressed_pages[0]) {
memcpy(vin + pageofs, vout + pageofs, j);
} else {
void *dst = kmap_atomic(pages[i]);
@@ -168,12 +164,13 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
memcpy(dst + pageofs, vout + pageofs, j);
kunmap_atomic(dst);
}
- endio(pages[i]);
}
vout += PAGE_SIZE;
outlen -= j;
pageofs = 0;
}
+
+out:
preempt_enable();
if (clusterpages == 1)
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index b535898ca753..5f61f99f4c10 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -31,13 +31,32 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
static atomic_long_t erofs_global_shrink_cnt;
#ifdef CONFIG_EROFS_FS_ZIP
+#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount)
+#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount)
-struct erofs_workgroup *erofs_find_workgroup(
- struct super_block *sb, pgoff_t index, bool *tag)
+static int erofs_workgroup_get(struct erofs_workgroup *grp)
+{
+ int o;
+
+repeat:
+ o = erofs_wait_on_workgroup_freezed(grp);
+ if (unlikely(o <= 0))
+ return -1;
+
+ if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
+ goto repeat;
+
+ /* decrease refcount paired by erofs_workgroup_put */
+ if (unlikely(o == 1))
+ atomic_long_dec(&erofs_global_shrink_cnt);
+ return 0;
+}
+
+struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
+ pgoff_t index, bool *tag)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_workgroup *grp;
- int oldcount;
repeat:
rcu_read_lock();
@@ -46,15 +65,12 @@ repeat:
*tag = xa_pointer_tag(grp);
grp = xa_untag_pointer(grp);
- if (erofs_workgroup_get(grp, &oldcount)) {
+ if (erofs_workgroup_get(grp)) {
/* prefer to relax rcu read side */
rcu_read_unlock();
goto repeat;
}
- /* decrease refcount added by erofs_workgroup_put */
- if (unlikely(oldcount == 1))
- atomic_long_dec(&erofs_global_shrink_cnt);
DBG_BUGON(index != grp->index);
}
rcu_read_unlock();
@@ -104,8 +120,6 @@ int erofs_register_workgroup(struct super_block *sb,
return err;
}
-extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
-
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
atomic_long_dec(&erofs_global_shrink_cnt);
@@ -131,9 +145,9 @@ static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
__erofs_workgroup_free(grp);
}
-bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
- struct erofs_workgroup *grp,
- bool cleanup)
+static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *grp,
+ bool cleanup)
{
/*
* for managed cache enabled, the refcount of workgroups
@@ -172,9 +186,9 @@ bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
#else
/* for nocache case, no customized reclaim path at all */
-bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
- struct erofs_workgroup *grp,
- bool cleanup)
+static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+ struct erofs_workgroup *grp,
+ bool cleanup)
{
int cnt = atomic_read(&grp->refcount);
@@ -256,14 +270,14 @@ void erofs_unregister_super(struct super_block *sb)
spin_unlock(&erofs_sb_list_lock);
}
-unsigned long erofs_shrink_count(struct shrinker *shrink,
- struct shrink_control *sc)
+static unsigned long erofs_shrink_count(struct shrinker *shrink,
+ struct shrink_control *sc)
{
return atomic_long_read(&erofs_global_shrink_cnt);
}
-unsigned long erofs_shrink_scan(struct shrinker *shrink,
- struct shrink_control *sc)
+static unsigned long erofs_shrink_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
{
struct erofs_sb_info *sbi;
struct list_head *p;
@@ -319,3 +333,9 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink,
return freed;
}
+struct shrinker erofs_shrinker_info = {
+ .scan_objects = erofs_shrink_scan,
+ .count_objects = erofs_shrink_count,
+ .seeks = DEFAULT_SEEKS,
+};
+
diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c
index 80dca6a4adbe..f716ab0446e5 100644
--- a/drivers/staging/erofs/xattr.c
+++ b/drivers/staging/erofs/xattr.c
@@ -44,19 +44,48 @@ static inline void xattr_iter_end_final(struct xattr_iter *it)
static int init_inode_xattrs(struct inode *inode)
{
+ struct erofs_vnode *const vi = EROFS_V(inode);
struct xattr_iter it;
unsigned int i;
struct erofs_xattr_ibody_header *ih;
struct super_block *sb;
struct erofs_sb_info *sbi;
- struct erofs_vnode *vi;
bool atomic_map;
+ int ret = 0;
- if (likely(inode_has_inited_xattr(inode)))
+ /* the most case is that xattrs of this inode are initialized. */
+ if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
return 0;
- vi = EROFS_V(inode);
- BUG_ON(!vi->xattr_isize);
+ if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE))
+ return -ERESTARTSYS;
+
+ /* someone has initialized xattrs for us? */
+ if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
+ goto out_unlock;
+
+ /*
+ * bypass all xattr operations if ->xattr_isize is not greater than
+ * sizeof(struct erofs_xattr_ibody_header), in detail:
+ * 1) it is not enough to contain erofs_xattr_ibody_header then
+ * ->xattr_isize should be 0 (it means no xattr);
+ * 2) it is just to contain erofs_xattr_ibody_header, which is on-disk
+ * undefined right now (maybe use later with some new sb feature).
+ */
+ if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) {
+ errln("xattr_isize %d of nid %llu is not supported yet",
+ vi->xattr_isize, vi->nid);
+ ret = -ENOTSUPP;
+ goto out_unlock;
+ } else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) {
+ if (unlikely(vi->xattr_isize)) {
+ DBG_BUGON(1);
+ ret = -EIO;
+ goto out_unlock; /* xattr ondisk layout error */
+ }
+ ret = -ENOATTR;
+ goto out_unlock;
+ }
sb = inode->i_sb;
sbi = EROFS_SB(sb);
@@ -64,8 +93,10 @@ static int init_inode_xattrs(struct inode *inode)
it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize);
it.page = erofs_get_inline_page(inode, it.blkaddr);
- if (IS_ERR(it.page))
- return PTR_ERR(it.page);
+ if (IS_ERR(it.page)) {
+ ret = PTR_ERR(it.page);
+ goto out_unlock;
+ }
/* read in shared xattr array (non-atomic, see kmalloc below) */
it.kaddr = kmap(it.page);
@@ -78,7 +109,8 @@ static int init_inode_xattrs(struct inode *inode)
sizeof(uint), GFP_KERNEL);
if (vi->xattr_shared_xattrs == NULL) {
xattr_iter_end(&it, atomic_map);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_unlock;
}
/* let's skip ibody header */
@@ -92,8 +124,12 @@ static int init_inode_xattrs(struct inode *inode)
it.page = erofs_get_meta_page(sb,
++it.blkaddr, S_ISDIR(inode->i_mode));
- if (IS_ERR(it.page))
- return PTR_ERR(it.page);
+ if (IS_ERR(it.page)) {
+ kfree(vi->xattr_shared_xattrs);
+ vi->xattr_shared_xattrs = NULL;
+ ret = PTR_ERR(it.page);
+ goto out_unlock;
+ }
it.kaddr = kmap_atomic(it.page);
atomic_map = true;
@@ -105,8 +141,11 @@ static int init_inode_xattrs(struct inode *inode)
}
xattr_iter_end(&it, atomic_map);
- inode_set_inited_xattr(inode);
- return 0;
+ set_bit(EROFS_V_EA_INITED_BIT, &vi->flags);
+
+out_unlock:
+ clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags);
+ return ret;
}
/*
@@ -117,10 +156,12 @@ static int init_inode_xattrs(struct inode *inode)
* and need to be handled
*/
struct xattr_iter_handlers {
- int (*entry)(struct xattr_iter *, struct erofs_xattr_entry *);
- int (*name)(struct xattr_iter *, unsigned int, char *, unsigned int);
- int (*alloc_buffer)(struct xattr_iter *, unsigned int);
- void (*value)(struct xattr_iter *, unsigned int, char *, unsigned int);
+ int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry);
+ int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf,
+ unsigned int len);
+ int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz);
+ void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf,
+ unsigned int len);
};
static inline int xattr_iter_fixup(struct xattr_iter *it)
@@ -422,7 +463,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
{
- struct erofs_vnode *const vi = EROFS_V(inode);
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
switch (handler->flags) {
@@ -440,9 +480,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
return -EINVAL;
}
- if (!vi->xattr_isize)
- return -ENOATTR;
-
return erofs_getxattr(inode, handler->flags, name, buffer, size);
}
@@ -503,8 +540,7 @@ static int xattr_entrylist(struct xattr_iter *_it,
if (h == NULL || (h->list != NULL && !h->list(it->dentry)))
return 1;
- /* Note that at least one of 'prefix' and 'name' should be non-NULL */
- prefix = h->prefix != NULL ? h->prefix : h->name;
+ prefix = xattr_prefix(h);
prefix_len = strlen(prefix);
if (it->buffer == NULL) {
@@ -627,3 +663,40 @@ ssize_t erofs_listxattr(struct dentry *dentry,
return shared_listxattr(&it);
}
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+struct posix_acl *erofs_get_acl(struct inode *inode, int type)
+{
+ struct posix_acl *acl;
+ int prefix, rc;
+ char *value = NULL;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ prefix = EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ rc = erofs_getxattr(inode, prefix, "", NULL, 0);
+ if (rc > 0) {
+ value = kmalloc(rc, GFP_KERNEL);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ rc = erofs_getxattr(inode, prefix, "", value, rc);
+ }
+
+ if (rc == -ENOATTR)
+ acl = NULL;
+ else if (rc < 0)
+ acl = ERR_PTR(rc);
+ else
+ acl = posix_acl_from_xattr(&init_user_ns, value, rc);
+ kfree(value);
+ return acl;
+}
+#endif
+
diff --git a/drivers/staging/erofs/xattr.h b/drivers/staging/erofs/xattr.h
index 0c7379282fc5..35ba5ac2139a 100644
--- a/drivers/staging/erofs/xattr.h
+++ b/drivers/staging/erofs/xattr.h
@@ -68,9 +68,7 @@ static const struct xattr_handler *xattr_handler_map[] = {
}
#ifdef CONFIG_EROFS_FS_XATTR
-
-extern const struct inode_operations erofs_generic_xattr_iops;
-extern const struct inode_operations erofs_dir_xattr_iops;
+extern const struct xattr_handler *erofs_xattr_handlers[];
int erofs_getxattr(struct inode *, int, const char *, void *, size_t);
ssize_t erofs_listxattr(struct dentry *, char *, size_t);
@@ -89,5 +87,11 @@ static ssize_t __maybe_unused erofs_listxattr(struct dentry *dentry,
}
#endif
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+struct posix_acl *erofs_get_acl(struct inode *inode, int type);
+#else
+#define erofs_get_acl (NULL)
+#endif
+
#endif