From 7ca02d0ae586fe7df59632966a64f3f1a756ef05 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 15 Apr 2015 16:13:42 -0700 Subject: hugetlbfs: accept subpool min_size mount option and setup accordingly Make 'min_size=' be an option when mounting a hugetlbfs. This option takes the same value as the 'size' option. min_size can be specified without specifying size. If both are specified, min_size must be less that or equal to size else the mount will fail. If min_size is specified, then at mount time an attempt is made to reserve min_size pages. If the reservation fails, the mount fails. At umount time, the reserved pages are released. Signed-off-by: Mike Kravetz Cc: Davidlohr Bueso Cc: Aneesh Kumar Cc: Joonsoo Kim Cc: Andi Kleen Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 90 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 71 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index db76cec3ce21..3a8f12762821 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -47,9 +47,10 @@ struct hugetlbfs_config { kuid_t uid; kgid_t gid; umode_t mode; - long nr_blocks; + long max_hpages; long nr_inodes; struct hstate *hstate; + long min_hpages; }; struct hugetlbfs_inode_info { @@ -67,7 +68,7 @@ int sysctl_hugetlb_shm_group; enum { Opt_size, Opt_nr_inodes, Opt_mode, Opt_uid, Opt_gid, - Opt_pagesize, + Opt_pagesize, Opt_min_size, Opt_err, }; @@ -78,6 +79,7 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_pagesize, "pagesize=%s"}, + {Opt_min_size, "min_size=%s"}, {Opt_err, NULL}, }; @@ -754,14 +756,38 @@ static const struct super_operations hugetlbfs_ops = { .show_options = generic_show_options, }; +enum { NO_SIZE, SIZE_STD, SIZE_PERCENT }; + +/* + * Convert size option passed from command line to number of huge pages + * in the pool specified by hstate. Size option could be in bytes + * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT). + */ +static long long +hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt, + int val_type) +{ + if (val_type == NO_SIZE) + return -1; + + if (val_type == SIZE_PERCENT) { + size_opt <<= huge_page_shift(h); + size_opt *= h->max_huge_pages; + do_div(size_opt, 100); + } + + size_opt >>= huge_page_shift(h); + return size_opt; +} + static int hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) { char *p, *rest; substring_t args[MAX_OPT_ARGS]; int option; - unsigned long long size = 0; - enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE; + unsigned long long max_size_opt = 0, min_size_opt = 0; + int max_val_type = NO_SIZE, min_val_type = NO_SIZE; if (!options) return 0; @@ -799,10 +825,10 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) /* memparse() will accept a K/M/G without a digit */ if (!isdigit(*args[0].from)) goto bad_val; - size = memparse(args[0].from, &rest); - setsize = SIZE_STD; + max_size_opt = memparse(args[0].from, &rest); + max_val_type = SIZE_STD; if (*rest == '%') - setsize = SIZE_PERCENT; + max_val_type = SIZE_PERCENT; break; } @@ -825,6 +851,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) break; } + case Opt_min_size: { + /* memparse() will accept a K/M/G without a digit */ + if (!isdigit(*args[0].from)) + goto bad_val; + min_size_opt = memparse(args[0].from, &rest); + min_val_type = SIZE_STD; + if (*rest == '%') + min_val_type = SIZE_PERCENT; + break; + } + default: pr_err("Bad mount option: \"%s\"\n", p); return -EINVAL; @@ -832,15 +869,22 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) } } - /* Do size after hstate is set up */ - if (setsize > NO_SIZE) { - struct hstate *h = pconfig->hstate; - if (setsize == SIZE_PERCENT) { - size <<= huge_page_shift(h); - size *= h->max_huge_pages; - do_div(size, 100); - } - pconfig->nr_blocks = (size >> huge_page_shift(h)); + /* + * Use huge page pool size (in hstate) to convert the size + * options to number of huge pages. If NO_SIZE, -1 is returned. + */ + pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, + max_size_opt, max_val_type); + pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate, + min_size_opt, min_val_type); + + /* + * If max_size was specified, then min_size must be smaller + */ + if (max_val_type > NO_SIZE && + pconfig->min_hpages > pconfig->max_hpages) { + pr_err("minimum size can not be greater than maximum size\n"); + return -EINVAL; } return 0; @@ -859,12 +903,13 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) save_mount_options(sb, data); - config.nr_blocks = -1; /* No limit on size by default */ + config.max_hpages = -1; /* No limit on size by default */ config.nr_inodes = -1; /* No limit on number of inodes by default */ config.uid = current_fsuid(); config.gid = current_fsgid(); config.mode = 0755; config.hstate = &default_hstate; + config.min_hpages = -1; /* No default minimum size */ ret = hugetlbfs_parse_options(data, &config); if (ret) return ret; @@ -878,8 +923,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) sbinfo->max_inodes = config.nr_inodes; sbinfo->free_inodes = config.nr_inodes; sbinfo->spool = NULL; - if (config.nr_blocks != -1) { - sbinfo->spool = hugepage_new_subpool(config.nr_blocks); + /* + * Allocate and initialize subpool if maximum or minimum size is + * specified. Any needed reservations (for minimim size) are taken + * taken when the subpool is created. + */ + if (config.max_hpages != -1 || config.min_hpages != -1) { + sbinfo->spool = hugepage_new_subpool(config.hstate, + config.max_hpages, + config.min_hpages); if (!sbinfo->spool) goto out_free; } -- cgit v1.2.3-59-g8ed1b From ee1462458cb543bbcfd379176bbba0d4bd052b7f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 15 Apr 2015 16:14:11 -0700 Subject: fs, jfs: remove slab object constructor Mempools based on slab caches with object constructors are risky because element allocation can happen either from the slab cache itself, meaning the constructor is properly called before returning, or from the mempool reserve pool, meaning the constructor is not called before returning, depending on the allocation context. For this reason, we should disallow creating mempools based on slab caches that have object constructors. Callers of mempool_alloc() will be responsible for properly initializing the returned element. Then, it doesn't matter if the element came from the slab cache or the mempool reserved pool. The only occurrence of a mempool being based on a slab cache with an object constructor in the tree is in fs/jfs/jfs_metapage.c. Remove it and properly initialize the element in alloc_metapage(). At the same time, META_free is never used, so remove it as well. Signed-off-by: David Rientjes Acked-by: Dave Kleikamp Cc: Christoph Hellwig Cc: Sebastian Ott Cc: Mikulas Patocka Cc: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jfs/jfs_metapage.c | 31 ++++++++++++------------------- fs/jfs/jfs_metapage.h | 1 - 2 files changed, 12 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 49ba7ff1bbb9..16a0922beb59 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -183,30 +183,23 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) #endif -static void init_once(void *foo) -{ - struct metapage *mp = (struct metapage *)foo; - - mp->lid = 0; - mp->lsn = 0; - mp->flag = 0; - mp->data = NULL; - mp->clsn = 0; - mp->log = NULL; - set_bit(META_free, &mp->flag); - init_waitqueue_head(&mp->wait); -} - static inline struct metapage *alloc_metapage(gfp_t gfp_mask) { - return mempool_alloc(metapage_mempool, gfp_mask); + struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask); + + if (mp) { + mp->lid = 0; + mp->lsn = 0; + mp->data = NULL; + mp->clsn = 0; + mp->log = NULL; + init_waitqueue_head(&mp->wait); + } + return mp; } static inline void free_metapage(struct metapage *mp) { - mp->flag = 0; - set_bit(META_free, &mp->flag); - mempool_free(mp, metapage_mempool); } @@ -216,7 +209,7 @@ int __init metapage_init(void) * Allocate the metapage structures */ metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), - 0, 0, init_once); + 0, 0, NULL); if (metapage_cache == NULL) return -ENOMEM; diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index a78beda85f68..337e9e51ac06 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -48,7 +48,6 @@ struct metapage { /* metapage flag */ #define META_locked 0 -#define META_free 1 #define META_dirty 2 #define META_sync 3 #define META_discard 4 -- cgit v1.2.3-59-g8ed1b From 0e3b210ce1722168227cb3bc7746256d0c0afece Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 15 Apr 2015 16:15:14 -0700 Subject: dax: use pfn_mkwrite to update c/mtime + freeze protection From: Yigal Korman [v1] Without this patch, c/mtime is not updated correctly when mmap'ed page is first read from and then written to. A new xfstest is submitted for testing this (generic/080) [v2] Jan Kara has pointed out that if we add the sb_start/end_pagefault pair in the new pfn_mkwrite we are then fixing another bug where: A user could start writing to the page while filesystem is frozen. Signed-off-by: Yigal Korman Signed-off-by: Boaz Harrosh Reviewed-by: Jan Kara Cc: Matthew Wilcox Cc: Dave Chinner Cc: Hugh Dickins Cc: Mel Gorman Cc: Kirill A. Shutemov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 17 +++++++++++++++++ fs/ext2/file.c | 1 + fs/ext4/file.c | 1 + include/linux/fs.h | 1 + 4 files changed, 20 insertions(+) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index ed1619ec6537..d0bd1f4f81b3 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -463,6 +463,23 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, } EXPORT_SYMBOL_GPL(dax_fault); +/** + * dax_pfn_mkwrite - handle first write to DAX page + * @vma: The virtual memory area where the fault occurred + * @vmf: The description of the fault + * + */ +int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct super_block *sb = file_inode(vma->vm_file)->i_sb; + + sb_start_pagefault(sb); + file_update_time(vma->vm_file); + sb_end_pagefault(sb); + return VM_FAULT_NOPAGE; +} +EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); + /** * dax_zero_page_range - zero a range within a page of a DAX file * @inode: The file being truncated diff --git a/fs/ext2/file.c b/fs/ext2/file.c index e31701713516..866a3ce3f864 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -39,6 +39,7 @@ static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct ext2_dax_vm_ops = { .fault = ext2_dax_fault, .page_mkwrite = ext2_dax_mkwrite, + .pfn_mkwrite = dax_pfn_mkwrite, }; static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 598abbbe6786..aa78c70553f4 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -206,6 +206,7 @@ static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct ext4_dax_vm_ops = { .fault = ext4_dax_fault, .page_mkwrite = ext4_dax_mkwrite, + .pfn_mkwrite = dax_pfn_mkwrite, }; #else #define ext4_dax_vm_ops ext4_file_vm_ops diff --git a/include/linux/fs.h b/include/linux/fs.h index 60733bdc74b4..0f696328f218 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2620,6 +2620,7 @@ int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t); +int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *); #define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb) #ifdef CONFIG_BLOCK -- cgit v1.2.3-59-g8ed1b From be64f884bed729b5d127db6a737155a4e514d286 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 15 Apr 2015 16:15:17 -0700 Subject: dax: unify ext2/4_{dax,}_file_operations The original dax patchset split the ext2/4_file_operations because of the two NULL splice_read/splice_write in the dax case. In the vfs if splice_read/splice_write are NULL we then call default_splice_read/write. What we do here is make generic_file_splice_read aware of IS_DAX() so the original ext2/4_file_operations can be used as is. For write it appears that iter_file_splice_write is just fine. It uses the regular f_op->write(file,..) or new_sync_write(file, ...). Signed-off-by: Boaz Harrosh Reviewed-by: Jan Kara Cc: Dave Chinner Cc: Matthew Wilcox Cc: Hugh Dickins Cc: Mel Gorman Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/ext2.h | 1 - fs/ext2/file.c | 18 ------------------ fs/ext2/inode.c | 5 +---- fs/ext2/namei.c | 10 ++-------- fs/ext4/ext4.h | 1 - fs/ext4/file.c | 20 -------------------- fs/ext4/inode.c | 5 +---- fs/ext4/namei.c | 10 ++-------- fs/splice.c | 3 +++ 9 files changed, 9 insertions(+), 64 deletions(-) (limited to 'fs') diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 678f9ab08c48..8d15febd0aa3 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -793,7 +793,6 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync); extern const struct inode_operations ext2_file_inode_operations; extern const struct file_operations ext2_file_operations; -extern const struct file_operations ext2_dax_file_operations; /* inode.c */ extern const struct address_space_operations ext2_aops; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 866a3ce3f864..19cac93a65d3 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -109,24 +109,6 @@ const struct file_operations ext2_file_operations = { .splice_write = iter_file_splice_write, }; -#ifdef CONFIG_FS_DAX -const struct file_operations ext2_dax_file_operations = { - .llseek = generic_file_llseek, - .read = new_sync_read, - .write = new_sync_write, - .read_iter = generic_file_read_iter, - .write_iter = generic_file_write_iter, - .unlocked_ioctl = ext2_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext2_compat_ioctl, -#endif - .mmap = ext2_file_mmap, - .open = dquot_file_open, - .release = ext2_release_file, - .fsync = ext2_fsync, -}; -#endif - const struct inode_operations ext2_file_inode_operations = { #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index df9d6afbc5d5..b29eb6747116 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1388,10 +1388,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) if (S_ISREG(inode->i_mode)) { inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_dax_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { + if (test_opt(inode->i_sb, NOBH)) { inode->i_mapping->a_ops = &ext2_nobh_aops; inode->i_fop = &ext2_file_operations; } else { diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 148f6e3789ea..ce422931f411 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -104,10 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode return PTR_ERR(inode); inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_dax_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { + if (test_opt(inode->i_sb, NOBH)) { inode->i_mapping->a_ops = &ext2_nobh_aops; inode->i_fop = &ext2_file_operations; } else { @@ -125,10 +122,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) return PTR_ERR(inode); inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_dax_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { + if (test_opt(inode->i_sb, NOBH)) { inode->i_mapping->a_ops = &ext2_nobh_aops; inode->i_fop = &ext2_file_operations; } else { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f63c3d5805c4..8a3981ea35d8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2593,7 +2593,6 @@ extern const struct file_operations ext4_dir_operations; /* file.c */ extern const struct inode_operations ext4_file_inode_operations; extern const struct file_operations ext4_file_operations; -extern const struct file_operations ext4_dax_file_operations; extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); /* inline.c */ diff --git a/fs/ext4/file.c b/fs/ext4/file.c index aa78c70553f4..e6d4280d66be 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -625,26 +625,6 @@ const struct file_operations ext4_file_operations = { .fallocate = ext4_fallocate, }; -#ifdef CONFIG_FS_DAX -const struct file_operations ext4_dax_file_operations = { - .llseek = ext4_llseek, - .read = new_sync_read, - .write = new_sync_write, - .read_iter = generic_file_read_iter, - .write_iter = ext4_file_write_iter, - .unlocked_ioctl = ext4_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext4_compat_ioctl, -#endif - .mmap = ext4_file_mmap, - .open = ext4_file_open, - .release = ext4_release_file, - .fsync = ext4_sync_file, - /* Splice not yet supported with DAX */ - .fallocate = ext4_fallocate, -}; -#endif - const struct inode_operations ext4_file_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_getattr, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a3f451370bef..035b7a06f1c3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4090,10 +4090,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) - inode->i_fop = &ext4_dax_file_operations; - else - inode->i_fop = &ext4_file_operations; + inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext4_dir_inode_operations; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 28fe71a2904c..2291923dae4e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2235,10 +2235,7 @@ retry: err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) - inode->i_fop = &ext4_dax_file_operations; - else - inode->i_fop = &ext4_file_operations; + inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); err = ext4_add_nondir(handle, dentry, inode); if (!err && IS_DIRSYNC(dir)) @@ -2302,10 +2299,7 @@ retry: err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; - if (test_opt(inode->i_sb, DAX)) - inode->i_fop = &ext4_dax_file_operations; - else - inode->i_fop = &ext4_file_operations; + inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); d_tmpfile(dentry, inode); err = ext4_orphan_add(handle, inode); diff --git a/fs/splice.c b/fs/splice.c index 41cbb16299e0..476024bb6546 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -523,6 +523,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, loff_t isize, left; int ret; + if (IS_DAX(in->f_mapping->host)) + return default_file_splice_read(in, ppos, pipe, len, flags); + isize = i_size_read(in->f_mapping->host); if (unlikely(*ppos >= isize)) return 0; -- cgit v1.2.3-59-g8ed1b From e4bc33245124db69b74a6d853ac76c2976f472d5 Mon Sep 17 00:00:00 2001 From: Chen Hanxiao Date: Wed, 15 Apr 2015 16:16:30 -0700 Subject: /proc/PID/status: show all sets of pid according to ns If some issues occurred inside a container guest, host user could not know which process is in trouble just by guest pid: the users of container guest only knew the pid inside containers. This will bring obstacle for trouble shooting. This patch adds four fields: NStgid, NSpid, NSpgid and NSsid: a) In init_pid_ns, nothing changed; b) In one pidns, will tell the pid inside containers: NStgid: 21776 5 1 NSpid: 21776 5 1 NSpgid: 21776 5 1 NSsid: 21729 1 0 ** Process id is 21776 in level 0, 5 in level 1, 1 in level 2. c) If pidns is nested, it depends on which pidns are you in. NStgid: 5 1 NSpid: 5 1 NSpgid: 5 1 NSsid: 1 0 ** Views from level 1 [akpm@linux-foundation.org: add CONFIG_PID_NS ifdef] Signed-off-by: Chen Hanxiao Acked-by: Serge Hallyn Acked-by: "Eric W. Biederman" Tested-by: Serge Hallyn Tested-by: Nathan Scott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 1295a00ca316..a4490c0a4644 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -188,6 +188,24 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, from_kgid_munged(user_ns, GROUP_AT(group_info, g))); put_cred(cred); +#ifdef CONFIG_PID_NS + seq_puts(m, "\nNStgid:"); + for (g = ns->level; g <= pid->level; g++) + seq_printf(m, "\t%d", + task_tgid_nr_ns(p, pid->numbers[g].ns)); + seq_puts(m, "\nNSpid:"); + for (g = ns->level; g <= pid->level; g++) + seq_printf(m, "\t%d", + task_pid_nr_ns(p, pid->numbers[g].ns)); + seq_puts(m, "\nNSpgid:"); + for (g = ns->level; g <= pid->level; g++) + seq_printf(m, "\t%d", + task_pgrp_nr_ns(p, pid->numbers[g].ns)); + seq_puts(m, "\nNSsid:"); + for (g = ns->level; g <= pid->level; g++) + seq_printf(m, "\t%d", + task_session_nr_ns(p, pid->numbers[g].ns)); +#endif seq_putc(m, '\n'); } -- cgit v1.2.3-59-g8ed1b From 2813893f8b197a14f1e1ddb04d99bce46817c84a Mon Sep 17 00:00:00 2001 From: Iulia Manda Date: Wed, 15 Apr 2015 16:16:41 -0700 Subject: kernel: conditionally support non-root users, groups and capabilities There are a lot of embedded systems that run most or all of their functionality in init, running as root:root. For these systems, supporting multiple users is not necessary. This patch adds a new symbol, CONFIG_MULTIUSER, that makes support for non-root users, non-root groups, and capabilities optional. It is enabled under CONFIG_EXPERT menu. When this symbol is not defined, UID and GID are zero in any possible case and processes always have all capabilities. The following syscalls are compiled out: setuid, setregid, setgid, setreuid, setresuid, getresuid, setresgid, getresgid, setgroups, getgroups, setfsuid, setfsgid, capget, capset. Also, groups.c is compiled out completely. In kernel/capability.c, capable function was moved in order to avoid adding two ifdef blocks. This change saves about 25 KB on a defconfig build. The most minimal kernels have total text sizes in the high hundreds of kB rather than low MB. (The 25k goes down a bit with allnoconfig, but not that much. The kernel was booted in Qemu. All the common functionalities work. Adding users/groups is not possible, failing with -ENOSYS. Bloat-o-meter output: add/remove: 7/87 grow/shrink: 19/397 up/down: 1675/-26325 (-24650) [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Iulia Manda Reviewed-by: Josh Triplett Acked-by: Geert Uytterhoeven Tested-by: Paul E. McKenney Reviewed-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/Kconfig | 1 + drivers/staging/lustre/lustre/Kconfig | 1 + fs/nfs/Kconfig | 2 +- fs/nfsd/Kconfig | 1 + include/linux/capability.h | 29 +++++++++++++++++++++++++++++ include/linux/cred.h | 23 +++++++++++++++++++---- include/linux/uidgid.h | 12 ++++++++++++ init/Kconfig | 19 ++++++++++++++++++- kernel/Makefile | 4 +++- kernel/capability.c | 35 +++++++++++++++++++---------------- kernel/cred.c | 3 +++ kernel/groups.c | 3 --- kernel/sys.c | 2 ++ kernel/sys_ni.c | 14 ++++++++++++++ net/sunrpc/Kconfig | 2 ++ security/Kconfig | 1 + 16 files changed, 126 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a5ced5c3c1e0..de2726a487b0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -328,6 +328,7 @@ config COMPAT select COMPAT_BINFMT_ELF if BINFMT_ELF select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION + depends on MULTIUSER help Select this option if you want to enable your system kernel to handle system-calls from ELF binaries for 31 bit ESA. This option diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig index 6725467ef4d0..62c7bba75274 100644 --- a/drivers/staging/lustre/lustre/Kconfig +++ b/drivers/staging/lustre/lustre/Kconfig @@ -10,6 +10,7 @@ config LUSTRE_FS select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 + depends on MULTIUSER help This option enables Lustre file system client support. Choose Y here if you want to access a Lustre file system cluster. To compile diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index c7abc10279af..f31fd0dd92c6 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -1,6 +1,6 @@ config NFS_FS tristate "NFS client support" - depends on INET && FILE_LOCKING + depends on INET && FILE_LOCKING && MULTIUSER select LOCKD select SUNRPC select NFS_ACL_SUPPORT if NFS_V3_ACL diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 683bf718aead..fc2d108f5272 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -6,6 +6,7 @@ config NFSD select SUNRPC select EXPORTFS select NFS_ACL_SUPPORT if NFSD_V2_ACL + depends on MULTIUSER help Choose Y here if you want to allow other computers to access files residing on this system using Sun's Network File System diff --git a/include/linux/capability.h b/include/linux/capability.h index aa93e5ef594c..af9f0b9e80e6 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -205,6 +205,7 @@ static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, cap_intersect(permitted, __cap_nfsd_set)); } +#ifdef CONFIG_MULTIUSER extern bool has_capability(struct task_struct *t, int cap); extern bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap); @@ -213,6 +214,34 @@ extern bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); +#else +static inline bool has_capability(struct task_struct *t, int cap) +{ + return true; +} +static inline bool has_ns_capability(struct task_struct *t, + struct user_namespace *ns, int cap) +{ + return true; +} +static inline bool has_capability_noaudit(struct task_struct *t, int cap) +{ + return true; +} +static inline bool has_ns_capability_noaudit(struct task_struct *t, + struct user_namespace *ns, int cap) +{ + return true; +} +static inline bool capable(int cap) +{ + return true; +} +static inline bool ns_capable(struct user_namespace *ns, int cap) +{ + return true; +} +#endif /* CONFIG_MULTIUSER */ extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); diff --git a/include/linux/cred.h b/include/linux/cred.h index 2fb2ca2127ed..8b6c083e68a7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -62,9 +62,27 @@ do { \ groups_free(group_info); \ } while (0) -extern struct group_info *groups_alloc(int); extern struct group_info init_groups; +#ifdef CONFIG_MULTIUSER +extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); + +extern int in_group_p(kgid_t); +extern int in_egroup_p(kgid_t); +#else +static inline void groups_free(struct group_info *group_info) +{ +} + +static inline int in_group_p(kgid_t grp) +{ + return 1; +} +static inline int in_egroup_p(kgid_t grp) +{ + return 1; +} +#endif extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); @@ -74,9 +92,6 @@ extern bool may_setgroups(void); #define GROUP_AT(gi, i) \ ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) -extern int in_group_p(kgid_t); -extern int in_egroup_p(kgid_t); - /* * The security context of a task * diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index 2d1f9b627f91..0ee05da38899 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -29,6 +29,7 @@ typedef struct { #define KUIDT_INIT(value) (kuid_t){ value } #define KGIDT_INIT(value) (kgid_t){ value } +#ifdef CONFIG_MULTIUSER static inline uid_t __kuid_val(kuid_t uid) { return uid.val; @@ -38,6 +39,17 @@ static inline gid_t __kgid_val(kgid_t gid) { return gid.val; } +#else +static inline uid_t __kuid_val(kuid_t uid) +{ + return 0; +} + +static inline gid_t __kgid_val(kgid_t gid) +{ + return 0; +} +#endif #define GLOBAL_ROOT_UID KUIDT_INIT(0) #define GLOBAL_ROOT_GID KGIDT_INIT(0) diff --git a/init/Kconfig b/init/Kconfig index a905b7301e10..3b9df1aa35db 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -394,6 +394,7 @@ endchoice config BSD_PROCESS_ACCT bool "BSD Process Accounting" + depends on MULTIUSER help If you say Y here, a user level program will be able to instruct the kernel (via a special system call) to write process accounting @@ -420,6 +421,7 @@ config BSD_PROCESS_ACCT_V3 config TASKSTATS bool "Export task/process statistics through netlink" depends on NET + depends on MULTIUSER default n help Export selected statistics for tasks/processes through the @@ -1160,6 +1162,7 @@ config CHECKPOINT_RESTORE menuconfig NAMESPACES bool "Namespaces support" if EXPERT + depends on MULTIUSER default !EXPERT help Provides the way to make tasks work with different objects using @@ -1356,11 +1359,25 @@ menuconfig EXPERT config UID16 bool "Enable 16-bit UID system calls" if EXPERT - depends on HAVE_UID16 + depends on HAVE_UID16 && MULTIUSER default y help This enables the legacy 16-bit UID syscall wrappers. +config MULTIUSER + bool "Multiple users, groups and capabilities support" if EXPERT + default y + help + This option enables support for non-root users, groups and + capabilities. + + If you say N here, all processes will run with UID 0, GID 0, and all + possible capabilities. Saying N here also compiles out support for + system calls related to UIDs, GIDs, and capabilities, such as setuid, + setgid, and capset. + + If unsure, say Y here. + config SGETMASK_SYSCALL bool "sgetmask/ssetmask syscalls support" if EXPERT def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH diff --git a/kernel/Makefile b/kernel/Makefile index 1408b3353a3c..0f8f8b0bc1bf 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,9 @@ obj-y = fork.o exec_domain.o panic.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o groups.o smpboot.o + async.o range.o smpboot.o + +obj-$(CONFIG_MULTIUSER) += groups.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/capability.c b/kernel/capability.c index 989f5bfc57dc..45432b54d5c6 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -35,6 +35,7 @@ static int __init file_caps_disable(char *str) } __setup("no_file_caps", file_caps_disable); +#ifdef CONFIG_MULTIUSER /* * More recent versions of libcap are available from: * @@ -386,6 +387,24 @@ bool ns_capable(struct user_namespace *ns, int cap) } EXPORT_SYMBOL(ns_capable); + +/** + * capable - Determine if the current task has a superior capability in effect + * @cap: The capability to be tested for + * + * Return true if the current task has the given superior capability currently + * available for use, false if not. + * + * This sets PF_SUPERPRIV on the task if the capability is available on the + * assumption that it's about to be used. + */ +bool capable(int cap) +{ + return ns_capable(&init_user_ns, cap); +} +EXPORT_SYMBOL(capable); +#endif /* CONFIG_MULTIUSER */ + /** * file_ns_capable - Determine if the file's opener had a capability in effect * @file: The file we want to check @@ -411,22 +430,6 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns, } EXPORT_SYMBOL(file_ns_capable); -/** - * capable - Determine if the current task has a superior capability in effect - * @cap: The capability to be tested for - * - * Return true if the current task has the given superior capability currently - * available for use, false if not. - * - * This sets PF_SUPERPRIV on the task if the capability is available on the - * assumption that it's about to be used. - */ -bool capable(int cap) -{ - return ns_capable(&init_user_ns, cap); -} -EXPORT_SYMBOL(capable); - /** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question diff --git a/kernel/cred.c b/kernel/cred.c index e0573a43c7df..ec1c07667ec1 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -29,6 +29,9 @@ static struct kmem_cache *cred_jar; +/* init to 2 - one for init_task, one to ensure it is never freed */ +struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; + /* * The initial credentials for the initial task */ diff --git a/kernel/groups.c b/kernel/groups.c index 664411f171b5..74d431d25251 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -9,9 +9,6 @@ #include #include -/* init to 2 - one for init_task, one to ensure it is never freed */ -struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; - struct group_info *groups_alloc(int gidsetsize) { struct group_info *group_info; diff --git a/kernel/sys.c b/kernel/sys.c index a03d9cd23ed7..3be344902316 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -325,6 +325,7 @@ out_unlock: * SMP: There are not races, the GIDs are checked only by filesystem * operations (as far as semantic preservation is concerned). */ +#ifdef CONFIG_MULTIUSER SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) { struct user_namespace *ns = current_user_ns(); @@ -815,6 +816,7 @@ change_okay: commit_creds(new); return old_fsgid; } +#endif /* CONFIG_MULTIUSER */ /** * sys_getpid - return the thread group id of the current process diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 5adcb0ae3a58..7995ef5868d8 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -159,6 +159,20 @@ cond_syscall(sys_uselib); cond_syscall(sys_fadvise64); cond_syscall(sys_fadvise64_64); cond_syscall(sys_madvise); +cond_syscall(sys_setuid); +cond_syscall(sys_setregid); +cond_syscall(sys_setgid); +cond_syscall(sys_setreuid); +cond_syscall(sys_setresuid); +cond_syscall(sys_getresuid); +cond_syscall(sys_setresgid); +cond_syscall(sys_getresgid); +cond_syscall(sys_setgroups); +cond_syscall(sys_getgroups); +cond_syscall(sys_setfsuid); +cond_syscall(sys_setfsgid); +cond_syscall(sys_capget); +cond_syscall(sys_capset); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index fb78117b896c..9068e72aa73c 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -1,9 +1,11 @@ config SUNRPC tristate + depends on MULTIUSER config SUNRPC_GSS tristate select OID_REGISTRY + depends on MULTIUSER config SUNRPC_BACKCHANNEL bool diff --git a/security/Kconfig b/security/Kconfig index beb86b500adf..bf4ec46474b6 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -21,6 +21,7 @@ config SECURITY_DMESG_RESTRICT config SECURITY bool "Enable different security models" depends on SYSFS + depends on MULTIUSER help This allows you to choose different security modules to be configured into your kernel. -- cgit v1.2.3-59-g8ed1b From 41416f2330112d29f2cfa337bfc7e672bf0c2768 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 15 Apr 2015 16:17:28 -0700 Subject: lib/string_helpers.c: change semantics of string_escape_mem The current semantics of string_escape_mem are inadequate for one of its current users, vsnprintf(). If that is to honour its contract, it must know how much space would be needed for the entire escaped buffer, and string_escape_mem provides no way of obtaining that (short of allocating a large enough buffer (~4 times input string) to let it play with, and that's definitely a big no-no inside vsnprintf). So change the semantics for string_escape_mem to be more snprintf-like: Return the size of the output that would be generated if the destination buffer was big enough, but of course still only write to the part of dst it is allowed to, and (contrary to snprintf) don't do '\0'-termination. It is then up to the caller to detect whether output was truncated and to append a '\0' if desired. Also, we must output partial escape sequences, otherwise a call such as snprintf(buf, 3, "%1pE", "\123") would cause printf to write a \0 to buf[2] but leaving buf[0] and buf[1] with whatever they previously contained. This also fixes a bug in the escaped_string() helper function, which used to unconditionally pass a length of "end-buf" to string_escape_mem(); since the latter doesn't check osz for being insanely large, it would happily write to dst. For example, kasprintf(GFP_KERNEL, "something and then %pE", ...); is an easy way to trigger an oops. In test-string_helpers.c, the -ENOMEM test is replaced with testing for getting the expected return value even if the buffer is too small. We also ensure that nothing is written (by relying on a NULL pointer deref) if the output size is 0 by passing NULL - this has to work for kasprintf("%pE") to work. In net/sunrpc/cache.c, I think qword_add still has the same semantics. Someone should definitely double-check this. In fs/proc/array.c, I made the minimum possible change, but longer-term it should stop poking around in seq_file internals. [andriy.shevchenko@linux.intel.com: simplify qword_add] [andriy.shevchenko@linux.intel.com: add missed curly braces] Signed-off-by: Rasmus Villemoes Acked-by: Andy Shevchenko Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 4 ++-- include/linux/string_helpers.h | 8 +++---- lib/string_helpers.c | 49 ++++++------------------------------------ lib/test-string_helpers.c | 40 +++++++++++++++++----------------- lib/vsprintf.c | 8 +++++-- net/sunrpc/cache.c | 8 ++++--- 6 files changed, 44 insertions(+), 73 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index a4490c0a4644..13f047ad08e4 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -99,8 +99,8 @@ static inline void task_name(struct seq_file *m, struct task_struct *p) buf = m->buf + m->count; /* Ignore error for now */ - string_escape_str(tcomm, &buf, m->size - m->count, - ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); + buf += string_escape_str(tcomm, buf, m->size - m->count, + ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\"); m->count = buf - m->buf; seq_putc(m, '\n'); diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index 657571817260..0991913f4953 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -47,22 +47,22 @@ static inline int string_unescape_any_inplace(char *buf) #define ESCAPE_ANY_NP (ESCAPE_ANY | ESCAPE_NP) #define ESCAPE_HEX 0x20 -int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, +int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *esc); static inline int string_escape_mem_any_np(const char *src, size_t isz, - char **dst, size_t osz, const char *esc) + char *dst, size_t osz, const char *esc) { return string_escape_mem(src, isz, dst, osz, ESCAPE_ANY_NP, esc); } -static inline int string_escape_str(const char *src, char **dst, size_t sz, +static inline int string_escape_str(const char *src, char *dst, size_t sz, unsigned int flags, const char *esc) { return string_escape_mem(src, strlen(src), dst, sz, flags, esc); } -static inline int string_escape_str_any_np(const char *src, char **dst, +static inline int string_escape_str_any_np(const char *src, char *dst, size_t sz, const char *esc) { return string_escape_str(src, dst, sz, ESCAPE_ANY_NP, esc); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 9c48ddad0f0d..1826c7407258 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -274,11 +274,6 @@ static bool escape_space(unsigned char c, char **dst, char *end) return false; } - if (out + 2 > end) { - *dst = out + 2; - return true; - } - if (out < end) *out = '\\'; ++out; @@ -309,11 +304,6 @@ static bool escape_special(unsigned char c, char **dst, char *end) return false; } - if (out + 2 > end) { - *dst = out + 2; - return true; - } - if (out < end) *out = '\\'; ++out; @@ -332,11 +322,6 @@ static bool escape_null(unsigned char c, char **dst, char *end) if (c) return false; - if (out + 2 > end) { - *dst = out + 2; - return true; - } - if (out < end) *out = '\\'; ++out; @@ -352,11 +337,6 @@ static bool escape_octal(unsigned char c, char **dst, char *end) { char *out = *dst; - if (out + 4 > end) { - *dst = out + 4; - return true; - } - if (out < end) *out = '\\'; ++out; @@ -378,11 +358,6 @@ static bool escape_hex(unsigned char c, char **dst, char *end) { char *out = *dst; - if (out + 4 > end) { - *dst = out + 4; - return true; - } - if (out < end) *out = '\\'; ++out; @@ -449,20 +424,17 @@ static bool escape_hex(unsigned char c, char **dst, char *end) * it if needs. * * Return: - * The amount of the characters processed to the destination buffer, or - * %-ENOMEM if the size of buffer is not enough to put an escaped character is - * returned. - * - * Even in the case of error @dst pointer will be updated to point to the byte - * after the last processed character. + * The total size of the escaped output that would be generated for + * the given input and flags. To check whether the output was + * truncated, compare the return value to osz. There is room left in + * dst for a '\0' terminator if and only if ret < osz. */ -int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, +int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *esc) { - char *p = *dst; + char *p = dst; char *end = p + osz; bool is_dict = esc && *esc; - int ret; while (isz--) { unsigned char c = *src++; @@ -502,13 +474,6 @@ int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, escape_passthrough(c, &p, end); } - if (p > end) { - *dst = end; - return -ENOMEM; - } - - ret = p - *dst; - *dst = p; - return ret; + return p - dst; } EXPORT_SYMBOL(string_escape_mem); diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index ab0d30e1e18f..8e376efd88a4 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -260,16 +260,28 @@ static __init const char *test_string_find_match(const struct test_string_2 *s2, return NULL; } +static __init void +test_string_escape_overflow(const char *in, int p, unsigned int flags, const char *esc, + int q_test, const char *name) +{ + int q_real; + + q_real = string_escape_mem(in, p, NULL, 0, flags, esc); + if (q_real != q_test) + pr_warn("Test '%s' failed: flags = %u, osz = 0, expected %d, got %d\n", + name, flags, q_test, q_real); +} + static __init void test_string_escape(const char *name, const struct test_string_2 *s2, unsigned int flags, const char *esc) { - int q_real = 512; - char *out_test = kmalloc(q_real, GFP_KERNEL); - char *out_real = kmalloc(q_real, GFP_KERNEL); + size_t out_size = 512; + char *out_test = kmalloc(out_size, GFP_KERNEL); + char *out_real = kmalloc(out_size, GFP_KERNEL); char *in = kmalloc(256, GFP_KERNEL); - char *buf = out_real; int p = 0, q_test = 0; + int q_real; if (!out_test || !out_real || !in) goto out; @@ -301,29 +313,19 @@ static __init void test_string_escape(const char *name, q_test += len; } - q_real = string_escape_mem(in, p, &buf, q_real, flags, esc); + q_real = string_escape_mem(in, p, out_real, out_size, flags, esc); test_string_check_buf(name, flags, in, p, out_real, q_real, out_test, q_test); + + test_string_escape_overflow(in, p, flags, esc, q_test, name); + out: kfree(in); kfree(out_real); kfree(out_test); } -static __init void test_string_escape_nomem(void) -{ - char *in = "\eb \\C\007\"\x90\r]"; - char out[64], *buf = out; - int rc = -ENOMEM, ret; - - ret = string_escape_str_any_np(in, &buf, strlen(in), NULL); - if (ret == rc) - return; - - pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc); -} - static int __init test_string_helpers_init(void) { unsigned int i; @@ -342,8 +344,6 @@ static int __init test_string_helpers_init(void) for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); - test_string_escape_nomem(); - return -EINVAL; } module_init(test_string_helpers_init); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 4da1e7aaf9d5..3a1e0843f9a2 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1235,8 +1235,12 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, len = spec.field_width < 0 ? 1 : spec.field_width; - /* Ignore the error. We print as many characters as we can */ - string_escape_mem(addr, len, &buf, end - buf, flags, NULL); + /* + * string_escape_mem() writes as many characters as it can to + * the given buffer, and returns the total size of the output + * had the buffer been big enough. + */ + buf += string_escape_mem(addr, len, buf, buf < end ? end - buf : 0, flags, NULL); return buf; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 5199bb1a017e..2928afffbb81 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1072,10 +1072,12 @@ void qword_add(char **bpp, int *lp, char *str) if (len < 0) return; - ret = string_escape_str(str, &bp, len, ESCAPE_OCTAL, "\\ \n\t"); - if (ret < 0 || ret == len) + ret = string_escape_str(str, bp, len, ESCAPE_OCTAL, "\\ \n\t"); + if (ret >= len) { + bp += len; len = -1; - else { + } else { + bp += ret; len -= ret; *bp++ = ' '; len--; -- cgit v1.2.3-59-g8ed1b From 25ce319167b517a913a2ba9fc80da8330dbc3249 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 15 Apr 2015 16:18:17 -0700 Subject: proc: remove use of seq_printf return value The seq_printf return value, because it's frequently misused, will eventually be converted to void. See: commit 1f33c41c03da ("seq_file: Rename seq_overflow() to seq_has_overflowed() and make public") Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 4 ++- fs/proc/base.c | 82 +++++++++++++++++++++++++++++++++------------------------ 2 files changed, 50 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index 13f047ad08e4..fd02a9ebfc30 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -632,7 +632,9 @@ static int children_seq_show(struct seq_file *seq, void *v) pid_t pid; pid = pid_nr_ns(v, inode->i_sb->s_fs_info); - return seq_printf(seq, "%d ", pid); + seq_printf(seq, "%d ", pid); + + return 0; } static void *children_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/fs/proc/base.c b/fs/proc/base.c index 3f3d7aeb0712..7a3b82f986dd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -238,13 +238,15 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); - if (lookup_symbol_name(wchan, symname) < 0) + if (lookup_symbol_name(wchan, symname) < 0) { if (!ptrace_may_access(task, PTRACE_MODE_READ)) return 0; - else - return seq_printf(m, "%lu", wchan); - else - return seq_printf(m, "%s", symname); + seq_printf(m, "%lu", wchan); + } else { + seq_printf(m, "%s", symname); + } + + return 0; } #endif /* CONFIG_KALLSYMS */ @@ -309,10 +311,12 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - return seq_printf(m, "%llu %llu %lu\n", - (unsigned long long)task->se.sum_exec_runtime, - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); + seq_printf(m, "%llu %llu %lu\n", + (unsigned long long)task->se.sum_exec_runtime, + (unsigned long long)task->sched_info.run_delay, + task->sched_info.pcount); + + return 0; } #endif @@ -387,7 +391,9 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, points = oom_badness(task, NULL, NULL, totalpages) * 1000 / totalpages; read_unlock(&tasklist_lock); - return seq_printf(m, "%lu\n", points); + seq_printf(m, "%lu\n", points); + + return 0; } struct limit_names { @@ -432,15 +438,15 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, * print the file header */ seq_printf(m, "%-25s %-20s %-20s %-10s\n", - "Limit", "Soft Limit", "Hard Limit", "Units"); + "Limit", "Soft Limit", "Hard Limit", "Units"); for (i = 0; i < RLIM_NLIMITS; i++) { if (rlim[i].rlim_cur == RLIM_INFINITY) seq_printf(m, "%-25s %-20s ", - lnames[i].name, "unlimited"); + lnames[i].name, "unlimited"); else seq_printf(m, "%-25s %-20lu ", - lnames[i].name, rlim[i].rlim_cur); + lnames[i].name, rlim[i].rlim_cur); if (rlim[i].rlim_max == RLIM_INFINITY) seq_printf(m, "%-20s ", "unlimited"); @@ -462,7 +468,9 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, { long nr; unsigned long args[6], sp, pc; - int res = lock_trace(task); + int res; + + res = lock_trace(task); if (res) return res; @@ -477,7 +485,8 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, args[0], args[1], args[2], args[3], args[4], args[5], sp, pc); unlock_trace(task); - return res; + + return 0; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ @@ -2002,12 +2011,13 @@ static int show_timer(struct seq_file *m, void *v) notify = timer->it_sigev_notify; seq_printf(m, "ID: %d\n", timer->it_id); - seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo, - timer->sigq->info.si_value.sival_ptr); + seq_printf(m, "signal: %d/%p\n", + timer->sigq->info.si_signo, + timer->sigq->info.si_value.sival_ptr); seq_printf(m, "notify: %s/%s.%d\n", - nstr[notify & ~SIGEV_THREAD_ID], - (notify & SIGEV_THREAD_ID) ? "tid" : "pid", - pid_nr_ns(timer->it_pid, tp->ns)); + nstr[notify & ~SIGEV_THREAD_ID], + (notify & SIGEV_THREAD_ID) ? "tid" : "pid", + pid_nr_ns(timer->it_pid, tp->ns)); seq_printf(m, "ClockID: %d\n", timer->it_clock); return 0; @@ -2352,21 +2362,23 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh unlock_task_sighand(task, &flags); } - result = seq_printf(m, - "rchar: %llu\n" - "wchar: %llu\n" - "syscr: %llu\n" - "syscw: %llu\n" - "read_bytes: %llu\n" - "write_bytes: %llu\n" - "cancelled_write_bytes: %llu\n", - (unsigned long long)acct.rchar, - (unsigned long long)acct.wchar, - (unsigned long long)acct.syscr, - (unsigned long long)acct.syscw, - (unsigned long long)acct.read_bytes, - (unsigned long long)acct.write_bytes, - (unsigned long long)acct.cancelled_write_bytes); + seq_printf(m, + "rchar: %llu\n" + "wchar: %llu\n" + "syscr: %llu\n" + "syscw: %llu\n" + "read_bytes: %llu\n" + "write_bytes: %llu\n" + "cancelled_write_bytes: %llu\n", + (unsigned long long)acct.rchar, + (unsigned long long)acct.wchar, + (unsigned long long)acct.syscr, + (unsigned long long)acct.syscw, + (unsigned long long)acct.read_bytes, + (unsigned long long)acct.write_bytes, + (unsigned long long)acct.cancelled_write_bytes); + result = 0; + out_unlock: mutex_unlock(&task->signal->cred_guard_mutex); return result; -- cgit v1.2.3-59-g8ed1b