diff options
Diffstat (limited to 'fs/orangefs')
-rw-r--r-- | fs/orangefs/acl.c | 30 | ||||
-rw-r--r-- | fs/orangefs/dcache.c | 4 | ||||
-rw-r--r-- | fs/orangefs/dir.c | 2 | ||||
-rw-r--r-- | fs/orangefs/file.c | 158 | ||||
-rw-r--r-- | fs/orangefs/inode.c | 416 | ||||
-rw-r--r-- | fs/orangefs/namei.c | 12 | ||||
-rw-r--r-- | fs/orangefs/orangefs-bufmap.c | 16 | ||||
-rw-r--r-- | fs/orangefs/orangefs-kernel.h | 19 | ||||
-rw-r--r-- | fs/orangefs/orangefs-mod.c | 5 | ||||
-rw-r--r-- | fs/orangefs/orangefs-sysfs.c | 21 | ||||
-rw-r--r-- | fs/orangefs/orangefs-utils.c | 2 | ||||
-rw-r--r-- | fs/orangefs/super.c | 9 | ||||
-rw-r--r-- | fs/orangefs/xattr.c | 1 |
13 files changed, 271 insertions, 424 deletions
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index eced272a3c57..605e5a3506ec 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -10,12 +10,15 @@ #include "orangefs-bufmap.h" #include <linux/posix_acl_xattr.h> -struct posix_acl *orangefs_get_acl(struct inode *inode, int type) +struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; int ret; char *key = NULL, *value = NULL; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: key = XATTR_NAME_POSIX_ACL_ACCESS; @@ -116,12 +119,15 @@ out: return error; } -int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +int orangefs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, + struct posix_acl *acl, int type) { int error; struct iattr iattr; int rc; + memset(&iattr, 0, sizeof iattr); + if (type == ACL_TYPE_ACCESS && acl) { /* * posix_acl_update_mode checks to see if the permissions @@ -130,7 +136,8 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) * and "mode" to the new desired value. It is up to * us to propagate the new mode back to the server... */ - error = posix_acl_update_mode(inode, &iattr.ia_mode, &acl); + error = posix_acl_update_mode(&init_user_ns, inode, + &iattr.ia_mode, &acl); if (error) { gossip_err("%s: posix_acl_update_mode err: %d\n", __func__, @@ -138,18 +145,17 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return error; } - if (acl) { - rc = __orangefs_set_acl(inode, acl, type); - } else { + if (inode->i_mode != iattr.ia_mode) iattr.ia_valid = ATTR_MODE; - rc = __orangefs_setattr(inode, &iattr); - } - return rc; - - } else { - return -EINVAL; } + + rc = __orangefs_set_acl(inode, acl, type); + + if (!rc && (iattr.ia_valid == ATTR_MODE)) + rc = __orangefs_setattr(inode, &iattr); + + return rc; } int orangefs_init_acl(struct inode *inode, struct inode *dir) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index fe484cf93e5c..8bbe9486e3a6 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -26,8 +26,10 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); - if (!new_op) + if (!new_op) { + ret = -ENOMEM; goto out_put_parent; + } new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.parent_refn = parent->refn; diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c index e2c2699d8016..9cacce5d55c1 100644 --- a/fs/orangefs/dir.c +++ b/fs/orangefs/dir.c @@ -398,7 +398,7 @@ static int orangefs_dir_release(struct inode *inode, struct file *file) const struct file_operations orangefs_dir_operations = { .llseek = orangefs_dir_llseek, .read = generic_read_dir, - .iterate = orangefs_dir_iterate, + .iterate_shared = orangefs_dir_iterate, .open = orangefs_dir_open, .release = orangefs_dir_release }; diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index c740159d9ad1..732661aa2680 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -248,21 +248,7 @@ populate_shared_memory: * or it can pointers to struct page's */ - /* - * When reading, readahead_size will only be zero when - * we're doing O_DIRECT, otherwise we got here from - * orangefs_readpage. - * - * If we got here from orangefs_readpage we want to - * copy either a page or the whole file into the io - * vector, whichever is smaller. - */ - if (readahead_size) - copy_amount = - min(new_op->downcall.resp.io.amt_complete, - (__s64)PAGE_SIZE); - else - copy_amount = new_op->downcall.resp.io.amt_complete; + copy_amount = new_op->downcall.resp.io.amt_complete; ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, copy_amount); @@ -283,19 +269,11 @@ populate_shared_memory: out: if (buffer_index >= 0) { - if ((readahead_size) && (type == ORANGEFS_IO_READ)) { - /* readpage */ - *index_return = buffer_index; - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: hold on to buffer_index :%d:\n", - __func__, buffer_index); - } else { - /* O_DIRECT */ - orangefs_bufmap_put(buffer_index); - gossip_debug(GOSSIP_FILE_DEBUG, - "%s(%pU): PUT buffer_index %d\n", - __func__, handle, buffer_index); - } + orangefs_bufmap_put(buffer_index); + gossip_debug(GOSSIP_FILE_DEBUG, + "%s(%pU): PUT buffer_index %d\n", + __func__, handle, buffer_index); + buffer_index = -1; } op_release(new_op); return ret; @@ -346,23 +324,8 @@ static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { int ret; - struct orangefs_read_options *ro; - orangefs_stats.reads++; - /* - * Remember how they set "count" in read(2) or pread(2) or whatever - - * users can use count as a knob to control orangefs io size and later - * we can try to help them fill as many pages as possible in readpage. - */ - if (!iocb->ki_filp->private_data) { - iocb->ki_filp->private_data = kmalloc(sizeof *ro, GFP_KERNEL); - if (!iocb->ki_filp->private_data) - return(ENOMEM); - ro = iocb->ki_filp->private_data; - ro->blksiz = iter->count; - } - down_read(&file_inode(iocb->ki_filp)->i_rwsem); ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); if (ret) @@ -390,84 +353,6 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, return ret; } -static int orangefs_getflags(struct inode *inode, unsigned long *uval) -{ - __u64 val = 0; - int ret; - - ret = orangefs_inode_getxattr(inode, - "user.pvfs2.meta_hint", - &val, sizeof(val)); - if (ret < 0 && ret != -ENODATA) - return ret; - else if (ret == -ENODATA) - val = 0; - *uval = val; - return 0; -} - -/* - * Perform a miscellaneous operation on a file. - */ -static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file_inode(file); - int ret = -ENOTTY; - __u64 val = 0; - unsigned long uval; - - gossip_debug(GOSSIP_FILE_DEBUG, - "orangefs_ioctl: called with cmd %d\n", - cmd); - - /* - * we understand some general ioctls on files, such as the immutable - * and append flags - */ - if (cmd == FS_IOC_GETFLAGS) { - ret = orangefs_getflags(inode, &uval); - if (ret) - return ret; - gossip_debug(GOSSIP_FILE_DEBUG, - "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n", - (unsigned long long)uval); - return put_user(uval, (int __user *)arg); - } else if (cmd == FS_IOC_SETFLAGS) { - unsigned long old_uval; - - ret = 0; - if (get_user(uval, (int __user *)arg)) - return -EFAULT; - /* - * ORANGEFS_MIRROR_FL is set internally when the mirroring mode - * is turned on for a file. The user is not allowed to turn - * on this bit, but the bit is present if the user first gets - * the flags and then updates the flags with some new - * settings. So, we ignore it in the following edit. bligon. - */ - if ((uval & ~ORANGEFS_MIRROR_FL) & - (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) { - gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); - return -EINVAL; - } - ret = orangefs_getflags(inode, &old_uval); - if (ret) - return ret; - ret = vfs_ioc_setflags_prepare(inode, old_uval, uval); - if (ret) - return ret; - val = uval; - gossip_debug(GOSSIP_FILE_DEBUG, - "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n", - (unsigned long long)val); - ret = orangefs_inode_setxattr(inode, - "user.pvfs2.meta_hint", - &val, sizeof(val), 0); - } - - return ret; -} - static vm_fault_t orangefs_fault(struct vm_fault *vmf) { struct file *file = vmf->vma->vm_file; @@ -502,10 +387,7 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) return ret; gossip_debug(GOSSIP_FILE_DEBUG, - "orangefs_file_mmap: called on %s\n", - (file ? - (char *)file->f_path.dentry->d_name.name : - (char *)"Unknown")); + "orangefs_file_mmap: called on %pD\n", file); /* set the sequential readahead hint */ vma->vm_flags |= VM_SEQ_READ; @@ -535,9 +417,7 @@ static int orangefs_file_release(struct inode *inode, struct file *file) * readahead cache (if any); this forces an expensive refresh of * data for the next caller of mmap (or 'get_block' accesses) */ - if (file_inode(file) && - file_inode(file)->i_mapping && - mapping_nrpages(&file_inode(file)->i_data)) { + if (mapping_nrpages(file->f_mapping)) { if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) { gossip_debug(GOSSIP_INODE_DEBUG, "calling flush_racache on %pU\n", @@ -650,12 +530,6 @@ static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl) return rc; } -static int orangefs_file_open(struct inode * inode, struct file *file) -{ - file->private_data = NULL; - return generic_file_open(inode, file); -} - static int orangefs_flush(struct file *file, fl_owner_t id) { /* @@ -666,19 +540,8 @@ static int orangefs_flush(struct file *file, fl_owner_t id) * on an explicit fsync call. This duplicates historical OrangeFS * behavior. */ - struct inode *inode = file->f_mapping->host; int r; - kfree(file->private_data); - file->private_data = NULL; - - if (inode->i_state & I_DIRTY_TIME) { - spin_lock(&inode->i_lock); - inode->i_state &= ~I_DIRTY_TIME; - spin_unlock(&inode->i_lock); - mark_inode_dirty_sync(inode); - } - r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX); if (r > 0) return 0; @@ -692,9 +555,10 @@ const struct file_operations orangefs_file_operations = { .read_iter = orangefs_file_read_iter, .write_iter = orangefs_file_write_iter, .lock = orangefs_lock, - .unlocked_ioctl = orangefs_ioctl, .mmap = orangefs_file_mmap, - .open = orangefs_file_open, + .open = generic_file_open, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .flush = orangefs_flush, .release = orangefs_file_release, .fsync = orangefs_fsync, diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 961c0fd8675a..7a8c0c6e698d 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -10,7 +10,8 @@ * Linux VFS inode operations. */ -#include <linux/bvec.h> +#include <linux/blkdev.h> +#include <linux/fileattr.h> #include "protocol.h" #include "orangefs-kernel.h" #include "orangefs-bufmap.h" @@ -45,7 +46,7 @@ static int orangefs_writepage_locked(struct page *page, else wlen = PAGE_SIZE; } - /* Should've been handled in orangefs_invalidatepage. */ + /* Should've been handled in orangefs_invalidate_folio. */ WARN_ON(off == len || off + wlen > len); bv.bv_page = page; @@ -62,12 +63,7 @@ static int orangefs_writepage_locked(struct page *page, } else { ret = 0; } - if (wr) { - kfree(wr); - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); - } + kfree(detach_page_private(page)); return ret; } @@ -247,182 +243,124 @@ static int orangefs_writepages(struct address_space *mapping, return ret; } -static int orangefs_launder_page(struct page *); +static int orangefs_launder_folio(struct folio *); -static int orangefs_readpage(struct file *file, struct page *page) +static void orangefs_readahead(struct readahead_control *rac) { - struct inode *inode = page->mapping->host; + loff_t offset; struct iov_iter iter; - struct bio_vec bv; - ssize_t ret; - loff_t off; /* offset into this page */ - pgoff_t index; /* which page */ - struct page *next_page; - char *kaddr; - struct orangefs_read_options *ro = file->private_data; - loff_t read_size; - loff_t roundedup; - int buffer_index = -1; /* orangefs shared memory slot */ - int slot_index; /* index into slot */ - int remaining; + struct inode *inode = rac->mapping->host; + struct xarray *i_pages; + struct page *page; + loff_t new_start = readahead_pos(rac); + int ret; + size_t new_len = 0; - /* - * If they set some miniscule size for "count" in read(2) - * (for example) then let's try to read a page, or the whole file - * if it is smaller than a page. Once "count" goes over a page - * then lets round up to the highest page size multiple that is - * less than or equal to "count" and do that much orangefs IO and - * try to fill as many pages as we can from it. - * - * "count" should be represented in ro->blksiz. - * - * inode->i_size = file size. - */ - if (ro) { - if (ro->blksiz < PAGE_SIZE) { - if (inode->i_size < PAGE_SIZE) - read_size = inode->i_size; - else - read_size = PAGE_SIZE; - } else { - roundedup = ((PAGE_SIZE - 1) & ro->blksiz) ? - ((ro->blksiz + PAGE_SIZE) & ~(PAGE_SIZE -1)) : - ro->blksiz; - if (roundedup > inode->i_size) - read_size = inode->i_size; - else - read_size = roundedup; + loff_t bytes_remaining = inode->i_size - readahead_pos(rac); + loff_t pages_remaining = bytes_remaining / PAGE_SIZE; - } - } else { - read_size = PAGE_SIZE; + if (pages_remaining >= 1024) + new_len = 4194304; + else if (pages_remaining > readahead_count(rac)) + new_len = bytes_remaining; + + if (new_len) + readahead_expand(rac, new_start, new_len); + + offset = readahead_pos(rac); + i_pages = &rac->mapping->i_pages; + + iov_iter_xarray(&iter, READ, i_pages, offset, readahead_length(rac)); + + /* read in the pages. */ + if ((ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, + &offset, &iter, readahead_length(rac), + inode->i_size, NULL, NULL, rac->file)) < 0) + gossip_debug(GOSSIP_FILE_DEBUG, + "%s: wait_for_direct_io failed. \n", __func__); + else + ret = 0; + + /* clean up. */ + while ((page = readahead_page(rac))) { + page_endio(page, false, ret); + put_page(page); } - if (!read_size) - read_size = PAGE_SIZE; +} - if (PageDirty(page)) - orangefs_launder_page(page); +static int orangefs_read_folio(struct file *file, struct folio *folio) +{ + struct inode *inode = folio->mapping->host; + struct iov_iter iter; + struct bio_vec bv; + ssize_t ret; + loff_t off; /* offset of this folio in the file */ - off = page_offset(page); - index = off >> PAGE_SHIFT; - bv.bv_page = page; - bv.bv_len = PAGE_SIZE; + if (folio_test_dirty(folio)) + orangefs_launder_folio(folio); + + off = folio_pos(folio); + bv.bv_page = &folio->page; + bv.bv_len = folio_size(folio); bv.bv_offset = 0; - iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE); + iov_iter_bvec(&iter, READ, &bv, 1, folio_size(folio)); ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter, - read_size, inode->i_size, NULL, &buffer_index, file); - remaining = ret; - /* this will only zero remaining unread portions of the page data */ + folio_size(folio), inode->i_size, NULL, NULL, file); + /* this will only zero remaining unread portions of the folio data */ iov_iter_zero(~0U, &iter); /* takes care of potential aliasing */ - flush_dcache_page(page); + flush_dcache_folio(folio); if (ret < 0) { - SetPageError(page); - unlock_page(page); - goto out; + folio_set_error(folio); } else { - SetPageUptodate(page); - if (PageError(page)) - ClearPageError(page); + folio_mark_uptodate(folio); ret = 0; } - /* unlock the page after the ->readpage() routine completes */ - unlock_page(page); - - if (remaining > PAGE_SIZE) { - slot_index = 0; - while ((remaining - PAGE_SIZE) >= PAGE_SIZE) { - remaining -= PAGE_SIZE; - /* - * It is an optimization to try and fill more than one - * page... by now we've already gotten the single - * page we were after, if stuff doesn't seem to - * be going our way at this point just return - * and hope for the best. - * - * If we look for pages and they're already there is - * one reason to give up, and if they're not there - * and we can't create them is another reason. - */ - - index++; - slot_index++; - next_page = find_get_page(inode->i_mapping, index); - if (next_page) { - gossip_debug(GOSSIP_FILE_DEBUG, - "%s: found next page, quitting\n", - __func__); - put_page(next_page); - goto out; - } - next_page = find_or_create_page(inode->i_mapping, - index, - GFP_KERNEL); - /* - * I've never hit this, leave it as a printk for - * now so it will be obvious. - */ - if (!next_page) { - printk("%s: can't create next page, quitting\n", - __func__); - goto out; - } - kaddr = kmap_atomic(next_page); - orangefs_bufmap_page_fill(kaddr, - buffer_index, - slot_index); - kunmap_atomic(kaddr); - SetPageUptodate(next_page); - unlock_page(next_page); - put_page(next_page); - } - } - -out: - if (buffer_index != -1) - orangefs_bufmap_put(buffer_index); - return ret; + /* unlock the folio after the ->read_folio() routine completes */ + folio_unlock(folio); + return ret; } static int orangefs_write_begin(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, struct page **pagep, - void **fsdata) + struct address_space *mapping, loff_t pos, unsigned len, + struct page **pagep, void **fsdata) { struct orangefs_write_range *wr; + struct folio *folio; struct page *page; pgoff_t index; int ret; index = pos >> PAGE_SHIFT; - page = grab_cache_page_write_begin(mapping, index, flags); + page = grab_cache_page_write_begin(mapping, index); if (!page) return -ENOMEM; *pagep = page; + folio = page_folio(page); - if (PageDirty(page) && !PagePrivate(page)) { + if (folio_test_dirty(folio) && !folio_test_private(folio)) { /* * Should be impossible. If it happens, launder the page * since we don't know what's dirty. This will WARN in * orangefs_writepage_locked. */ - ret = orangefs_launder_page(page); + ret = orangefs_launder_folio(folio); if (ret) return ret; } - if (PagePrivate(page)) { + if (folio_test_private(folio)) { struct orangefs_write_range *wr; - wr = (struct orangefs_write_range *)page_private(page); + wr = folio_get_private(folio); if (wr->pos + wr->len == pos && uid_eq(wr->uid, current_fsuid()) && gid_eq(wr->gid, current_fsgid())) { wr->len += len; goto okay; } else { - ret = orangefs_launder_page(page); + ret = orangefs_launder_folio(folio); if (ret) return ret; } @@ -436,9 +374,7 @@ static int orangefs_write_begin(struct file *file, wr->len = len; wr->uid = current_fsuid(); wr->gid = current_fsgid(); - SetPagePrivate(page); - set_page_private(page, (unsigned long)wr); - get_page(page); + folio_attach_private(folio, wr); okay: return 0; } @@ -478,53 +414,45 @@ static int orangefs_write_end(struct file *file, struct address_space *mapping, return copied; } -static void orangefs_invalidatepage(struct page *page, - unsigned int offset, - unsigned int length) +static void orangefs_invalidate_folio(struct folio *folio, + size_t offset, size_t length) { - struct orangefs_write_range *wr; - wr = (struct orangefs_write_range *)page_private(page); + struct orangefs_write_range *wr = folio_get_private(folio); if (offset == 0 && length == PAGE_SIZE) { - kfree((struct orangefs_write_range *)page_private(page)); - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); + kfree(folio_detach_private(folio)); return; /* write range entirely within invalidate range (or equal) */ - } else if (page_offset(page) + offset <= wr->pos && - wr->pos + wr->len <= page_offset(page) + offset + length) { - kfree((struct orangefs_write_range *)page_private(page)); - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); + } else if (folio_pos(folio) + offset <= wr->pos && + wr->pos + wr->len <= folio_pos(folio) + offset + length) { + kfree(folio_detach_private(folio)); /* XXX is this right? only caller in fs */ - cancel_dirty_page(page); + folio_cancel_dirty(folio); return; /* invalidate range chops off end of write range */ - } else if (wr->pos < page_offset(page) + offset && - wr->pos + wr->len <= page_offset(page) + offset + length && - page_offset(page) + offset < wr->pos + wr->len) { + } else if (wr->pos < folio_pos(folio) + offset && + wr->pos + wr->len <= folio_pos(folio) + offset + length && + folio_pos(folio) + offset < wr->pos + wr->len) { size_t x; - x = wr->pos + wr->len - (page_offset(page) + offset); + x = wr->pos + wr->len - (folio_pos(folio) + offset); WARN_ON(x > wr->len); wr->len -= x; wr->uid = current_fsuid(); wr->gid = current_fsgid(); /* invalidate range chops off beginning of write range */ - } else if (page_offset(page) + offset <= wr->pos && - page_offset(page) + offset + length < wr->pos + wr->len && - wr->pos < page_offset(page) + offset + length) { + } else if (folio_pos(folio) + offset <= wr->pos && + folio_pos(folio) + offset + length < wr->pos + wr->len && + wr->pos < folio_pos(folio) + offset + length) { size_t x; - x = page_offset(page) + offset + length - wr->pos; + x = folio_pos(folio) + offset + length - wr->pos; WARN_ON(x > wr->len); wr->pos += x; wr->len -= x; wr->uid = current_fsuid(); wr->gid = current_fsgid(); /* invalidate range entirely within write range (punch hole) */ - } else if (wr->pos < page_offset(page) + offset && - page_offset(page) + offset + length < wr->pos + wr->len) { + } else if (wr->pos < folio_pos(folio) + offset && + folio_pos(folio) + offset + length < wr->pos + wr->len) { /* XXX what do we do here... should not WARN_ON */ WARN_ON(1); /* punch hole */ @@ -536,11 +464,11 @@ static void orangefs_invalidatepage(struct page *page, /* non-overlapping ranges */ } else { /* WARN if they do overlap */ - if (!((page_offset(page) + offset + length <= wr->pos) ^ - (wr->pos + wr->len <= page_offset(page) + offset))) { + if (!((folio_pos(folio) + offset + length <= wr->pos) ^ + (wr->pos + wr->len <= folio_pos(folio) + offset))) { WARN_ON(1); - printk("invalidate range offset %llu length %u\n", - page_offset(page) + offset, length); + printk("invalidate range offset %llu length %zu\n", + folio_pos(folio) + offset, length); printk("write range offset %llu length %zu\n", wr->pos, wr->len); } @@ -552,35 +480,30 @@ static void orangefs_invalidatepage(struct page *page, * Thus the following runs if wr was modified above. */ - orangefs_launder_page(page); + orangefs_launder_folio(folio); } -static int orangefs_releasepage(struct page *page, gfp_t foo) +static bool orangefs_release_folio(struct folio *folio, gfp_t foo) { - return !PagePrivate(page); + return !folio_test_private(folio); } -static void orangefs_freepage(struct page *page) +static void orangefs_free_folio(struct folio *folio) { - if (PagePrivate(page)) { - kfree((struct orangefs_write_range *)page_private(page)); - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); - } + kfree(folio_detach_private(folio)); } -static int orangefs_launder_page(struct page *page) +static int orangefs_launder_folio(struct folio *folio) { int r = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, }; - wait_on_page_writeback(page); - if (clear_page_dirty_for_io(page)) { - r = orangefs_writepage_locked(page, &wbc); - end_page_writeback(page); + folio_wait_writeback(folio); + if (folio_clear_dirty_for_io(folio)) { + r = orangefs_writepage_locked(&folio->page, &wbc); + folio_end_writeback(folio); } return r; } @@ -704,21 +627,22 @@ out: /** ORANGEFS2 implementation of address space operations */ static const struct address_space_operations orangefs_address_operations = { .writepage = orangefs_writepage, - .readpage = orangefs_readpage, + .readahead = orangefs_readahead, + .read_folio = orangefs_read_folio, .writepages = orangefs_writepages, - .set_page_dirty = __set_page_dirty_nobuffers, + .dirty_folio = filemap_dirty_folio, .write_begin = orangefs_write_begin, .write_end = orangefs_write_end, - .invalidatepage = orangefs_invalidatepage, - .releasepage = orangefs_releasepage, - .freepage = orangefs_freepage, - .launder_page = orangefs_launder_page, + .invalidate_folio = orangefs_invalidate_folio, + .release_folio = orangefs_release_folio, + .free_folio = orangefs_free_folio, + .launder_folio = orangefs_launder_folio, .direct_IO = orangefs_direct_IO, }; vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); unsigned long *bitlock = &orangefs_inode->bitlock; @@ -732,27 +656,27 @@ vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf) goto out; } - lock_page(page); - if (PageDirty(page) && !PagePrivate(page)) { + folio_lock(folio); + if (folio_test_dirty(folio) && !folio_test_private(folio)) { /* - * Should be impossible. If it happens, launder the page + * Should be impossible. If it happens, launder the folio * since we don't know what's dirty. This will WARN in * orangefs_writepage_locked. */ - if (orangefs_launder_page(page)) { + if (orangefs_launder_folio(folio)) { ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; goto out; } } - if (PagePrivate(page)) { - wr = (struct orangefs_write_range *)page_private(page); + if (folio_test_private(folio)) { + wr = folio_get_private(folio); if (uid_eq(wr->uid, current_fsuid()) && gid_eq(wr->gid, current_fsgid())) { - wr->pos = page_offset(page); + wr->pos = page_offset(vmf->page); wr->len = PAGE_SIZE; goto okay; } else { - if (orangefs_launder_page(page)) { + if (orangefs_launder_folio(folio)) { ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; goto out; } @@ -763,29 +687,27 @@ vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf) ret = VM_FAULT_LOCKED|VM_FAULT_RETRY; goto out; } - wr->pos = page_offset(page); + wr->pos = page_offset(vmf->page); wr->len = PAGE_SIZE; wr->uid = current_fsuid(); wr->gid = current_fsgid(); - SetPagePrivate(page); - set_page_private(page, (unsigned long)wr); - get_page(page); + folio_attach_private(folio, wr); okay: file_update_time(vmf->vma->vm_file); - if (page->mapping != inode->i_mapping) { - unlock_page(page); + if (folio->mapping != inode->i_mapping) { + folio_unlock(folio); ret = VM_FAULT_LOCKED|VM_FAULT_NOPAGE; goto out; } /* - * We mark the page dirty already here so that when freeze is in + * We mark the folio dirty already here so that when freeze is in * progress, we are guaranteed that writeback during freezing will - * see the dirty page and writeprotect it again. + * see the dirty folio and writeprotect it again. */ - set_page_dirty(page); - wait_for_stable_page(page); + folio_mark_dirty(folio); + folio_wait_stable(folio); ret = VM_FAULT_LOCKED; out: sb_end_pagefault(inode->i_sb); @@ -902,13 +824,13 @@ again: ORANGEFS_I(inode)->attr_uid = current_fsuid(); ORANGEFS_I(inode)->attr_gid = current_fsgid(); } - setattr_copy(inode, iattr); + setattr_copy(&init_user_ns, inode, iattr); spin_unlock(&inode->i_lock); mark_inode_dirty(inode); if (iattr->ia_valid & ATTR_MODE) /* change mod on a file that has ACLs */ - ret = posix_acl_chmod(inode, inode->i_mode); + ret = posix_acl_chmod(&init_user_ns, inode, inode->i_mode); ret = 0; out: @@ -918,12 +840,13 @@ out: /* * Change attributes of an object referenced by dentry. */ -int orangefs_setattr(struct dentry *dentry, struct iattr *iattr) +int orangefs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *iattr) { int ret; gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n", dentry); - ret = setattr_prepare(dentry, iattr); + ret = setattr_prepare(&init_user_ns, dentry, iattr); if (ret) goto out; ret = __orangefs_setattr(d_inode(dentry), iattr); @@ -937,8 +860,8 @@ out: /* * Obtain attributes of an object given a dentry */ -int orangefs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags) +int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags) { int ret; struct inode *inode = path->dentry->d_inode; @@ -950,23 +873,19 @@ int orangefs_getattr(const struct path *path, struct kstat *stat, ret = orangefs_inode_getattr(inode, request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0); if (ret == 0) { - generic_fillattr(inode, stat); + generic_fillattr(&init_user_ns, inode, stat); /* override block size reported to stat */ if (!(request_mask & STATX_SIZE)) stat->result_mask &= ~STATX_SIZE; - stat->attributes_mask = STATX_ATTR_IMMUTABLE | - STATX_ATTR_APPEND; - if (inode->i_flags & S_IMMUTABLE) - stat->attributes |= STATX_ATTR_IMMUTABLE; - if (inode->i_flags & S_APPEND) - stat->attributes |= STATX_ATTR_APPEND; + generic_fill_statx_attr(inode, stat); } return ret; } -int orangefs_permission(struct inode *inode, int mask) +int orangefs_permission(struct user_namespace *mnt_userns, + struct inode *inode, int mask) { int ret; @@ -980,7 +899,7 @@ int orangefs_permission(struct inode *inode, int mask) if (ret < 0) return ret; - return generic_permission(inode, mask); + return generic_permission(&init_user_ns, inode, mask); } int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags) @@ -999,6 +918,53 @@ int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags return __orangefs_setattr(inode, &iattr); } +static int orangefs_fileattr_get(struct dentry *dentry, struct fileattr *fa) +{ + u64 val = 0; + int ret; + + gossip_debug(GOSSIP_FILE_DEBUG, "%s: called on %pd\n", __func__, + dentry); + + ret = orangefs_inode_getxattr(d_inode(dentry), + "user.pvfs2.meta_hint", + &val, sizeof(val)); + if (ret < 0 && ret != -ENODATA) + return ret; + + gossip_debug(GOSSIP_FILE_DEBUG, "%s: flags=%u\n", __func__, (u32) val); + + fileattr_fill_flags(fa, val); + return 0; +} + +static int orangefs_fileattr_set(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa) +{ + u64 val = 0; + + gossip_debug(GOSSIP_FILE_DEBUG, "%s: called on %pd\n", __func__, + dentry); + /* + * ORANGEFS_MIRROR_FL is set internally when the mirroring mode is + * turned on for a file. The user is not allowed to turn on this bit, + * but the bit is present if the user first gets the flags and then + * updates the flags with some new settings. So, we ignore it in the + * following edit. bligon. + */ + if (fileattr_has_fsx(fa) || + (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL | ORANGEFS_MIRROR_FL))) { + gossip_err("%s: only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n", + __func__); + return -EOPNOTSUPP; + } + val = fa->flags; + gossip_debug(GOSSIP_FILE_DEBUG, "%s: flags=%u\n", __func__, (u32) val); + return orangefs_inode_setxattr(d_inode(dentry), + "user.pvfs2.meta_hint", + &val, sizeof(val), 0); +} + /* ORANGEFS2 implementation of VFS inode operations for files */ static const struct inode_operations orangefs_file_inode_operations = { .get_acl = orangefs_get_acl, @@ -1008,6 +974,8 @@ static const struct inode_operations orangefs_file_inode_operations = { .listxattr = orangefs_listxattr, .permission = orangefs_permission, .update_time = orangefs_update_time, + .fileattr_get = orangefs_fileattr_get, + .fileattr_set = orangefs_fileattr_set, }; static int orangefs_init_iops(struct inode *inode) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index 3e7cf3d0a494..600e8eee541f 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -15,7 +15,8 @@ /* * Get a newly allocated inode to go with a negative dentry. */ -static int orangefs_create(struct inode *dir, +static int orangefs_create(struct user_namespace *mnt_userns, + struct inode *dir, struct dentry *dentry, umode_t mode, bool exclusive) @@ -215,7 +216,8 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) return ret; } -static int orangefs_symlink(struct inode *dir, +static int orangefs_symlink(struct user_namespace *mnt_userns, + struct inode *dir, struct dentry *dentry, const char *symname) { @@ -303,7 +305,8 @@ out: return ret; } -static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +static int orangefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; @@ -372,7 +375,8 @@ out: return ret; } -static int orangefs_rename(struct inode *old_dir, +static int orangefs_rename(struct user_namespace *mnt_userns, + struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 2bb916d68576..b501dc07f922 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -168,10 +168,7 @@ static DEFINE_SPINLOCK(orangefs_bufmap_lock); static void orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap) { - int i; - - for (i = 0; i < bufmap->page_count; i++) - put_page(bufmap->page_array[i]); + unpin_user_pages(bufmap->page_array, bufmap->page_count); } static void @@ -179,7 +176,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap) { kfree(bufmap->page_array); kfree(bufmap->desc_array); - kfree(bufmap->buffer_index_array); + bitmap_free(bufmap->buffer_index_array); kfree(bufmap); } @@ -229,8 +226,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) bufmap->desc_size = user_desc->size; bufmap->desc_shift = ilog2(bufmap->desc_size); - bufmap->buffer_index_array = - kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL); + bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL); if (!bufmap->buffer_index_array) goto out_free_bufmap; @@ -253,7 +249,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) out_free_desc_array: kfree(bufmap->desc_array); out_free_index_array: - kfree(bufmap->buffer_index_array); + bitmap_free(bufmap->buffer_index_array); out_free_bufmap: kfree(bufmap); out: @@ -268,7 +264,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap, int offset = 0, ret, i; /* map the pages */ - ret = get_user_pages_fast((unsigned long)user_desc->ptr, + ret = pin_user_pages_fast((unsigned long)user_desc->ptr, bufmap->page_count, FOLL_WRITE, bufmap->page_array); if (ret < 0) @@ -280,7 +276,7 @@ orangefs_bufmap_map(struct orangefs_bufmap *bufmap, for (i = 0; i < ret; i++) { SetPageError(bufmap->page_array[i]); - put_page(bufmap->page_array[i]); + unpin_user_page(bufmap->page_array[i]); } return -ENOMEM; } diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index ed67f39fa7ce..b5940ec1836a 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -106,8 +106,10 @@ enum orangefs_vfs_op_states { extern int orangefs_init_acl(struct inode *inode, struct inode *dir); extern const struct xattr_handler *orangefs_xattr_handlers[]; -extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type); -extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu); +extern int orangefs_set_acl(struct user_namespace *mnt_userns, + struct inode *inode, struct posix_acl *acl, + int type); /* * orangefs data structures @@ -239,10 +241,6 @@ struct orangefs_write_range { kgid_t gid; }; -struct orangefs_read_options { - ssize_t blksiz; -}; - extern struct orangefs_stats orangefs_stats; /* @@ -363,12 +361,13 @@ struct inode *orangefs_new_inode(struct super_block *sb, struct orangefs_object_kref *ref); int __orangefs_setattr(struct inode *, struct iattr *); -int orangefs_setattr(struct dentry *, struct iattr *); +int orangefs_setattr(struct user_namespace *, struct dentry *, struct iattr *); -int orangefs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int flags); +int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int flags); -int orangefs_permission(struct inode *inode, int mask); +int orangefs_permission(struct user_namespace *mnt_userns, + struct inode *inode, int mask); int orangefs_update_time(struct inode *, struct timespec64 *, int); diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index c010c1fddafc..cd7297815f91 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -31,7 +31,7 @@ static ulong module_parm_debug_mask; __u64 orangefs_gossip_debug_mask; int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS; int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS; -int orangefs_cache_timeout_msecs = 50; +int orangefs_cache_timeout_msecs = 500; int orangefs_dcache_timeout_msecs = 50; int orangefs_getattr_timeout_msecs = 50; @@ -79,7 +79,7 @@ DECLARE_WAIT_QUEUE_HEAD(orangefs_request_list_waitq); static int __init orangefs_init(void) { - int ret = -1; + int ret; __u32 i = 0; if (op_timeout_secs < 0) @@ -149,7 +149,6 @@ static int __init orangefs_init(void) pr_info("%s: module version %s loaded\n", __func__, ORANGEFS_VERSION); - ret = 0; goto out; } diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 3627ea946402..de80b62553bb 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -894,10 +894,11 @@ static struct attribute *orangefs_default_attrs[] = { &perf_time_interval_secs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(orangefs_default); static struct kobj_type orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = orangefs_default_attrs, + .default_groups = orangefs_default_groups, }; static struct orangefs_attribute acache_hard_limit_attribute = @@ -931,10 +932,11 @@ static struct attribute *acache_orangefs_default_attrs[] = { &acache_timeout_msecs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(acache_orangefs_default); static struct kobj_type acache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = acache_orangefs_default_attrs, + .default_groups = acache_orangefs_default_groups, }; static struct orangefs_attribute capcache_hard_limit_attribute = @@ -968,10 +970,11 @@ static struct attribute *capcache_orangefs_default_attrs[] = { &capcache_timeout_secs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(capcache_orangefs_default); static struct kobj_type capcache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = capcache_orangefs_default_attrs, + .default_groups = capcache_orangefs_default_groups, }; static struct orangefs_attribute ccache_hard_limit_attribute = @@ -1005,10 +1008,11 @@ static struct attribute *ccache_orangefs_default_attrs[] = { &ccache_timeout_secs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(ccache_orangefs_default); static struct kobj_type ccache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = ccache_orangefs_default_attrs, + .default_groups = ccache_orangefs_default_groups, }; static struct orangefs_attribute ncache_hard_limit_attribute = @@ -1042,10 +1046,11 @@ static struct attribute *ncache_orangefs_default_attrs[] = { &ncache_timeout_msecs_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(ncache_orangefs_default); static struct kobj_type ncache_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = ncache_orangefs_default_attrs, + .default_groups = ncache_orangefs_default_groups, }; static struct orangefs_attribute pc_acache_attribute = @@ -1072,10 +1077,11 @@ static struct attribute *pc_orangefs_default_attrs[] = { &pc_ncache_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(pc_orangefs_default); static struct kobj_type pc_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = pc_orangefs_default_attrs, + .default_groups = pc_orangefs_default_groups, }; static struct orangefs_attribute stats_reads_attribute = @@ -1095,10 +1101,11 @@ static struct attribute *stats_orangefs_default_attrs[] = { &stats_writes_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(stats_orangefs_default); static struct kobj_type stats_orangefs_ktype = { .sysfs_ops = &orangefs_sysfs_ops, - .default_attrs = stats_orangefs_default_attrs, + .default_groups = stats_orangefs_default_groups, }; static struct kobject *orangefs_obj; diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index d4b7ae763186..46b7dcff18ac 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -221,7 +221,7 @@ static int orangefs_inode_is_stale(struct inode *inode, * If the inode type or symlink target have changed then this * inode is stale. */ - if (type == -1 || !(inode->i_mode & type)) { + if (type == -1 || inode_wrong_type(inode, type)) { orangefs_make_bad_inode(inode); return 1; } diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index ee5efdc35cc1..5254256a224d 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -11,6 +11,7 @@ #include <linux/parser.h> #include <linux/hashtable.h> +#include <linux/seq_file.h> /* a cache for orangefs-inode objects (i.e. orangefs inode private data) */ static struct kmem_cache *orangefs_inode_cache; @@ -106,7 +107,7 @@ static struct inode *orangefs_alloc_inode(struct super_block *sb) { struct orangefs_inode_s *orangefs_inode; - orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, GFP_KERNEL); + orangefs_inode = alloc_inode_sb(sb, orangefs_inode_cache, GFP_KERNEL); if (!orangefs_inode) return NULL; @@ -209,7 +210,7 @@ static int orangefs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail; buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total; buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail; - buf->f_frsize = sb->s_blocksize; + buf->f_frsize = 0; out_op_release: op_release(new_op); @@ -475,7 +476,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, const char *devname, void *data) { - int ret = -EINVAL; + int ret; struct super_block *sb = ERR_PTR(-EINVAL); struct orangefs_kernel_op_s *new_op; struct dentry *d = ERR_PTR(-EINVAL); @@ -526,7 +527,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst, sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL); if (!ORANGEFS_SB(sb)) { d = ERR_PTR(-ENOMEM); - goto free_op; + goto free_sb_and_op; } ret = orangefs_fill_sb(sb, diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index bdc285aea360..9a5b757fbd2f 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -526,6 +526,7 @@ out_unlock: } static int orangefs_xattr_set_default(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *unused, struct inode *inode, const char *name, |